Commit | Line | Data |
---|---|---|
30edc14b KRW |
1 | /* |
2 | * PCI Backend Operations - respond to PCI requests from Frontend | |
3 | * | |
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
5 | */ | |
283c0972 JP |
6 | |
7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
8 | ||
30edc14b KRW |
9 | #include <linux/module.h> |
10 | #include <linux/wait.h> | |
11 | #include <linux/bitops.h> | |
12 | #include <xen/events.h> | |
13 | #include <linux/sched.h> | |
14 | #include "pciback.h" | |
15 | ||
/*
 * Module parameter (int, mode 0644).  When non-zero, the MSI/MSI-X request
 * handlers in this file emit KERN_DEBUG trace messages.
 */
int verbose_request;
module_param(verbose_request, int, 0644);

/*
 * Defined later in this file; declared here because xen_pcibk_control_isr()
 * passes it to request_irq() before the definition is seen.
 */
static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id);
20 | ||
0513fe9e | 21 | /* Ensure a device is has the fake IRQ handler "turned on/off" and is |
a92336a1 | 22 | * ready to be exported. This MUST be run after xen_pcibk_reset_device |
0513fe9e KRW |
23 | * which does the actual PCI device enable/disable. |
24 | */ | |
a92336a1 | 25 | static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) |
0513fe9e | 26 | { |
a92336a1 | 27 | struct xen_pcibk_dev_data *dev_data; |
0513fe9e KRW |
28 | int rc; |
29 | int enable = 0; | |
30 | ||
31 | dev_data = pci_get_drvdata(dev); | |
32 | if (!dev_data) | |
33 | return; | |
34 | ||
35 | /* We don't deal with bridges */ | |
36 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) | |
37 | return; | |
38 | ||
39 | if (reset) { | |
40 | dev_data->enable_intx = 0; | |
41 | dev_data->ack_intr = 0; | |
42 | } | |
43 | enable = dev_data->enable_intx; | |
44 | ||
45 | /* Asked to disable, but ISR isn't runnig */ | |
46 | if (!enable && !dev_data->isr_on) | |
47 | return; | |
48 | ||
49 | /* Squirrel away the IRQs in the dev_data. We need this | |
50 | * b/c when device transitions to MSI, the dev->irq is | |
51 | * overwritten with the MSI vector. | |
52 | */ | |
53 | if (enable) | |
54 | dev_data->irq = dev->irq; | |
55 | ||
e17ab35f KRW |
56 | /* |
57 | * SR-IOV devices in all use MSI-X and have no legacy | |
58 | * interrupts, so inhibit creating a fake IRQ handler for them. | |
59 | */ | |
60 | if (dev_data->irq == 0) | |
61 | goto out; | |
62 | ||
0513fe9e KRW |
63 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", |
64 | dev_data->irq_name, | |
65 | dev_data->irq, | |
66 | pci_is_enabled(dev) ? "on" : "off", | |
67 | dev->msi_enabled ? "MSI" : "", | |
68 | dev->msix_enabled ? "MSI/X" : "", | |
69 | dev_data->isr_on ? "enable" : "disable", | |
70 | enable ? "enable" : "disable"); | |
71 | ||
72 | if (enable) { | |
a396f3a2 KRW |
73 | /* |
74 | * The MSI or MSI-X should not have an IRQ handler. Otherwise | |
75 | * if the guest terminates we BUG_ON in free_msi_irqs. | |
76 | */ | |
77 | if (dev->msi_enabled || dev->msix_enabled) | |
78 | goto out; | |
79 | ||
0513fe9e | 80 | rc = request_irq(dev_data->irq, |
a92336a1 | 81 | xen_pcibk_guest_interrupt, IRQF_SHARED, |
0513fe9e KRW |
82 | dev_data->irq_name, dev); |
83 | if (rc) { | |
84 | dev_err(&dev->dev, "%s: failed to install fake IRQ " \ | |
85 | "handler for IRQ %d! (rc:%d)\n", | |
86 | dev_data->irq_name, dev_data->irq, rc); | |
87 | goto out; | |
88 | } | |
89 | } else { | |
90 | free_irq(dev_data->irq, dev); | |
91 | dev_data->irq = 0; | |
92 | } | |
93 | dev_data->isr_on = enable; | |
94 | dev_data->ack_intr = enable; | |
95 | out: | |
96 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", | |
97 | dev_data->irq_name, | |
98 | dev_data->irq, | |
99 | pci_is_enabled(dev) ? "on" : "off", | |
100 | dev->msi_enabled ? "MSI" : "", | |
101 | dev->msix_enabled ? "MSI/X" : "", | |
102 | enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : | |
103 | (dev_data->isr_on ? "failed to disable" : "disabled")); | |
104 | } | |
105 | ||
30edc14b | 106 | /* Ensure a device is "turned off" and ready to be exported. |
a92336a1 | 107 | * (Also see xen_pcibk_config_reset to ensure virtual configuration space is |
30edc14b KRW |
108 | * ready to be re-exported) |
109 | */ | |
a92336a1 | 110 | void xen_pcibk_reset_device(struct pci_dev *dev) |
30edc14b KRW |
111 | { |
112 | u16 cmd; | |
113 | ||
a92336a1 | 114 | xen_pcibk_control_isr(dev, 1 /* reset device */); |
0513fe9e | 115 | |
30edc14b KRW |
116 | /* Disable devices (but not bridges) */ |
117 | if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
a2be65fd KRW |
118 | #ifdef CONFIG_PCI_MSI |
119 | /* The guest could have been abruptly killed without | |
120 | * disabling MSI/MSI-X interrupts.*/ | |
121 | if (dev->msix_enabled) | |
122 | pci_disable_msix(dev); | |
123 | if (dev->msi_enabled) | |
124 | pci_disable_msi(dev); | |
125 | #endif | |
bdc5c181 KRW |
126 | if (pci_is_enabled(dev)) |
127 | pci_disable_device(dev); | |
30edc14b KRW |
128 | |
129 | pci_write_config_word(dev, PCI_COMMAND, 0); | |
130 | ||
131 | dev->is_busmaster = 0; | |
132 | } else { | |
133 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
134 | if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
135 | cmd &= ~(PCI_COMMAND_INVALIDATE); | |
136 | pci_write_config_word(dev, PCI_COMMAND, cmd); | |
137 | ||
138 | dev->is_busmaster = 0; | |
139 | } | |
140 | } | |
141 | } | |
a92336a1 KRW |
142 | |
143 | #ifdef CONFIG_PCI_MSI | |
144 | static | |
145 | int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, | |
146 | struct pci_dev *dev, struct xen_pci_op *op) | |
147 | { | |
148 | struct xen_pcibk_dev_data *dev_data; | |
a92336a1 KRW |
149 | int status; |
150 | ||
151 | if (unlikely(verbose_request)) | |
152 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); | |
153 | ||
56441f3c KRW |
154 | if (dev->msi_enabled) |
155 | status = -EALREADY; | |
156 | else if (dev->msix_enabled) | |
157 | status = -ENXIO; | |
158 | else | |
159 | status = pci_enable_msi(dev); | |
a92336a1 KRW |
160 | |
161 | if (status) { | |
283c0972 | 162 | pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n", |
51ac8893 JB |
163 | pci_name(dev), pdev->xdev->otherend_id, |
164 | status); | |
a92336a1 KRW |
165 | op->value = 0; |
166 | return XEN_PCI_ERR_op_failed; | |
167 | } | |
168 | ||
169 | /* The value the guest needs is actually the IDT vector, not the | |
170 | * the local domain's IRQ number. */ | |
171 | ||
172 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
173 | if (unlikely(verbose_request)) | |
174 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
175 | op->value); | |
176 | ||
177 | dev_data = pci_get_drvdata(dev); | |
178 | if (dev_data) | |
179 | dev_data->ack_intr = 0; | |
180 | ||
181 | return 0; | |
182 | } | |
183 | ||
184 | static | |
185 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | |
186 | struct pci_dev *dev, struct xen_pci_op *op) | |
187 | { | |
a92336a1 KRW |
188 | if (unlikely(verbose_request)) |
189 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", | |
190 | pci_name(dev)); | |
a92336a1 | 191 | |
7cfb905b KRW |
192 | if (dev->msi_enabled) { |
193 | struct xen_pcibk_dev_data *dev_data; | |
194 | ||
195 | pci_disable_msi(dev); | |
196 | ||
197 | dev_data = pci_get_drvdata(dev); | |
198 | if (dev_data) | |
199 | dev_data->ack_intr = 1; | |
200 | } | |
a92336a1 KRW |
201 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; |
202 | if (unlikely(verbose_request)) | |
203 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
204 | op->value); | |
a92336a1 KRW |
205 | return 0; |
206 | } | |
207 | ||
208 | static | |
209 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | |
210 | struct pci_dev *dev, struct xen_pci_op *op) | |
211 | { | |
212 | struct xen_pcibk_dev_data *dev_data; | |
213 | int i, result; | |
214 | struct msix_entry *entries; | |
408fb0e5 | 215 | u16 cmd; |
a92336a1 KRW |
216 | |
217 | if (unlikely(verbose_request)) | |
218 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", | |
219 | pci_name(dev)); | |
5e0ce145 | 220 | |
a92336a1 KRW |
221 | if (op->value > SH_INFO_MAX_VEC) |
222 | return -EINVAL; | |
223 | ||
5e0ce145 KRW |
224 | if (dev->msix_enabled) |
225 | return -EALREADY; | |
226 | ||
408fb0e5 KRW |
227 | /* |
228 | * PCI_COMMAND_MEMORY must be enabled, otherwise we may not be able | |
229 | * to access the BARs where the MSI-X entries reside. | |
230 | */ | |
231 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
232 | if (dev->msi_enabled || !(cmd & PCI_COMMAND_MEMORY)) | |
5e0ce145 KRW |
233 | return -ENXIO; |
234 | ||
a92336a1 KRW |
235 | entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); |
236 | if (entries == NULL) | |
237 | return -ENOMEM; | |
238 | ||
239 | for (i = 0; i < op->value; i++) { | |
240 | entries[i].entry = op->msix_entries[i].entry; | |
241 | entries[i].vector = op->msix_entries[i].vector; | |
242 | } | |
243 | ||
efdfa3ed | 244 | result = pci_enable_msix_exact(dev, entries, op->value); |
a92336a1 KRW |
245 | if (result == 0) { |
246 | for (i = 0; i < op->value; i++) { | |
247 | op->msix_entries[i].entry = entries[i].entry; | |
c0914e61 | 248 | if (entries[i].vector) { |
a92336a1 KRW |
249 | op->msix_entries[i].vector = |
250 | xen_pirq_from_irq(entries[i].vector); | |
251 | if (unlikely(verbose_request)) | |
252 | printk(KERN_DEBUG DRV_NAME ": %s: " \ | |
253 | "MSI-X[%d]: %d\n", | |
254 | pci_name(dev), i, | |
255 | op->msix_entries[i].vector); | |
c0914e61 | 256 | } |
a92336a1 | 257 | } |
51ac8893 | 258 | } else |
283c0972 | 259 | pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n", |
51ac8893 JB |
260 | pci_name(dev), pdev->xdev->otherend_id, |
261 | result); | |
a92336a1 KRW |
262 | kfree(entries); |
263 | ||
264 | op->value = result; | |
265 | dev_data = pci_get_drvdata(dev); | |
266 | if (dev_data) | |
267 | dev_data->ack_intr = 0; | |
268 | ||
0ee46eca | 269 | return result > 0 ? 0 : result; |
a92336a1 KRW |
270 | } |
271 | ||
272 | static | |
273 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | |
274 | struct pci_dev *dev, struct xen_pci_op *op) | |
275 | { | |
a92336a1 KRW |
276 | if (unlikely(verbose_request)) |
277 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", | |
278 | pci_name(dev)); | |
a92336a1 | 279 | |
7cfb905b KRW |
280 | if (dev->msix_enabled) { |
281 | struct xen_pcibk_dev_data *dev_data; | |
282 | ||
283 | pci_disable_msix(dev); | |
284 | ||
285 | dev_data = pci_get_drvdata(dev); | |
286 | if (dev_data) | |
287 | dev_data->ack_intr = 1; | |
288 | } | |
a92336a1 KRW |
289 | /* |
290 | * SR-IOV devices (which don't have any legacy IRQ) have | |
291 | * an undefined IRQ value of zero. | |
292 | */ | |
293 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
294 | if (unlikely(verbose_request)) | |
7cfb905b KRW |
295 | printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", |
296 | pci_name(dev), op->value); | |
a92336a1 KRW |
297 | return 0; |
298 | } | |
299 | #endif | |
30edc14b KRW |
300 | /* |
301 | * Now the same evtchn is used for both pcifront conf_read_write request | |
302 | * as well as pcie aer front end ack. We use a new work_queue to schedule | |
a92336a1 | 303 | * xen_pcibk conf_read_write service for avoiding confict with aer_core |
30edc14b KRW |
304 | * do_recovery job which also use the system default work_queue |
305 | */ | |
a92336a1 | 306 | void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) |
30edc14b KRW |
307 | { |
308 | /* Check that frontend is requesting an operation and that we are not | |
309 | * already processing a request */ | |
310 | if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) | |
311 | && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { | |
a92336a1 | 312 | queue_work(xen_pcibk_wq, &pdev->op_work); |
30edc14b KRW |
313 | } |
314 | /*_XEN_PCIB_active should have been cleared by pcifront. And also make | |
a92336a1 | 315 | sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ |
30edc14b KRW |
316 | if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
317 | && test_bit(_PCIB_op_pending, &pdev->flags)) { | |
a92336a1 | 318 | wake_up(&xen_pcibk_aer_wait_queue); |
30edc14b KRW |
319 | } |
320 | } | |
321 | ||
322 | /* Performing the configuration space reads/writes must not be done in atomic | |
323 | * context because some of the pci_* functions can sleep (mostly due to ACPI | |
324 | * use of semaphores). This function is intended to be called from a work | |
a92336a1 | 325 | * queue in process context taking a struct xen_pcibk_device as a parameter */ |
30edc14b | 326 | |
a92336a1 | 327 | void xen_pcibk_do_op(struct work_struct *data) |
30edc14b | 328 | { |
a92336a1 KRW |
329 | struct xen_pcibk_device *pdev = |
330 | container_of(data, struct xen_pcibk_device, op_work); | |
30edc14b | 331 | struct pci_dev *dev; |
a92336a1 | 332 | struct xen_pcibk_dev_data *dev_data = NULL; |
8135cf8b | 333 | struct xen_pci_op *op = &pdev->op; |
0513fe9e | 334 | int test_intx = 0; |
30edc14b | 335 | |
8135cf8b KRW |
336 | *op = pdev->sh_info->op; |
337 | barrier(); | |
a92336a1 | 338 | dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); |
30edc14b KRW |
339 | |
340 | if (dev == NULL) | |
341 | op->err = XEN_PCI_ERR_dev_not_found; | |
342 | else { | |
0513fe9e KRW |
343 | dev_data = pci_get_drvdata(dev); |
344 | if (dev_data) | |
345 | test_intx = dev_data->enable_intx; | |
30edc14b KRW |
346 | switch (op->cmd) { |
347 | case XEN_PCI_OP_conf_read: | |
a92336a1 | 348 | op->err = xen_pcibk_config_read(dev, |
30edc14b KRW |
349 | op->offset, op->size, &op->value); |
350 | break; | |
351 | case XEN_PCI_OP_conf_write: | |
a92336a1 | 352 | op->err = xen_pcibk_config_write(dev, |
30edc14b KRW |
353 | op->offset, op->size, op->value); |
354 | break; | |
355 | #ifdef CONFIG_PCI_MSI | |
356 | case XEN_PCI_OP_enable_msi: | |
a92336a1 | 357 | op->err = xen_pcibk_enable_msi(pdev, dev, op); |
30edc14b KRW |
358 | break; |
359 | case XEN_PCI_OP_disable_msi: | |
a92336a1 | 360 | op->err = xen_pcibk_disable_msi(pdev, dev, op); |
30edc14b KRW |
361 | break; |
362 | case XEN_PCI_OP_enable_msix: | |
a92336a1 | 363 | op->err = xen_pcibk_enable_msix(pdev, dev, op); |
30edc14b KRW |
364 | break; |
365 | case XEN_PCI_OP_disable_msix: | |
a92336a1 | 366 | op->err = xen_pcibk_disable_msix(pdev, dev, op); |
30edc14b KRW |
367 | break; |
368 | #endif | |
369 | default: | |
370 | op->err = XEN_PCI_ERR_not_implemented; | |
371 | break; | |
372 | } | |
373 | } | |
0513fe9e KRW |
374 | if (!op->err && dev && dev_data) { |
375 | /* Transition detected */ | |
376 | if ((dev_data->enable_intx != test_intx)) | |
a92336a1 | 377 | xen_pcibk_control_isr(dev, 0 /* no reset */); |
0513fe9e | 378 | } |
8135cf8b KRW |
379 | pdev->sh_info->op.err = op->err; |
380 | pdev->sh_info->op.value = op->value; | |
381 | #ifdef CONFIG_PCI_MSI | |
382 | if (op->cmd == XEN_PCI_OP_enable_msix && op->err == 0) { | |
383 | unsigned int i; | |
384 | ||
385 | for (i = 0; i < op->value; i++) | |
386 | pdev->sh_info->op.msix_entries[i].vector = | |
387 | op->msix_entries[i].vector; | |
388 | } | |
389 | #endif | |
30edc14b KRW |
390 | /* Tell the driver domain that we're done. */ |
391 | wmb(); | |
392 | clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
393 | notify_remote_via_irq(pdev->evtchn_irq); | |
394 | ||
395 | /* Mark that we're done. */ | |
4e857c58 | 396 | smp_mb__before_atomic(); /* /after/ clearing PCIF_active */ |
30edc14b | 397 | clear_bit(_PDEVF_op_active, &pdev->flags); |
4e857c58 | 398 | smp_mb__after_atomic(); /* /before/ final check for work */ |
30edc14b KRW |
399 | |
400 | /* Check to see if the driver domain tried to start another request in | |
401 | * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. | |
402 | */ | |
a92336a1 | 403 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
404 | } |
405 | ||
a92336a1 | 406 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) |
30edc14b | 407 | { |
a92336a1 | 408 | struct xen_pcibk_device *pdev = dev_id; |
30edc14b | 409 | |
a92336a1 | 410 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
411 | |
412 | return IRQ_HANDLED; | |
413 | } | |
a92336a1 | 414 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) |
0513fe9e KRW |
415 | { |
416 | struct pci_dev *dev = (struct pci_dev *)dev_id; | |
a92336a1 | 417 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); |
0513fe9e KRW |
418 | |
419 | if (dev_data->isr_on && dev_data->ack_intr) { | |
420 | dev_data->handled++; | |
421 | if ((dev_data->handled % 1000) == 0) { | |
422 | if (xen_test_irq_shared(irq)) { | |
283c0972 | 423 | pr_info("%s IRQ line is not shared " |
0513fe9e KRW |
424 | "with other domains. Turning ISR off\n", |
425 | dev_data->irq_name); | |
426 | dev_data->ack_intr = 0; | |
427 | } | |
428 | } | |
429 | return IRQ_HANDLED; | |
430 | } | |
431 | return IRQ_NONE; | |
432 | } |