Commit | Line | Data |
---|---|---|
30edc14b KRW |
1 | /* |
2 | * PCI Backend Operations - respond to PCI requests from Frontend | |
3 | * | |
4 | * Author: Ryan Wilson <hap9@epoch.ncsc.mil> | |
5 | */ | |
283c0972 JP |
6 | |
7 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
8 | ||
30edc14b KRW |
9 | #include <linux/module.h> |
10 | #include <linux/wait.h> | |
11 | #include <linux/bitops.h> | |
12 | #include <xen/events.h> | |
13 | #include <linux/sched.h> | |
14 | #include "pciback.h" | |
15 | ||
16 | int verbose_request; | |
17 | module_param(verbose_request, int, 0644); | |
18 | ||
a92336a1 KRW |
19 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id); |
20 | ||
0513fe9e | 21 | /* Ensure a device is has the fake IRQ handler "turned on/off" and is |
a92336a1 | 22 | * ready to be exported. This MUST be run after xen_pcibk_reset_device |
0513fe9e KRW |
23 | * which does the actual PCI device enable/disable. |
24 | */ | |
a92336a1 | 25 | static void xen_pcibk_control_isr(struct pci_dev *dev, int reset) |
0513fe9e | 26 | { |
a92336a1 | 27 | struct xen_pcibk_dev_data *dev_data; |
0513fe9e KRW |
28 | int rc; |
29 | int enable = 0; | |
30 | ||
31 | dev_data = pci_get_drvdata(dev); | |
32 | if (!dev_data) | |
33 | return; | |
34 | ||
35 | /* We don't deal with bridges */ | |
36 | if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) | |
37 | return; | |
38 | ||
39 | if (reset) { | |
40 | dev_data->enable_intx = 0; | |
41 | dev_data->ack_intr = 0; | |
42 | } | |
43 | enable = dev_data->enable_intx; | |
44 | ||
45 | /* Asked to disable, but ISR isn't runnig */ | |
46 | if (!enable && !dev_data->isr_on) | |
47 | return; | |
48 | ||
49 | /* Squirrel away the IRQs in the dev_data. We need this | |
50 | * b/c when device transitions to MSI, the dev->irq is | |
51 | * overwritten with the MSI vector. | |
52 | */ | |
53 | if (enable) | |
54 | dev_data->irq = dev->irq; | |
55 | ||
e17ab35f KRW |
56 | /* |
57 | * SR-IOV devices in all use MSI-X and have no legacy | |
58 | * interrupts, so inhibit creating a fake IRQ handler for them. | |
59 | */ | |
60 | if (dev_data->irq == 0) | |
61 | goto out; | |
62 | ||
0513fe9e KRW |
63 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s-> %s\n", |
64 | dev_data->irq_name, | |
65 | dev_data->irq, | |
66 | pci_is_enabled(dev) ? "on" : "off", | |
67 | dev->msi_enabled ? "MSI" : "", | |
68 | dev->msix_enabled ? "MSI/X" : "", | |
69 | dev_data->isr_on ? "enable" : "disable", | |
70 | enable ? "enable" : "disable"); | |
71 | ||
72 | if (enable) { | |
73 | rc = request_irq(dev_data->irq, | |
a92336a1 | 74 | xen_pcibk_guest_interrupt, IRQF_SHARED, |
0513fe9e KRW |
75 | dev_data->irq_name, dev); |
76 | if (rc) { | |
77 | dev_err(&dev->dev, "%s: failed to install fake IRQ " \ | |
78 | "handler for IRQ %d! (rc:%d)\n", | |
79 | dev_data->irq_name, dev_data->irq, rc); | |
80 | goto out; | |
81 | } | |
82 | } else { | |
83 | free_irq(dev_data->irq, dev); | |
84 | dev_data->irq = 0; | |
85 | } | |
86 | dev_data->isr_on = enable; | |
87 | dev_data->ack_intr = enable; | |
88 | out: | |
89 | dev_dbg(&dev->dev, "%s: #%d %s %s%s %s\n", | |
90 | dev_data->irq_name, | |
91 | dev_data->irq, | |
92 | pci_is_enabled(dev) ? "on" : "off", | |
93 | dev->msi_enabled ? "MSI" : "", | |
94 | dev->msix_enabled ? "MSI/X" : "", | |
95 | enable ? (dev_data->isr_on ? "enabled" : "failed to enable") : | |
96 | (dev_data->isr_on ? "failed to disable" : "disabled")); | |
97 | } | |
98 | ||
30edc14b | 99 | /* Ensure a device is "turned off" and ready to be exported. |
a92336a1 | 100 | * (Also see xen_pcibk_config_reset to ensure virtual configuration space is |
30edc14b KRW |
101 | * ready to be re-exported) |
102 | */ | |
a92336a1 | 103 | void xen_pcibk_reset_device(struct pci_dev *dev) |
30edc14b KRW |
104 | { |
105 | u16 cmd; | |
106 | ||
a92336a1 | 107 | xen_pcibk_control_isr(dev, 1 /* reset device */); |
0513fe9e | 108 | |
30edc14b KRW |
109 | /* Disable devices (but not bridges) */ |
110 | if (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) { | |
a2be65fd KRW |
111 | #ifdef CONFIG_PCI_MSI |
112 | /* The guest could have been abruptly killed without | |
113 | * disabling MSI/MSI-X interrupts.*/ | |
114 | if (dev->msix_enabled) | |
115 | pci_disable_msix(dev); | |
116 | if (dev->msi_enabled) | |
117 | pci_disable_msi(dev); | |
118 | #endif | |
bdc5c181 KRW |
119 | if (pci_is_enabled(dev)) |
120 | pci_disable_device(dev); | |
30edc14b KRW |
121 | |
122 | pci_write_config_word(dev, PCI_COMMAND, 0); | |
123 | ||
124 | dev->is_busmaster = 0; | |
125 | } else { | |
126 | pci_read_config_word(dev, PCI_COMMAND, &cmd); | |
127 | if (cmd & (PCI_COMMAND_INVALIDATE)) { | |
128 | cmd &= ~(PCI_COMMAND_INVALIDATE); | |
129 | pci_write_config_word(dev, PCI_COMMAND, cmd); | |
130 | ||
131 | dev->is_busmaster = 0; | |
132 | } | |
133 | } | |
134 | } | |
a92336a1 KRW |
135 | |
136 | #ifdef CONFIG_PCI_MSI | |
137 | static | |
138 | int xen_pcibk_enable_msi(struct xen_pcibk_device *pdev, | |
139 | struct pci_dev *dev, struct xen_pci_op *op) | |
140 | { | |
141 | struct xen_pcibk_dev_data *dev_data; | |
a92336a1 KRW |
142 | int status; |
143 | ||
144 | if (unlikely(verbose_request)) | |
145 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI\n", pci_name(dev)); | |
146 | ||
147 | status = pci_enable_msi(dev); | |
148 | ||
149 | if (status) { | |
283c0972 | 150 | pr_warn_ratelimited("%s: error enabling MSI for guest %u: err %d\n", |
51ac8893 JB |
151 | pci_name(dev), pdev->xdev->otherend_id, |
152 | status); | |
a92336a1 KRW |
153 | op->value = 0; |
154 | return XEN_PCI_ERR_op_failed; | |
155 | } | |
156 | ||
157 | /* The value the guest needs is actually the IDT vector, not the | |
158 | * the local domain's IRQ number. */ | |
159 | ||
160 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
161 | if (unlikely(verbose_request)) | |
162 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
163 | op->value); | |
164 | ||
165 | dev_data = pci_get_drvdata(dev); | |
166 | if (dev_data) | |
167 | dev_data->ack_intr = 0; | |
168 | ||
169 | return 0; | |
170 | } | |
171 | ||
172 | static | |
173 | int xen_pcibk_disable_msi(struct xen_pcibk_device *pdev, | |
174 | struct pci_dev *dev, struct xen_pci_op *op) | |
175 | { | |
176 | struct xen_pcibk_dev_data *dev_data; | |
177 | ||
178 | if (unlikely(verbose_request)) | |
179 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI\n", | |
180 | pci_name(dev)); | |
181 | pci_disable_msi(dev); | |
182 | ||
183 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
184 | if (unlikely(verbose_request)) | |
185 | printk(KERN_DEBUG DRV_NAME ": %s: MSI: %d\n", pci_name(dev), | |
186 | op->value); | |
187 | dev_data = pci_get_drvdata(dev); | |
188 | if (dev_data) | |
189 | dev_data->ack_intr = 1; | |
190 | return 0; | |
191 | } | |
192 | ||
193 | static | |
194 | int xen_pcibk_enable_msix(struct xen_pcibk_device *pdev, | |
195 | struct pci_dev *dev, struct xen_pci_op *op) | |
196 | { | |
197 | struct xen_pcibk_dev_data *dev_data; | |
198 | int i, result; | |
199 | struct msix_entry *entries; | |
200 | ||
201 | if (unlikely(verbose_request)) | |
202 | printk(KERN_DEBUG DRV_NAME ": %s: enable MSI-X\n", | |
203 | pci_name(dev)); | |
204 | if (op->value > SH_INFO_MAX_VEC) | |
205 | return -EINVAL; | |
206 | ||
207 | entries = kmalloc(op->value * sizeof(*entries), GFP_KERNEL); | |
208 | if (entries == NULL) | |
209 | return -ENOMEM; | |
210 | ||
211 | for (i = 0; i < op->value; i++) { | |
212 | entries[i].entry = op->msix_entries[i].entry; | |
213 | entries[i].vector = op->msix_entries[i].vector; | |
214 | } | |
215 | ||
efdfa3ed | 216 | result = pci_enable_msix_exact(dev, entries, op->value); |
a92336a1 KRW |
217 | if (result == 0) { |
218 | for (i = 0; i < op->value; i++) { | |
219 | op->msix_entries[i].entry = entries[i].entry; | |
c0914e61 | 220 | if (entries[i].vector) { |
a92336a1 KRW |
221 | op->msix_entries[i].vector = |
222 | xen_pirq_from_irq(entries[i].vector); | |
223 | if (unlikely(verbose_request)) | |
224 | printk(KERN_DEBUG DRV_NAME ": %s: " \ | |
225 | "MSI-X[%d]: %d\n", | |
226 | pci_name(dev), i, | |
227 | op->msix_entries[i].vector); | |
c0914e61 | 228 | } |
a92336a1 | 229 | } |
51ac8893 | 230 | } else |
283c0972 | 231 | pr_warn_ratelimited("%s: error enabling MSI-X for guest %u: err %d!\n", |
51ac8893 JB |
232 | pci_name(dev), pdev->xdev->otherend_id, |
233 | result); | |
a92336a1 KRW |
234 | kfree(entries); |
235 | ||
236 | op->value = result; | |
237 | dev_data = pci_get_drvdata(dev); | |
238 | if (dev_data) | |
239 | dev_data->ack_intr = 0; | |
240 | ||
0ee46eca | 241 | return result > 0 ? 0 : result; |
a92336a1 KRW |
242 | } |
243 | ||
244 | static | |
245 | int xen_pcibk_disable_msix(struct xen_pcibk_device *pdev, | |
246 | struct pci_dev *dev, struct xen_pci_op *op) | |
247 | { | |
248 | struct xen_pcibk_dev_data *dev_data; | |
249 | if (unlikely(verbose_request)) | |
250 | printk(KERN_DEBUG DRV_NAME ": %s: disable MSI-X\n", | |
251 | pci_name(dev)); | |
252 | pci_disable_msix(dev); | |
253 | ||
254 | /* | |
255 | * SR-IOV devices (which don't have any legacy IRQ) have | |
256 | * an undefined IRQ value of zero. | |
257 | */ | |
258 | op->value = dev->irq ? xen_pirq_from_irq(dev->irq) : 0; | |
259 | if (unlikely(verbose_request)) | |
260 | printk(KERN_DEBUG DRV_NAME ": %s: MSI-X: %d\n", pci_name(dev), | |
261 | op->value); | |
262 | dev_data = pci_get_drvdata(dev); | |
263 | if (dev_data) | |
264 | dev_data->ack_intr = 1; | |
265 | return 0; | |
266 | } | |
267 | #endif | |
30edc14b KRW |
268 | /* |
269 | * Now the same evtchn is used for both pcifront conf_read_write request | |
270 | * as well as pcie aer front end ack. We use a new work_queue to schedule | |
a92336a1 | 271 | * xen_pcibk conf_read_write service for avoiding confict with aer_core |
30edc14b KRW |
272 | * do_recovery job which also use the system default work_queue |
273 | */ | |
a92336a1 | 274 | void xen_pcibk_test_and_schedule_op(struct xen_pcibk_device *pdev) |
30edc14b KRW |
275 | { |
276 | /* Check that frontend is requesting an operation and that we are not | |
277 | * already processing a request */ | |
278 | if (test_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags) | |
279 | && !test_and_set_bit(_PDEVF_op_active, &pdev->flags)) { | |
a92336a1 | 280 | queue_work(xen_pcibk_wq, &pdev->op_work); |
30edc14b KRW |
281 | } |
282 | /*_XEN_PCIB_active should have been cleared by pcifront. And also make | |
a92336a1 | 283 | sure xen_pcibk is waiting for ack by checking _PCIB_op_pending*/ |
30edc14b KRW |
284 | if (!test_bit(_XEN_PCIB_active, (unsigned long *)&pdev->sh_info->flags) |
285 | && test_bit(_PCIB_op_pending, &pdev->flags)) { | |
a92336a1 | 286 | wake_up(&xen_pcibk_aer_wait_queue); |
30edc14b KRW |
287 | } |
288 | } | |
289 | ||
290 | /* Performing the configuration space reads/writes must not be done in atomic | |
291 | * context because some of the pci_* functions can sleep (mostly due to ACPI | |
292 | * use of semaphores). This function is intended to be called from a work | |
a92336a1 | 293 | * queue in process context taking a struct xen_pcibk_device as a parameter */ |
30edc14b | 294 | |
a92336a1 | 295 | void xen_pcibk_do_op(struct work_struct *data) |
30edc14b | 296 | { |
a92336a1 KRW |
297 | struct xen_pcibk_device *pdev = |
298 | container_of(data, struct xen_pcibk_device, op_work); | |
30edc14b | 299 | struct pci_dev *dev; |
a92336a1 | 300 | struct xen_pcibk_dev_data *dev_data = NULL; |
30edc14b | 301 | struct xen_pci_op *op = &pdev->sh_info->op; |
0513fe9e | 302 | int test_intx = 0; |
30edc14b | 303 | |
a92336a1 | 304 | dev = xen_pcibk_get_pci_dev(pdev, op->domain, op->bus, op->devfn); |
30edc14b KRW |
305 | |
306 | if (dev == NULL) | |
307 | op->err = XEN_PCI_ERR_dev_not_found; | |
308 | else { | |
0513fe9e KRW |
309 | dev_data = pci_get_drvdata(dev); |
310 | if (dev_data) | |
311 | test_intx = dev_data->enable_intx; | |
30edc14b KRW |
312 | switch (op->cmd) { |
313 | case XEN_PCI_OP_conf_read: | |
a92336a1 | 314 | op->err = xen_pcibk_config_read(dev, |
30edc14b KRW |
315 | op->offset, op->size, &op->value); |
316 | break; | |
317 | case XEN_PCI_OP_conf_write: | |
a92336a1 | 318 | op->err = xen_pcibk_config_write(dev, |
30edc14b KRW |
319 | op->offset, op->size, op->value); |
320 | break; | |
321 | #ifdef CONFIG_PCI_MSI | |
322 | case XEN_PCI_OP_enable_msi: | |
a92336a1 | 323 | op->err = xen_pcibk_enable_msi(pdev, dev, op); |
30edc14b KRW |
324 | break; |
325 | case XEN_PCI_OP_disable_msi: | |
a92336a1 | 326 | op->err = xen_pcibk_disable_msi(pdev, dev, op); |
30edc14b KRW |
327 | break; |
328 | case XEN_PCI_OP_enable_msix: | |
a92336a1 | 329 | op->err = xen_pcibk_enable_msix(pdev, dev, op); |
30edc14b KRW |
330 | break; |
331 | case XEN_PCI_OP_disable_msix: | |
a92336a1 | 332 | op->err = xen_pcibk_disable_msix(pdev, dev, op); |
30edc14b KRW |
333 | break; |
334 | #endif | |
335 | default: | |
336 | op->err = XEN_PCI_ERR_not_implemented; | |
337 | break; | |
338 | } | |
339 | } | |
0513fe9e KRW |
340 | if (!op->err && dev && dev_data) { |
341 | /* Transition detected */ | |
342 | if ((dev_data->enable_intx != test_intx)) | |
a92336a1 | 343 | xen_pcibk_control_isr(dev, 0 /* no reset */); |
0513fe9e | 344 | } |
30edc14b KRW |
345 | /* Tell the driver domain that we're done. */ |
346 | wmb(); | |
347 | clear_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); | |
348 | notify_remote_via_irq(pdev->evtchn_irq); | |
349 | ||
350 | /* Mark that we're done. */ | |
351 | smp_mb__before_clear_bit(); /* /after/ clearing PCIF_active */ | |
352 | clear_bit(_PDEVF_op_active, &pdev->flags); | |
353 | smp_mb__after_clear_bit(); /* /before/ final check for work */ | |
354 | ||
355 | /* Check to see if the driver domain tried to start another request in | |
356 | * between clearing _XEN_PCIF_active and clearing _PDEVF_op_active. | |
357 | */ | |
a92336a1 | 358 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
359 | } |
360 | ||
a92336a1 | 361 | irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id) |
30edc14b | 362 | { |
a92336a1 | 363 | struct xen_pcibk_device *pdev = dev_id; |
30edc14b | 364 | |
a92336a1 | 365 | xen_pcibk_test_and_schedule_op(pdev); |
30edc14b KRW |
366 | |
367 | return IRQ_HANDLED; | |
368 | } | |
a92336a1 | 369 | static irqreturn_t xen_pcibk_guest_interrupt(int irq, void *dev_id) |
0513fe9e KRW |
370 | { |
371 | struct pci_dev *dev = (struct pci_dev *)dev_id; | |
a92336a1 | 372 | struct xen_pcibk_dev_data *dev_data = pci_get_drvdata(dev); |
0513fe9e KRW |
373 | |
374 | if (dev_data->isr_on && dev_data->ack_intr) { | |
375 | dev_data->handled++; | |
376 | if ((dev_data->handled % 1000) == 0) { | |
377 | if (xen_test_irq_shared(irq)) { | |
283c0972 | 378 | pr_info("%s IRQ line is not shared " |
0513fe9e KRW |
379 | "with other domains. Turning ISR off\n", |
380 | dev_data->irq_name); | |
381 | dev_data->ack_intr = 0; | |
382 | } | |
383 | } | |
384 | return IRQ_HANDLED; | |
385 | } | |
386 | return IRQ_NONE; | |
387 | } |