Commit | Line | Data |
---|---|---|
89e1f7d4 AW |
1 | /* |
2 | * VFIO PCI config space virtualization | |
3 | * | |
4 | * Copyright (C) 2012 Red Hat, Inc. All rights reserved. | |
5 | * Author: Alex Williamson <alex.williamson@redhat.com> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License version 2 as | |
9 | * published by the Free Software Foundation. | |
10 | * | |
11 | * Derived from original vfio: | |
12 | * Copyright 2010 Cisco Systems, Inc. All rights reserved. | |
13 | * Author: Tom Lyon, pugs@cisco.com | |
14 | */ | |
15 | ||
16 | /* | |
17 | * This code handles reading and writing of PCI configuration registers. | |
18 | * This is hairy because we want to allow a lot of flexibility to the | |
19 | * user driver, but cannot trust it with all of the config fields. | |
20 | * Tables determine which fields can be read and written, as well as | |
21 | * which fields are 'virtualized' - special actions and translations to | |
22 | * make it appear to the user that he has control, when in fact things | |
23 | * must be negotiated with the underlying OS. | |
24 | */ | |
25 | ||
26 | #include <linux/fs.h> | |
27 | #include <linux/pci.h> | |
28 | #include <linux/uaccess.h> | |
29 | #include <linux/vfio.h> | |
25e9789d | 30 | #include <linux/slab.h> |
89e1f7d4 AW |
31 | |
32 | #include "vfio_pci_private.h" | |
33 | ||
/* Offsets at or above this value are treated as extended config space */
#define PCI_CFG_SPACE_SIZE	256

/*
 * Useful "pseudo" capabilities: ID 0 stands for the basic PCI config
 * header in the capability tables below.
 */
#define PCI_CAP_ID_BASIC	0
#define PCI_CAP_ID_INVALID	0xFF
39 | ||
/*
 * True when @offset lies within one of the six standard BAR dwords or
 * within the expansion ROM BAR dword.
 *
 * The argument is parenthesized so that expression arguments (e.g.
 * "a | b") are compared as a whole.  It is still evaluated more than
 * once, so do not pass an expression with side effects.
 */
#define is_bar(offset)	\
	(((offset) >= PCI_BASE_ADDRESS_0 && \
	  (offset) < PCI_BASE_ADDRESS_5 + 4) || \
	 ((offset) >= PCI_ROM_ADDRESS && \
	  (offset) < PCI_ROM_ADDRESS + 4))
43 | ||
44 | /* | |
45 | * Lengths of PCI Config Capabilities | |
46 | * 0: Removed from the user visible capability list | |
47 | * FF: Variable length | |
48 | */ | |
49 | static u8 pci_cap_length[] = { | |
50 | [PCI_CAP_ID_BASIC] = PCI_STD_HEADER_SIZEOF, /* pci config header */ | |
51 | [PCI_CAP_ID_PM] = PCI_PM_SIZEOF, | |
52 | [PCI_CAP_ID_AGP] = PCI_AGP_SIZEOF, | |
53 | [PCI_CAP_ID_VPD] = PCI_CAP_VPD_SIZEOF, | |
54 | [PCI_CAP_ID_SLOTID] = 0, /* bridge - don't care */ | |
55 | [PCI_CAP_ID_MSI] = 0xFF, /* 10, 14, 20, or 24 */ | |
56 | [PCI_CAP_ID_CHSWP] = 0, /* cpci - not yet */ | |
57 | [PCI_CAP_ID_PCIX] = 0xFF, /* 8 or 24 */ | |
58 | [PCI_CAP_ID_HT] = 0xFF, /* hypertransport */ | |
59 | [PCI_CAP_ID_VNDR] = 0xFF, /* variable */ | |
60 | [PCI_CAP_ID_DBG] = 0, /* debug - don't care */ | |
61 | [PCI_CAP_ID_CCRC] = 0, /* cpci - not yet */ | |
62 | [PCI_CAP_ID_SHPC] = 0, /* hotswap - not yet */ | |
63 | [PCI_CAP_ID_SSVID] = 0, /* bridge - don't care */ | |
64 | [PCI_CAP_ID_AGP3] = 0, /* AGP8x - not yet */ | |
65 | [PCI_CAP_ID_SECDEV] = 0, /* secure device not yet */ | |
66 | [PCI_CAP_ID_EXP] = 0xFF, /* 20 or 44 */ | |
67 | [PCI_CAP_ID_MSIX] = PCI_CAP_MSIX_SIZEOF, | |
68 | [PCI_CAP_ID_SATA] = 0xFF, | |
69 | [PCI_CAP_ID_AF] = PCI_CAP_AF_SIZEOF, | |
70 | }; | |
71 | ||
72 | /* | |
73 | * Lengths of PCIe/PCI-X Extended Config Capabilities | |
74 | * 0: Removed or masked from the user visible capabilty list | |
75 | * FF: Variable length | |
76 | */ | |
77 | static u16 pci_ext_cap_length[] = { | |
78 | [PCI_EXT_CAP_ID_ERR] = PCI_ERR_ROOT_COMMAND, | |
79 | [PCI_EXT_CAP_ID_VC] = 0xFF, | |
80 | [PCI_EXT_CAP_ID_DSN] = PCI_EXT_CAP_DSN_SIZEOF, | |
81 | [PCI_EXT_CAP_ID_PWR] = PCI_EXT_CAP_PWR_SIZEOF, | |
82 | [PCI_EXT_CAP_ID_RCLD] = 0, /* root only - don't care */ | |
83 | [PCI_EXT_CAP_ID_RCILC] = 0, /* root only - don't care */ | |
84 | [PCI_EXT_CAP_ID_RCEC] = 0, /* root only - don't care */ | |
85 | [PCI_EXT_CAP_ID_MFVC] = 0xFF, | |
86 | [PCI_EXT_CAP_ID_VC9] = 0xFF, /* same as CAP_ID_VC */ | |
87 | [PCI_EXT_CAP_ID_RCRB] = 0, /* root only - don't care */ | |
88 | [PCI_EXT_CAP_ID_VNDR] = 0xFF, | |
89 | [PCI_EXT_CAP_ID_CAC] = 0, /* obsolete */ | |
90 | [PCI_EXT_CAP_ID_ACS] = 0xFF, | |
91 | [PCI_EXT_CAP_ID_ARI] = PCI_EXT_CAP_ARI_SIZEOF, | |
92 | [PCI_EXT_CAP_ID_ATS] = PCI_EXT_CAP_ATS_SIZEOF, | |
93 | [PCI_EXT_CAP_ID_SRIOV] = PCI_EXT_CAP_SRIOV_SIZEOF, | |
94 | [PCI_EXT_CAP_ID_MRIOV] = 0, /* not yet */ | |
95 | [PCI_EXT_CAP_ID_MCAST] = PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF, | |
96 | [PCI_EXT_CAP_ID_PRI] = PCI_EXT_CAP_PRI_SIZEOF, | |
97 | [PCI_EXT_CAP_ID_AMD_XXX] = 0, /* not yet */ | |
98 | [PCI_EXT_CAP_ID_REBAR] = 0xFF, | |
99 | [PCI_EXT_CAP_ID_DPA] = 0xFF, | |
100 | [PCI_EXT_CAP_ID_TPH] = 0xFF, | |
101 | [PCI_EXT_CAP_ID_LTR] = PCI_EXT_CAP_LTR_SIZEOF, | |
102 | [PCI_EXT_CAP_ID_SECPCI] = 0, /* not yet */ | |
103 | [PCI_EXT_CAP_ID_PMUX] = 0, /* not yet */ | |
104 | [PCI_EXT_CAP_ID_PASID] = 0, /* not yet */ | |
105 | }; | |
106 | ||
107 | /* | |
108 | * Read/Write Permission Bits - one bit for each bit in capability | |
109 | * Any field can be read if it exists, but what is read depends on | |
110 | * whether the field is 'virtualized', or just pass thru to the | |
111 | * hardware. Any virtualized field is also virtualized for writes. | |
112 | * Writes are only permitted if they have a 1 bit here. | |
113 | */ | |
114 | struct perm_bits { | |
115 | u8 *virt; /* read/write virtual data, not hw */ | |
116 | u8 *write; /* writeable bits */ | |
117 | int (*readfn)(struct vfio_pci_device *vdev, int pos, int count, | |
118 | struct perm_bits *perm, int offset, __le32 *val); | |
119 | int (*writefn)(struct vfio_pci_device *vdev, int pos, int count, | |
120 | struct perm_bits *perm, int offset, __le32 val); | |
121 | }; | |
122 | ||
/*
 * Convenience masks for the p_set*() helpers.  The ALL_* values are
 * 32 bits wide; narrower users cast them down, e.g. (u8)ALL_VIRT.
 */
#define	NO_VIRT		0		/* no bits virtualized */
#define	ALL_VIRT	0xFFFFFFFFU	/* every bit virtualized */
#define	NO_WRITE	0		/* no bits writable */
#define	ALL_WRITE	0xFFFFFFFFU	/* every bit writable */
127 | ||
128 | static int vfio_user_config_read(struct pci_dev *pdev, int offset, | |
129 | __le32 *val, int count) | |
130 | { | |
131 | int ret = -EINVAL; | |
132 | u32 tmp_val = 0; | |
133 | ||
134 | switch (count) { | |
135 | case 1: | |
136 | { | |
137 | u8 tmp; | |
138 | ret = pci_user_read_config_byte(pdev, offset, &tmp); | |
139 | tmp_val = tmp; | |
140 | break; | |
141 | } | |
142 | case 2: | |
143 | { | |
144 | u16 tmp; | |
145 | ret = pci_user_read_config_word(pdev, offset, &tmp); | |
146 | tmp_val = tmp; | |
147 | break; | |
148 | } | |
149 | case 4: | |
150 | ret = pci_user_read_config_dword(pdev, offset, &tmp_val); | |
151 | break; | |
152 | } | |
153 | ||
154 | *val = cpu_to_le32(tmp_val); | |
155 | ||
156 | return pcibios_err_to_errno(ret); | |
157 | } | |
158 | ||
159 | static int vfio_user_config_write(struct pci_dev *pdev, int offset, | |
160 | __le32 val, int count) | |
161 | { | |
162 | int ret = -EINVAL; | |
163 | u32 tmp_val = le32_to_cpu(val); | |
164 | ||
165 | switch (count) { | |
166 | case 1: | |
167 | ret = pci_user_write_config_byte(pdev, offset, tmp_val); | |
168 | break; | |
169 | case 2: | |
170 | ret = pci_user_write_config_word(pdev, offset, tmp_val); | |
171 | break; | |
172 | case 4: | |
173 | ret = pci_user_write_config_dword(pdev, offset, tmp_val); | |
174 | break; | |
175 | } | |
176 | ||
177 | return pcibios_err_to_errno(ret); | |
178 | } | |
179 | ||
180 | static int vfio_default_config_read(struct vfio_pci_device *vdev, int pos, | |
181 | int count, struct perm_bits *perm, | |
182 | int offset, __le32 *val) | |
183 | { | |
184 | __le32 virt = 0; | |
185 | ||
186 | memcpy(val, vdev->vconfig + pos, count); | |
187 | ||
188 | memcpy(&virt, perm->virt + offset, count); | |
189 | ||
190 | /* Any non-virtualized bits? */ | |
191 | if (cpu_to_le32(~0U >> (32 - (count * 8))) != virt) { | |
192 | struct pci_dev *pdev = vdev->pdev; | |
193 | __le32 phys_val = 0; | |
194 | int ret; | |
195 | ||
196 | ret = vfio_user_config_read(pdev, pos, &phys_val, count); | |
197 | if (ret) | |
198 | return ret; | |
199 | ||
200 | *val = (phys_val & ~virt) | (*val & virt); | |
201 | } | |
202 | ||
203 | return count; | |
204 | } | |
205 | ||
206 | static int vfio_default_config_write(struct vfio_pci_device *vdev, int pos, | |
207 | int count, struct perm_bits *perm, | |
208 | int offset, __le32 val) | |
209 | { | |
210 | __le32 virt = 0, write = 0; | |
211 | ||
212 | memcpy(&write, perm->write + offset, count); | |
213 | ||
214 | if (!write) | |
215 | return count; /* drop, no writable bits */ | |
216 | ||
217 | memcpy(&virt, perm->virt + offset, count); | |
218 | ||
219 | /* Virtualized and writable bits go to vconfig */ | |
220 | if (write & virt) { | |
221 | __le32 virt_val = 0; | |
222 | ||
223 | memcpy(&virt_val, vdev->vconfig + pos, count); | |
224 | ||
225 | virt_val &= ~(write & virt); | |
226 | virt_val |= (val & (write & virt)); | |
227 | ||
228 | memcpy(vdev->vconfig + pos, &virt_val, count); | |
229 | } | |
230 | ||
231 | /* Non-virtualzed and writable bits go to hardware */ | |
232 | if (write & ~virt) { | |
233 | struct pci_dev *pdev = vdev->pdev; | |
234 | __le32 phys_val = 0; | |
235 | int ret; | |
236 | ||
237 | ret = vfio_user_config_read(pdev, pos, &phys_val, count); | |
238 | if (ret) | |
239 | return ret; | |
240 | ||
241 | phys_val &= ~(write & ~virt); | |
242 | phys_val |= (val & (write & ~virt)); | |
243 | ||
244 | ret = vfio_user_config_write(pdev, pos, phys_val, count); | |
245 | if (ret) | |
246 | return ret; | |
247 | } | |
248 | ||
249 | return count; | |
250 | } | |
251 | ||
252 | /* Allow direct read from hardware, except for capability next pointer */ | |
253 | static int vfio_direct_config_read(struct vfio_pci_device *vdev, int pos, | |
254 | int count, struct perm_bits *perm, | |
255 | int offset, __le32 *val) | |
256 | { | |
257 | int ret; | |
258 | ||
259 | ret = vfio_user_config_read(vdev->pdev, pos, val, count); | |
260 | if (ret) | |
261 | return pcibios_err_to_errno(ret); | |
262 | ||
263 | if (pos >= PCI_CFG_SPACE_SIZE) { /* Extended cap header mangling */ | |
264 | if (offset < 4) | |
265 | memcpy(val, vdev->vconfig + pos, count); | |
266 | } else if (pos >= PCI_STD_HEADER_SIZEOF) { /* Std cap mangling */ | |
267 | if (offset == PCI_CAP_LIST_ID && count > 1) | |
268 | memcpy(val, vdev->vconfig + pos, | |
269 | min(PCI_CAP_FLAGS, count)); | |
270 | else if (offset == PCI_CAP_LIST_NEXT) | |
271 | memcpy(val, vdev->vconfig + pos, 1); | |
272 | } | |
273 | ||
274 | return count; | |
275 | } | |
276 | ||
a7d1ea1c AW |
277 | /* Raw access skips any kind of virtualization */ |
278 | static int vfio_raw_config_write(struct vfio_pci_device *vdev, int pos, | |
279 | int count, struct perm_bits *perm, | |
280 | int offset, __le32 val) | |
89e1f7d4 AW |
281 | { |
282 | int ret; | |
283 | ||
284 | ret = vfio_user_config_write(vdev->pdev, pos, val, count); | |
285 | if (ret) | |
286 | return ret; | |
287 | ||
288 | return count; | |
289 | } | |
290 | ||
a7d1ea1c AW |
291 | static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos, |
292 | int count, struct perm_bits *perm, | |
293 | int offset, __le32 *val) | |
294 | { | |
295 | int ret; | |
296 | ||
297 | ret = vfio_user_config_read(vdev->pdev, pos, val, count); | |
298 | if (ret) | |
299 | return pcibios_err_to_errno(ret); | |
300 | ||
301 | return count; | |
302 | } | |
303 | ||
304 | /* Default capability regions to read-only, no-virtualization */ | |
89e1f7d4 AW |
305 | static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { |
306 | [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } | |
307 | }; | |
308 | static struct perm_bits ecap_perms[PCI_EXT_CAP_ID_MAX + 1] = { | |
309 | [0 ... PCI_EXT_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } | |
310 | }; | |
a7d1ea1c AW |
311 | /* |
312 | * Default unassigned regions to raw read-write access. Some devices | |
313 | * require this to function as they hide registers between the gaps in | |
314 | * config space (be2net). Like MMIO and I/O port registers, we have | |
315 | * to trust the hardware isolation. | |
316 | */ | |
317 | static struct perm_bits unassigned_perms = { | |
318 | .readfn = vfio_raw_config_read, | |
319 | .writefn = vfio_raw_config_write | |
320 | }; | |
89e1f7d4 AW |
321 | |
322 | static void free_perm_bits(struct perm_bits *perm) | |
323 | { | |
324 | kfree(perm->virt); | |
325 | kfree(perm->write); | |
326 | perm->virt = NULL; | |
327 | perm->write = NULL; | |
328 | } | |
329 | ||
330 | static int alloc_perm_bits(struct perm_bits *perm, int size) | |
331 | { | |
332 | /* | |
333 | * Round up all permission bits to the next dword, this lets us | |
334 | * ignore whether a read/write exceeds the defined capability | |
335 | * structure. We can do this because: | |
336 | * - Standard config space is already dword aligned | |
337 | * - Capabilities are all dword alinged (bits 0:1 of next reserved) | |
338 | * - Express capabilities defined as dword aligned | |
339 | */ | |
340 | size = round_up(size, 4); | |
341 | ||
342 | /* | |
343 | * Zero state is | |
344 | * - All Readable, None Writeable, None Virtualized | |
345 | */ | |
346 | perm->virt = kzalloc(size, GFP_KERNEL); | |
347 | perm->write = kzalloc(size, GFP_KERNEL); | |
348 | if (!perm->virt || !perm->write) { | |
349 | free_perm_bits(perm); | |
350 | return -ENOMEM; | |
351 | } | |
352 | ||
353 | perm->readfn = vfio_default_config_read; | |
354 | perm->writefn = vfio_default_config_write; | |
355 | ||
356 | return 0; | |
357 | } | |
358 | ||
359 | /* | |
360 | * Helper functions for filling in permission tables | |
361 | */ | |
362 | static inline void p_setb(struct perm_bits *p, int off, u8 virt, u8 write) | |
363 | { | |
364 | p->virt[off] = virt; | |
365 | p->write[off] = write; | |
366 | } | |
367 | ||
368 | /* Handle endian-ness - pci and tables are little-endian */ | |
369 | static inline void p_setw(struct perm_bits *p, int off, u16 virt, u16 write) | |
370 | { | |
371 | *(__le16 *)(&p->virt[off]) = cpu_to_le16(virt); | |
372 | *(__le16 *)(&p->write[off]) = cpu_to_le16(write); | |
373 | } | |
374 | ||
375 | /* Handle endian-ness - pci and tables are little-endian */ | |
376 | static inline void p_setd(struct perm_bits *p, int off, u32 virt, u32 write) | |
377 | { | |
378 | *(__le32 *)(&p->virt[off]) = cpu_to_le32(virt); | |
379 | *(__le32 *)(&p->write[off]) = cpu_to_le32(write); | |
380 | } | |
381 | ||
382 | /* | |
383 | * Restore the *real* BARs after we detect a FLR or backdoor reset. | |
384 | * (backdoor = some device specific technique that we didn't catch) | |
385 | */ | |
386 | static void vfio_bar_restore(struct vfio_pci_device *vdev) | |
387 | { | |
388 | struct pci_dev *pdev = vdev->pdev; | |
389 | u32 *rbar = vdev->rbar; | |
390 | int i; | |
391 | ||
392 | if (pdev->is_virtfn) | |
393 | return; | |
394 | ||
395 | pr_info("%s: %s reset recovery - restoring bars\n", | |
396 | __func__, dev_name(&pdev->dev)); | |
397 | ||
398 | for (i = PCI_BASE_ADDRESS_0; i <= PCI_BASE_ADDRESS_5; i += 4, rbar++) | |
399 | pci_user_write_config_dword(pdev, i, *rbar); | |
400 | ||
401 | pci_user_write_config_dword(pdev, PCI_ROM_ADDRESS, *rbar); | |
402 | } | |
403 | ||
404 | static __le32 vfio_generate_bar_flags(struct pci_dev *pdev, int bar) | |
405 | { | |
406 | unsigned long flags = pci_resource_flags(pdev, bar); | |
407 | u32 val; | |
408 | ||
409 | if (flags & IORESOURCE_IO) | |
410 | return cpu_to_le32(PCI_BASE_ADDRESS_SPACE_IO); | |
411 | ||
412 | val = PCI_BASE_ADDRESS_SPACE_MEMORY; | |
413 | ||
414 | if (flags & IORESOURCE_PREFETCH) | |
415 | val |= PCI_BASE_ADDRESS_MEM_PREFETCH; | |
416 | ||
417 | if (flags & IORESOURCE_MEM_64) | |
418 | val |= PCI_BASE_ADDRESS_MEM_TYPE_64; | |
419 | ||
420 | return cpu_to_le32(val); | |
421 | } | |
422 | ||
423 | /* | |
424 | * Pretend we're hardware and tweak the values of the *virtual* PCI BARs | |
425 | * to reflect the hardware capabilities. This implements BAR sizing. | |
426 | */ | |
427 | static void vfio_bar_fixup(struct vfio_pci_device *vdev) | |
428 | { | |
429 | struct pci_dev *pdev = vdev->pdev; | |
430 | int i; | |
431 | __le32 *bar; | |
432 | u64 mask; | |
433 | ||
434 | bar = (__le32 *)&vdev->vconfig[PCI_BASE_ADDRESS_0]; | |
435 | ||
436 | for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++, bar++) { | |
437 | if (!pci_resource_start(pdev, i)) { | |
438 | *bar = 0; /* Unmapped by host = unimplemented to user */ | |
439 | continue; | |
440 | } | |
441 | ||
442 | mask = ~(pci_resource_len(pdev, i) - 1); | |
443 | ||
444 | *bar &= cpu_to_le32((u32)mask); | |
445 | *bar |= vfio_generate_bar_flags(pdev, i); | |
446 | ||
447 | if (*bar & cpu_to_le32(PCI_BASE_ADDRESS_MEM_TYPE_64)) { | |
448 | bar++; | |
449 | *bar &= cpu_to_le32((u32)(mask >> 32)); | |
450 | i++; | |
451 | } | |
452 | } | |
453 | ||
454 | bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; | |
455 | ||
456 | /* | |
457 | * NB. we expose the actual BAR size here, regardless of whether | |
458 | * we can read it. When we report the REGION_INFO for the ROM | |
459 | * we report what PCI tells us is the actual ROM size. | |
460 | */ | |
461 | if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { | |
462 | mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); | |
463 | mask |= PCI_ROM_ADDRESS_ENABLE; | |
464 | *bar &= cpu_to_le32((u32)mask); | |
465 | } else | |
466 | *bar = 0; | |
467 | ||
468 | vdev->bardirty = false; | |
469 | } | |
470 | ||
471 | static int vfio_basic_config_read(struct vfio_pci_device *vdev, int pos, | |
472 | int count, struct perm_bits *perm, | |
473 | int offset, __le32 *val) | |
474 | { | |
475 | if (is_bar(offset)) /* pos == offset for basic config */ | |
476 | vfio_bar_fixup(vdev); | |
477 | ||
478 | count = vfio_default_config_read(vdev, pos, count, perm, offset, val); | |
479 | ||
480 | /* Mask in virtual memory enable for SR-IOV devices */ | |
481 | if (offset == PCI_COMMAND && vdev->pdev->is_virtfn) { | |
482 | u16 cmd = le16_to_cpu(*(__le16 *)&vdev->vconfig[PCI_COMMAND]); | |
483 | u32 tmp_val = le32_to_cpu(*val); | |
484 | ||
485 | tmp_val |= cmd & PCI_COMMAND_MEMORY; | |
486 | *val = cpu_to_le32(tmp_val); | |
487 | } | |
488 | ||
489 | return count; | |
490 | } | |
491 | ||
492 | static int vfio_basic_config_write(struct vfio_pci_device *vdev, int pos, | |
493 | int count, struct perm_bits *perm, | |
494 | int offset, __le32 val) | |
495 | { | |
496 | struct pci_dev *pdev = vdev->pdev; | |
497 | __le16 *virt_cmd; | |
498 | u16 new_cmd = 0; | |
499 | int ret; | |
500 | ||
501 | virt_cmd = (__le16 *)&vdev->vconfig[PCI_COMMAND]; | |
502 | ||
503 | if (offset == PCI_COMMAND) { | |
504 | bool phys_mem, virt_mem, new_mem, phys_io, virt_io, new_io; | |
505 | u16 phys_cmd; | |
506 | ||
507 | ret = pci_user_read_config_word(pdev, PCI_COMMAND, &phys_cmd); | |
508 | if (ret) | |
509 | return ret; | |
510 | ||
511 | new_cmd = le32_to_cpu(val); | |
512 | ||
513 | phys_mem = !!(phys_cmd & PCI_COMMAND_MEMORY); | |
514 | virt_mem = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_MEMORY); | |
515 | new_mem = !!(new_cmd & PCI_COMMAND_MEMORY); | |
516 | ||
517 | phys_io = !!(phys_cmd & PCI_COMMAND_IO); | |
518 | virt_io = !!(le16_to_cpu(*virt_cmd) & PCI_COMMAND_IO); | |
519 | new_io = !!(new_cmd & PCI_COMMAND_IO); | |
520 | ||
521 | /* | |
522 | * If the user is writing mem/io enable (new_mem/io) and we | |
523 | * think it's already enabled (virt_mem/io), but the hardware | |
524 | * shows it disabled (phys_mem/io, then the device has | |
525 | * undergone some kind of backdoor reset and needs to be | |
526 | * restored before we allow it to enable the bars. | |
527 | * SR-IOV devices will trigger this, but we catch them later | |
528 | */ | |
529 | if ((new_mem && virt_mem && !phys_mem) || | |
530 | (new_io && virt_io && !phys_io)) | |
531 | vfio_bar_restore(vdev); | |
532 | } | |
533 | ||
534 | count = vfio_default_config_write(vdev, pos, count, perm, offset, val); | |
535 | if (count < 0) | |
536 | return count; | |
537 | ||
538 | /* | |
539 | * Save current memory/io enable bits in vconfig to allow for | |
540 | * the test above next time. | |
541 | */ | |
542 | if (offset == PCI_COMMAND) { | |
543 | u16 mask = PCI_COMMAND_MEMORY | PCI_COMMAND_IO; | |
544 | ||
545 | *virt_cmd &= cpu_to_le16(~mask); | |
546 | *virt_cmd |= cpu_to_le16(new_cmd & mask); | |
547 | } | |
548 | ||
549 | /* Emulate INTx disable */ | |
550 | if (offset >= PCI_COMMAND && offset <= PCI_COMMAND + 1) { | |
551 | bool virt_intx_disable; | |
552 | ||
553 | virt_intx_disable = !!(le16_to_cpu(*virt_cmd) & | |
554 | PCI_COMMAND_INTX_DISABLE); | |
555 | ||
556 | if (virt_intx_disable && !vdev->virq_disabled) { | |
557 | vdev->virq_disabled = true; | |
558 | vfio_pci_intx_mask(vdev); | |
559 | } else if (!virt_intx_disable && vdev->virq_disabled) { | |
560 | vdev->virq_disabled = false; | |
561 | vfio_pci_intx_unmask(vdev); | |
562 | } | |
563 | } | |
564 | ||
565 | if (is_bar(offset)) | |
566 | vdev->bardirty = true; | |
567 | ||
568 | return count; | |
569 | } | |
570 | ||
571 | /* Permissions for the Basic PCI Header */ | |
572 | static int __init init_pci_cap_basic_perm(struct perm_bits *perm) | |
573 | { | |
574 | if (alloc_perm_bits(perm, PCI_STD_HEADER_SIZEOF)) | |
575 | return -ENOMEM; | |
576 | ||
577 | perm->readfn = vfio_basic_config_read; | |
578 | perm->writefn = vfio_basic_config_write; | |
579 | ||
580 | /* Virtualized for SR-IOV functions, which just have FFFF */ | |
581 | p_setw(perm, PCI_VENDOR_ID, (u16)ALL_VIRT, NO_WRITE); | |
582 | p_setw(perm, PCI_DEVICE_ID, (u16)ALL_VIRT, NO_WRITE); | |
583 | ||
584 | /* | |
585 | * Virtualize INTx disable, we use it internally for interrupt | |
586 | * control and can emulate it for non-PCI 2.3 devices. | |
587 | */ | |
588 | p_setw(perm, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE, (u16)ALL_WRITE); | |
589 | ||
590 | /* Virtualize capability list, we might want to skip/disable */ | |
591 | p_setw(perm, PCI_STATUS, PCI_STATUS_CAP_LIST, NO_WRITE); | |
592 | ||
593 | /* No harm to write */ | |
594 | p_setb(perm, PCI_CACHE_LINE_SIZE, NO_VIRT, (u8)ALL_WRITE); | |
595 | p_setb(perm, PCI_LATENCY_TIMER, NO_VIRT, (u8)ALL_WRITE); | |
596 | p_setb(perm, PCI_BIST, NO_VIRT, (u8)ALL_WRITE); | |
597 | ||
598 | /* Virtualize all bars, can't touch the real ones */ | |
599 | p_setd(perm, PCI_BASE_ADDRESS_0, ALL_VIRT, ALL_WRITE); | |
600 | p_setd(perm, PCI_BASE_ADDRESS_1, ALL_VIRT, ALL_WRITE); | |
601 | p_setd(perm, PCI_BASE_ADDRESS_2, ALL_VIRT, ALL_WRITE); | |
602 | p_setd(perm, PCI_BASE_ADDRESS_3, ALL_VIRT, ALL_WRITE); | |
603 | p_setd(perm, PCI_BASE_ADDRESS_4, ALL_VIRT, ALL_WRITE); | |
604 | p_setd(perm, PCI_BASE_ADDRESS_5, ALL_VIRT, ALL_WRITE); | |
605 | p_setd(perm, PCI_ROM_ADDRESS, ALL_VIRT, ALL_WRITE); | |
606 | ||
607 | /* Allow us to adjust capability chain */ | |
608 | p_setb(perm, PCI_CAPABILITY_LIST, (u8)ALL_VIRT, NO_WRITE); | |
609 | ||
610 | /* Sometimes used by sw, just virtualize */ | |
611 | p_setb(perm, PCI_INTERRUPT_LINE, (u8)ALL_VIRT, (u8)ALL_WRITE); | |
612 | return 0; | |
613 | } | |
614 | ||
2dd11948 AW |
615 | static int vfio_pm_config_write(struct vfio_pci_device *vdev, int pos, |
616 | int count, struct perm_bits *perm, | |
617 | int offset, __le32 val) | |
618 | { | |
619 | count = vfio_default_config_write(vdev, pos, count, perm, offset, val); | |
620 | if (count < 0) | |
621 | return count; | |
622 | ||
623 | if (offset == PCI_PM_CTRL) { | |
624 | pci_power_t state; | |
625 | ||
626 | switch (le32_to_cpu(val) & PCI_PM_CTRL_STATE_MASK) { | |
627 | case 0: | |
628 | state = PCI_D0; | |
629 | break; | |
630 | case 1: | |
631 | state = PCI_D1; | |
632 | break; | |
633 | case 2: | |
634 | state = PCI_D2; | |
635 | break; | |
636 | case 3: | |
637 | state = PCI_D3hot; | |
638 | break; | |
639 | } | |
640 | ||
641 | pci_set_power_state(vdev->pdev, state); | |
642 | } | |
643 | ||
644 | return count; | |
645 | } | |
646 | ||
89e1f7d4 AW |
647 | /* Permissions for the Power Management capability */ |
648 | static int __init init_pci_cap_pm_perm(struct perm_bits *perm) | |
649 | { | |
650 | if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_PM])) | |
651 | return -ENOMEM; | |
652 | ||
2dd11948 AW |
653 | perm->writefn = vfio_pm_config_write; |
654 | ||
89e1f7d4 AW |
655 | /* |
656 | * We always virtualize the next field so we can remove | |
657 | * capabilities from the chain if we want to. | |
658 | */ | |
659 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | |
660 | ||
661 | /* | |
2dd11948 AW |
662 | * Power management is defined *per function*, so we can let |
663 | * the user change power state, but we trap and initiate the | |
664 | * change ourselves, so the state bits are read-only. | |
89e1f7d4 | 665 | */ |
2dd11948 | 666 | p_setd(perm, PCI_PM_CTRL, NO_VIRT, ~PCI_PM_CTRL_STATE_MASK); |
89e1f7d4 AW |
667 | return 0; |
668 | } | |
669 | ||
670 | /* Permissions for PCI-X capability */ | |
671 | static int __init init_pci_cap_pcix_perm(struct perm_bits *perm) | |
672 | { | |
673 | /* Alloc 24, but only 8 are used in v0 */ | |
674 | if (alloc_perm_bits(perm, PCI_CAP_PCIX_SIZEOF_V2)) | |
675 | return -ENOMEM; | |
676 | ||
677 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | |
678 | ||
679 | p_setw(perm, PCI_X_CMD, NO_VIRT, (u16)ALL_WRITE); | |
680 | p_setd(perm, PCI_X_ECC_CSR, NO_VIRT, ALL_WRITE); | |
681 | return 0; | |
682 | } | |
683 | ||
684 | /* Permissions for PCI Express capability */ | |
685 | static int __init init_pci_cap_exp_perm(struct perm_bits *perm) | |
686 | { | |
687 | /* Alloc larger of two possible sizes */ | |
688 | if (alloc_perm_bits(perm, PCI_CAP_EXP_ENDPOINT_SIZEOF_V2)) | |
689 | return -ENOMEM; | |
690 | ||
691 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | |
692 | ||
693 | /* | |
694 | * Allow writes to device control fields (includes FLR!) | |
695 | * but not to devctl_phantom which could confuse IOMMU | |
696 | * or to the ARI bit in devctl2 which is set at probe time | |
697 | */ | |
698 | p_setw(perm, PCI_EXP_DEVCTL, NO_VIRT, ~PCI_EXP_DEVCTL_PHANTOM); | |
699 | p_setw(perm, PCI_EXP_DEVCTL2, NO_VIRT, ~PCI_EXP_DEVCTL2_ARI); | |
700 | return 0; | |
701 | } | |
702 | ||
703 | /* Permissions for Advanced Function capability */ | |
704 | static int __init init_pci_cap_af_perm(struct perm_bits *perm) | |
705 | { | |
706 | if (alloc_perm_bits(perm, pci_cap_length[PCI_CAP_ID_AF])) | |
707 | return -ENOMEM; | |
708 | ||
709 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | |
710 | p_setb(perm, PCI_AF_CTRL, NO_VIRT, PCI_AF_CTRL_FLR); | |
711 | return 0; | |
712 | } | |
713 | ||
714 | /* Permissions for Advanced Error Reporting extended capability */ | |
715 | static int __init init_pci_ext_cap_err_perm(struct perm_bits *perm) | |
716 | { | |
717 | u32 mask; | |
718 | ||
719 | if (alloc_perm_bits(perm, pci_ext_cap_length[PCI_EXT_CAP_ID_ERR])) | |
720 | return -ENOMEM; | |
721 | ||
722 | /* | |
723 | * Virtualize the first dword of all express capabilities | |
724 | * because it includes the next pointer. This lets us later | |
725 | * remove capabilities from the chain if we need to. | |
726 | */ | |
727 | p_setd(perm, 0, ALL_VIRT, NO_WRITE); | |
728 | ||
729 | /* Writable bits mask */ | |
846fc709 | 730 | mask = PCI_ERR_UNC_UND | /* Undefined */ |
89e1f7d4 AW |
731 | PCI_ERR_UNC_DLP | /* Data Link Protocol */ |
732 | PCI_ERR_UNC_SURPDN | /* Surprise Down */ | |
733 | PCI_ERR_UNC_POISON_TLP | /* Poisoned TLP */ | |
734 | PCI_ERR_UNC_FCP | /* Flow Control Protocol */ | |
735 | PCI_ERR_UNC_COMP_TIME | /* Completion Timeout */ | |
736 | PCI_ERR_UNC_COMP_ABORT | /* Completer Abort */ | |
737 | PCI_ERR_UNC_UNX_COMP | /* Unexpected Completion */ | |
738 | PCI_ERR_UNC_RX_OVER | /* Receiver Overflow */ | |
739 | PCI_ERR_UNC_MALF_TLP | /* Malformed TLP */ | |
740 | PCI_ERR_UNC_ECRC | /* ECRC Error Status */ | |
741 | PCI_ERR_UNC_UNSUP | /* Unsupported Request */ | |
742 | PCI_ERR_UNC_ACSV | /* ACS Violation */ | |
743 | PCI_ERR_UNC_INTN | /* internal error */ | |
744 | PCI_ERR_UNC_MCBTLP | /* MC blocked TLP */ | |
745 | PCI_ERR_UNC_ATOMEG | /* Atomic egress blocked */ | |
746 | PCI_ERR_UNC_TLPPRE; /* TLP prefix blocked */ | |
747 | p_setd(perm, PCI_ERR_UNCOR_STATUS, NO_VIRT, mask); | |
748 | p_setd(perm, PCI_ERR_UNCOR_MASK, NO_VIRT, mask); | |
749 | p_setd(perm, PCI_ERR_UNCOR_SEVER, NO_VIRT, mask); | |
750 | ||
751 | mask = PCI_ERR_COR_RCVR | /* Receiver Error Status */ | |
752 | PCI_ERR_COR_BAD_TLP | /* Bad TLP Status */ | |
753 | PCI_ERR_COR_BAD_DLLP | /* Bad DLLP Status */ | |
754 | PCI_ERR_COR_REP_ROLL | /* REPLAY_NUM Rollover */ | |
755 | PCI_ERR_COR_REP_TIMER | /* Replay Timer Timeout */ | |
756 | PCI_ERR_COR_ADV_NFAT | /* Advisory Non-Fatal */ | |
757 | PCI_ERR_COR_INTERNAL | /* Corrected Internal */ | |
758 | PCI_ERR_COR_LOG_OVER; /* Header Log Overflow */ | |
759 | p_setd(perm, PCI_ERR_COR_STATUS, NO_VIRT, mask); | |
760 | p_setd(perm, PCI_ERR_COR_MASK, NO_VIRT, mask); | |
761 | ||
762 | mask = PCI_ERR_CAP_ECRC_GENE | /* ECRC Generation Enable */ | |
763 | PCI_ERR_CAP_ECRC_CHKE; /* ECRC Check Enable */ | |
764 | p_setd(perm, PCI_ERR_CAP, NO_VIRT, mask); | |
765 | return 0; | |
766 | } | |
767 | ||
768 | /* Permissions for Power Budgeting extended capability */ | |
769 | static int __init init_pci_ext_cap_pwr_perm(struct perm_bits *perm) | |
770 | { | |
771 | if (alloc_perm_bits(perm, pci_ext_cap_length[PCI_EXT_CAP_ID_PWR])) | |
772 | return -ENOMEM; | |
773 | ||
774 | p_setd(perm, 0, ALL_VIRT, NO_WRITE); | |
775 | ||
776 | /* Writing the data selector is OK, the info is still read-only */ | |
777 | p_setb(perm, PCI_PWR_DATA, NO_VIRT, (u8)ALL_WRITE); | |
778 | return 0; | |
779 | } | |
780 | ||
781 | /* | |
782 | * Initialize the shared permission tables | |
783 | */ | |
784 | void vfio_pci_uninit_perm_bits(void) | |
785 | { | |
786 | free_perm_bits(&cap_perms[PCI_CAP_ID_BASIC]); | |
787 | ||
788 | free_perm_bits(&cap_perms[PCI_CAP_ID_PM]); | |
789 | free_perm_bits(&cap_perms[PCI_CAP_ID_PCIX]); | |
790 | free_perm_bits(&cap_perms[PCI_CAP_ID_EXP]); | |
791 | free_perm_bits(&cap_perms[PCI_CAP_ID_AF]); | |
792 | ||
793 | free_perm_bits(&ecap_perms[PCI_EXT_CAP_ID_ERR]); | |
794 | free_perm_bits(&ecap_perms[PCI_EXT_CAP_ID_PWR]); | |
795 | } | |
796 | ||
797 | int __init vfio_pci_init_perm_bits(void) | |
798 | { | |
799 | int ret; | |
800 | ||
801 | /* Basic config space */ | |
802 | ret = init_pci_cap_basic_perm(&cap_perms[PCI_CAP_ID_BASIC]); | |
803 | ||
804 | /* Capabilities */ | |
805 | ret |= init_pci_cap_pm_perm(&cap_perms[PCI_CAP_ID_PM]); | |
a7d1ea1c | 806 | cap_perms[PCI_CAP_ID_VPD].writefn = vfio_raw_config_write; |
89e1f7d4 | 807 | ret |= init_pci_cap_pcix_perm(&cap_perms[PCI_CAP_ID_PCIX]); |
a7d1ea1c | 808 | cap_perms[PCI_CAP_ID_VNDR].writefn = vfio_raw_config_write; |
89e1f7d4 AW |
809 | ret |= init_pci_cap_exp_perm(&cap_perms[PCI_CAP_ID_EXP]); |
810 | ret |= init_pci_cap_af_perm(&cap_perms[PCI_CAP_ID_AF]); | |
811 | ||
812 | /* Extended capabilities */ | |
813 | ret |= init_pci_ext_cap_err_perm(&ecap_perms[PCI_EXT_CAP_ID_ERR]); | |
814 | ret |= init_pci_ext_cap_pwr_perm(&ecap_perms[PCI_EXT_CAP_ID_PWR]); | |
a7d1ea1c | 815 | ecap_perms[PCI_EXT_CAP_ID_VNDR].writefn = vfio_raw_config_write; |
89e1f7d4 AW |
816 | |
817 | if (ret) | |
818 | vfio_pci_uninit_perm_bits(); | |
819 | ||
820 | return ret; | |
821 | } | |
822 | ||
823 | static int vfio_find_cap_start(struct vfio_pci_device *vdev, int pos) | |
824 | { | |
825 | u8 cap; | |
826 | int base = (pos >= PCI_CFG_SPACE_SIZE) ? PCI_CFG_SPACE_SIZE : | |
827 | PCI_STD_HEADER_SIZEOF; | |
89e1f7d4 AW |
828 | cap = vdev->pci_config_map[pos]; |
829 | ||
830 | if (cap == PCI_CAP_ID_BASIC) | |
831 | return 0; | |
832 | ||
833 | /* XXX Can we have to abutting capabilities of the same type? */ | |
834 | while (pos - 1 >= base && vdev->pci_config_map[pos - 1] == cap) | |
835 | pos--; | |
836 | ||
180b1381 | 837 | return pos; |
89e1f7d4 AW |
838 | } |
839 | ||
840 | static int vfio_msi_config_read(struct vfio_pci_device *vdev, int pos, | |
841 | int count, struct perm_bits *perm, | |
842 | int offset, __le32 *val) | |
843 | { | |
844 | /* Update max available queue size from msi_qmax */ | |
845 | if (offset <= PCI_MSI_FLAGS && offset + count >= PCI_MSI_FLAGS) { | |
846 | __le16 *flags; | |
847 | int start; | |
848 | ||
849 | start = vfio_find_cap_start(vdev, pos); | |
850 | ||
851 | flags = (__le16 *)&vdev->vconfig[start]; | |
852 | ||
853 | *flags &= cpu_to_le16(~PCI_MSI_FLAGS_QMASK); | |
854 | *flags |= cpu_to_le16(vdev->msi_qmax << 1); | |
855 | } | |
856 | ||
857 | return vfio_default_config_read(vdev, pos, count, perm, offset, val); | |
858 | } | |
859 | ||
860 | static int vfio_msi_config_write(struct vfio_pci_device *vdev, int pos, | |
861 | int count, struct perm_bits *perm, | |
862 | int offset, __le32 val) | |
863 | { | |
864 | count = vfio_default_config_write(vdev, pos, count, perm, offset, val); | |
865 | if (count < 0) | |
866 | return count; | |
867 | ||
868 | /* Fixup and write configured queue size and enable to hardware */ | |
869 | if (offset <= PCI_MSI_FLAGS && offset + count >= PCI_MSI_FLAGS) { | |
870 | __le16 *pflags; | |
871 | u16 flags; | |
872 | int start, ret; | |
873 | ||
874 | start = vfio_find_cap_start(vdev, pos); | |
875 | ||
876 | pflags = (__le16 *)&vdev->vconfig[start + PCI_MSI_FLAGS]; | |
877 | ||
878 | flags = le16_to_cpu(*pflags); | |
879 | ||
880 | /* MSI is enabled via ioctl */ | |
881 | if (!is_msi(vdev)) | |
882 | flags &= ~PCI_MSI_FLAGS_ENABLE; | |
883 | ||
884 | /* Check queue size */ | |
885 | if ((flags & PCI_MSI_FLAGS_QSIZE) >> 4 > vdev->msi_qmax) { | |
886 | flags &= ~PCI_MSI_FLAGS_QSIZE; | |
887 | flags |= vdev->msi_qmax << 4; | |
888 | } | |
889 | ||
890 | /* Write back to virt and to hardware */ | |
891 | *pflags = cpu_to_le16(flags); | |
892 | ret = pci_user_write_config_word(vdev->pdev, | |
893 | start + PCI_MSI_FLAGS, | |
894 | flags); | |
895 | if (ret) | |
896 | return pcibios_err_to_errno(ret); | |
897 | } | |
898 | ||
899 | return count; | |
900 | } | |
901 | ||
902 | /* | |
903 | * MSI determination is per-device, so this routine gets used beyond | |
904 | * initialization time. Don't add __init | |
905 | */ | |
906 | static int init_pci_cap_msi_perm(struct perm_bits *perm, int len, u16 flags) | |
907 | { | |
908 | if (alloc_perm_bits(perm, len)) | |
909 | return -ENOMEM; | |
910 | ||
911 | perm->readfn = vfio_msi_config_read; | |
912 | perm->writefn = vfio_msi_config_write; | |
913 | ||
914 | p_setb(perm, PCI_CAP_LIST_NEXT, (u8)ALL_VIRT, NO_WRITE); | |
915 | ||
916 | /* | |
917 | * The upper byte of the control register is reserved, | |
918 | * just setup the lower byte. | |
919 | */ | |
920 | p_setb(perm, PCI_MSI_FLAGS, (u8)ALL_VIRT, (u8)ALL_WRITE); | |
921 | p_setd(perm, PCI_MSI_ADDRESS_LO, ALL_VIRT, ALL_WRITE); | |
922 | if (flags & PCI_MSI_FLAGS_64BIT) { | |
923 | p_setd(perm, PCI_MSI_ADDRESS_HI, ALL_VIRT, ALL_WRITE); | |
924 | p_setw(perm, PCI_MSI_DATA_64, (u16)ALL_VIRT, (u16)ALL_WRITE); | |
925 | if (flags & PCI_MSI_FLAGS_MASKBIT) { | |
926 | p_setd(perm, PCI_MSI_MASK_64, NO_VIRT, ALL_WRITE); | |
927 | p_setd(perm, PCI_MSI_PENDING_64, NO_VIRT, ALL_WRITE); | |
928 | } | |
929 | } else { | |
930 | p_setw(perm, PCI_MSI_DATA_32, (u16)ALL_VIRT, (u16)ALL_WRITE); | |
931 | if (flags & PCI_MSI_FLAGS_MASKBIT) { | |
932 | p_setd(perm, PCI_MSI_MASK_32, NO_VIRT, ALL_WRITE); | |
933 | p_setd(perm, PCI_MSI_PENDING_32, NO_VIRT, ALL_WRITE); | |
934 | } | |
935 | } | |
936 | return 0; | |
937 | } | |
938 | ||
939 | /* Determine MSI CAP field length; initialize msi_perms on 1st call per vdev */ | |
940 | static int vfio_msi_cap_len(struct vfio_pci_device *vdev, u8 pos) | |
941 | { | |
942 | struct pci_dev *pdev = vdev->pdev; | |
943 | int len, ret; | |
944 | u16 flags; | |
945 | ||
946 | ret = pci_read_config_word(pdev, pos + PCI_MSI_FLAGS, &flags); | |
947 | if (ret) | |
948 | return pcibios_err_to_errno(ret); | |
949 | ||
950 | len = 10; /* Minimum size */ | |
951 | if (flags & PCI_MSI_FLAGS_64BIT) | |
952 | len += 4; | |
953 | if (flags & PCI_MSI_FLAGS_MASKBIT) | |
954 | len += 10; | |
955 | ||
956 | if (vdev->msi_perm) | |
957 | return len; | |
958 | ||
959 | vdev->msi_perm = kmalloc(sizeof(struct perm_bits), GFP_KERNEL); | |
960 | if (!vdev->msi_perm) | |
961 | return -ENOMEM; | |
962 | ||
963 | ret = init_pci_cap_msi_perm(vdev->msi_perm, len, flags); | |
964 | if (ret) | |
965 | return ret; | |
966 | ||
967 | return len; | |
968 | } | |
969 | ||
970 | /* Determine extended capability length for VC (2 & 9) and MFVC */ | |
971 | static int vfio_vc_cap_len(struct vfio_pci_device *vdev, u16 pos) | |
972 | { | |
973 | struct pci_dev *pdev = vdev->pdev; | |
974 | u32 tmp; | |
975 | int ret, evcc, phases, vc_arb; | |
976 | int len = PCI_CAP_VC_BASE_SIZEOF; | |
977 | ||
274127a1 | 978 | ret = pci_read_config_dword(pdev, pos + PCI_VC_PORT_CAP1, &tmp); |
89e1f7d4 AW |
979 | if (ret) |
980 | return pcibios_err_to_errno(ret); | |
981 | ||
274127a1 AW |
982 | evcc = tmp & PCI_VC_CAP1_EVCC; /* extended vc count */ |
983 | ret = pci_read_config_dword(pdev, pos + PCI_VC_PORT_CAP2, &tmp); | |
89e1f7d4 AW |
984 | if (ret) |
985 | return pcibios_err_to_errno(ret); | |
986 | ||
274127a1 | 987 | if (tmp & PCI_VC_CAP2_128_PHASE) |
89e1f7d4 | 988 | phases = 128; |
274127a1 | 989 | else if (tmp & PCI_VC_CAP2_64_PHASE) |
89e1f7d4 | 990 | phases = 64; |
274127a1 | 991 | else if (tmp & PCI_VC_CAP2_32_PHASE) |
89e1f7d4 AW |
992 | phases = 32; |
993 | else | |
994 | phases = 0; | |
995 | ||
996 | vc_arb = phases * 4; | |
997 | ||
998 | /* | |
999 | * Port arbitration tables are root & switch only; | |
1000 | * function arbitration tables are function 0 only. | |
1001 | * In either case, we'll never let user write them so | |
1002 | * we don't care how big they are | |
1003 | */ | |
1004 | len += (1 + evcc) * PCI_CAP_VC_PER_VC_SIZEOF; | |
1005 | if (vc_arb) { | |
1006 | len = round_up(len, 16); | |
1007 | len += vc_arb / 8; | |
1008 | } | |
1009 | return len; | |
1010 | } | |
1011 | ||
1012 | static int vfio_cap_len(struct vfio_pci_device *vdev, u8 cap, u8 pos) | |
1013 | { | |
1014 | struct pci_dev *pdev = vdev->pdev; | |
17638db1 | 1015 | u32 dword; |
89e1f7d4 AW |
1016 | u16 word; |
1017 | u8 byte; | |
1018 | int ret; | |
1019 | ||
1020 | switch (cap) { | |
1021 | case PCI_CAP_ID_MSI: | |
1022 | return vfio_msi_cap_len(vdev, pos); | |
1023 | case PCI_CAP_ID_PCIX: | |
1024 | ret = pci_read_config_word(pdev, pos + PCI_X_CMD, &word); | |
1025 | if (ret) | |
1026 | return pcibios_err_to_errno(ret); | |
1027 | ||
1028 | if (PCI_X_CMD_VERSION(word)) { | |
17638db1 AW |
1029 | /* Test for extended capabilities */ |
1030 | pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); | |
1031 | vdev->extended_caps = (dword != 0); | |
89e1f7d4 AW |
1032 | return PCI_CAP_PCIX_SIZEOF_V2; |
1033 | } else | |
1034 | return PCI_CAP_PCIX_SIZEOF_V0; | |
1035 | case PCI_CAP_ID_VNDR: | |
1036 | /* length follows next field */ | |
1037 | ret = pci_read_config_byte(pdev, pos + PCI_CAP_FLAGS, &byte); | |
1038 | if (ret) | |
1039 | return pcibios_err_to_errno(ret); | |
1040 | ||
1041 | return byte; | |
1042 | case PCI_CAP_ID_EXP: | |
17638db1 AW |
1043 | /* Test for extended capabilities */ |
1044 | pci_read_config_dword(pdev, PCI_CFG_SPACE_SIZE, &dword); | |
1045 | vdev->extended_caps = (dword != 0); | |
5641ade4 | 1046 | |
17638db1 | 1047 | /* length based on version */ |
aa2cba51 | 1048 | if ((pcie_caps_reg(pdev) & PCI_EXP_FLAGS_VERS) == 1) |
89e1f7d4 | 1049 | return PCI_CAP_EXP_ENDPOINT_SIZEOF_V1; |
5641ade4 | 1050 | else |
89e1f7d4 | 1051 | return PCI_CAP_EXP_ENDPOINT_SIZEOF_V2; |
89e1f7d4 AW |
1052 | case PCI_CAP_ID_HT: |
1053 | ret = pci_read_config_byte(pdev, pos + 3, &byte); | |
1054 | if (ret) | |
1055 | return pcibios_err_to_errno(ret); | |
1056 | ||
1057 | return (byte & HT_3BIT_CAP_MASK) ? | |
1058 | HT_CAP_SIZEOF_SHORT : HT_CAP_SIZEOF_LONG; | |
1059 | case PCI_CAP_ID_SATA: | |
1060 | ret = pci_read_config_byte(pdev, pos + PCI_SATA_REGS, &byte); | |
1061 | if (ret) | |
1062 | return pcibios_err_to_errno(ret); | |
1063 | ||
1064 | byte &= PCI_SATA_REGS_MASK; | |
1065 | if (byte == PCI_SATA_REGS_INLINE) | |
1066 | return PCI_SATA_SIZEOF_LONG; | |
1067 | else | |
1068 | return PCI_SATA_SIZEOF_SHORT; | |
1069 | default: | |
1070 | pr_warn("%s: %s unknown length for pci cap 0x%x@0x%x\n", | |
1071 | dev_name(&pdev->dev), __func__, cap, pos); | |
1072 | } | |
1073 | ||
1074 | return 0; | |
1075 | } | |
1076 | ||
1077 | static int vfio_ext_cap_len(struct vfio_pci_device *vdev, u16 ecap, u16 epos) | |
1078 | { | |
1079 | struct pci_dev *pdev = vdev->pdev; | |
1080 | u8 byte; | |
1081 | u32 dword; | |
1082 | int ret; | |
1083 | ||
1084 | switch (ecap) { | |
1085 | case PCI_EXT_CAP_ID_VNDR: | |
1086 | ret = pci_read_config_dword(pdev, epos + PCI_VSEC_HDR, &dword); | |
1087 | if (ret) | |
1088 | return pcibios_err_to_errno(ret); | |
1089 | ||
1090 | return dword >> PCI_VSEC_HDR_LEN_SHIFT; | |
1091 | case PCI_EXT_CAP_ID_VC: | |
1092 | case PCI_EXT_CAP_ID_VC9: | |
1093 | case PCI_EXT_CAP_ID_MFVC: | |
1094 | return vfio_vc_cap_len(vdev, epos); | |
1095 | case PCI_EXT_CAP_ID_ACS: | |
1096 | ret = pci_read_config_byte(pdev, epos + PCI_ACS_CAP, &byte); | |
1097 | if (ret) | |
1098 | return pcibios_err_to_errno(ret); | |
1099 | ||
1100 | if (byte & PCI_ACS_EC) { | |
1101 | int bits; | |
1102 | ||
1103 | ret = pci_read_config_byte(pdev, | |
1104 | epos + PCI_ACS_EGRESS_BITS, | |
1105 | &byte); | |
1106 | if (ret) | |
1107 | return pcibios_err_to_errno(ret); | |
1108 | ||
1109 | bits = byte ? round_up(byte, 32) : 256; | |
1110 | return 8 + (bits / 8); | |
1111 | } | |
1112 | return 8; | |
1113 | ||
1114 | case PCI_EXT_CAP_ID_REBAR: | |
1115 | ret = pci_read_config_byte(pdev, epos + PCI_REBAR_CTRL, &byte); | |
1116 | if (ret) | |
1117 | return pcibios_err_to_errno(ret); | |
1118 | ||
1119 | byte &= PCI_REBAR_CTRL_NBAR_MASK; | |
1120 | byte >>= PCI_REBAR_CTRL_NBAR_SHIFT; | |
1121 | ||
1122 | return 4 + (byte * 8); | |
1123 | case PCI_EXT_CAP_ID_DPA: | |
1124 | ret = pci_read_config_byte(pdev, epos + PCI_DPA_CAP, &byte); | |
1125 | if (ret) | |
1126 | return pcibios_err_to_errno(ret); | |
1127 | ||
1128 | byte &= PCI_DPA_CAP_SUBSTATE_MASK; | |
afa63252 | 1129 | return PCI_DPA_BASE_SIZEOF + byte + 1; |
89e1f7d4 AW |
1130 | case PCI_EXT_CAP_ID_TPH: |
1131 | ret = pci_read_config_dword(pdev, epos + PCI_TPH_CAP, &dword); | |
1132 | if (ret) | |
1133 | return pcibios_err_to_errno(ret); | |
1134 | ||
1135 | if ((dword & PCI_TPH_CAP_LOC_MASK) == PCI_TPH_LOC_CAP) { | |
1136 | int sts; | |
1137 | ||
afa63252 | 1138 | sts = dword & PCI_TPH_CAP_ST_MASK; |
89e1f7d4 | 1139 | sts >>= PCI_TPH_CAP_ST_SHIFT; |
afa63252 | 1140 | return PCI_TPH_BASE_SIZEOF + (sts * 2) + 2; |
89e1f7d4 AW |
1141 | } |
1142 | return PCI_TPH_BASE_SIZEOF; | |
1143 | default: | |
1144 | pr_warn("%s: %s unknown length for pci ecap 0x%x@0x%x\n", | |
1145 | dev_name(&pdev->dev), __func__, ecap, epos); | |
1146 | } | |
1147 | ||
1148 | return 0; | |
1149 | } | |
1150 | ||
1151 | static int vfio_fill_vconfig_bytes(struct vfio_pci_device *vdev, | |
1152 | int offset, int size) | |
1153 | { | |
1154 | struct pci_dev *pdev = vdev->pdev; | |
1155 | int ret = 0; | |
1156 | ||
1157 | /* | |
1158 | * We try to read physical config space in the largest chunks | |
1159 | * we can, assuming that all of the fields support dword access. | |
1160 | * pci_save_state() makes this same assumption and seems to do ok. | |
1161 | */ | |
1162 | while (size) { | |
1163 | int filled; | |
1164 | ||
1165 | if (size >= 4 && !(offset % 4)) { | |
1166 | __le32 *dwordp = (__le32 *)&vdev->vconfig[offset]; | |
1167 | u32 dword; | |
1168 | ||
1169 | ret = pci_read_config_dword(pdev, offset, &dword); | |
1170 | if (ret) | |
1171 | return ret; | |
1172 | *dwordp = cpu_to_le32(dword); | |
1173 | filled = 4; | |
1174 | } else if (size >= 2 && !(offset % 2)) { | |
1175 | __le16 *wordp = (__le16 *)&vdev->vconfig[offset]; | |
1176 | u16 word; | |
1177 | ||
1178 | ret = pci_read_config_word(pdev, offset, &word); | |
1179 | if (ret) | |
1180 | return ret; | |
1181 | *wordp = cpu_to_le16(word); | |
1182 | filled = 2; | |
1183 | } else { | |
1184 | u8 *byte = &vdev->vconfig[offset]; | |
1185 | ret = pci_read_config_byte(pdev, offset, byte); | |
1186 | if (ret) | |
1187 | return ret; | |
1188 | filled = 1; | |
1189 | } | |
1190 | ||
1191 | offset += filled; | |
1192 | size -= filled; | |
1193 | } | |
1194 | ||
1195 | return ret; | |
1196 | } | |
1197 | ||
1198 | static int vfio_cap_init(struct vfio_pci_device *vdev) | |
1199 | { | |
1200 | struct pci_dev *pdev = vdev->pdev; | |
1201 | u8 *map = vdev->pci_config_map; | |
1202 | u16 status; | |
1203 | u8 pos, *prev, cap; | |
1204 | int loops, ret, caps = 0; | |
1205 | ||
1206 | /* Any capabilities? */ | |
1207 | ret = pci_read_config_word(pdev, PCI_STATUS, &status); | |
1208 | if (ret) | |
1209 | return ret; | |
1210 | ||
1211 | if (!(status & PCI_STATUS_CAP_LIST)) | |
1212 | return 0; /* Done */ | |
1213 | ||
1214 | ret = pci_read_config_byte(pdev, PCI_CAPABILITY_LIST, &pos); | |
1215 | if (ret) | |
1216 | return ret; | |
1217 | ||
1218 | /* Mark the previous position in case we want to skip a capability */ | |
1219 | prev = &vdev->vconfig[PCI_CAPABILITY_LIST]; | |
1220 | ||
1221 | /* We can bound our loop, capabilities are dword aligned */ | |
1222 | loops = (PCI_CFG_SPACE_SIZE - PCI_STD_HEADER_SIZEOF) / PCI_CAP_SIZEOF; | |
1223 | while (pos && loops--) { | |
1224 | u8 next; | |
1225 | int i, len = 0; | |
1226 | ||
1227 | ret = pci_read_config_byte(pdev, pos, &cap); | |
1228 | if (ret) | |
1229 | return ret; | |
1230 | ||
1231 | ret = pci_read_config_byte(pdev, | |
1232 | pos + PCI_CAP_LIST_NEXT, &next); | |
1233 | if (ret) | |
1234 | return ret; | |
1235 | ||
1236 | if (cap <= PCI_CAP_ID_MAX) { | |
1237 | len = pci_cap_length[cap]; | |
1238 | if (len == 0xFF) { /* Variable length */ | |
1239 | len = vfio_cap_len(vdev, cap, pos); | |
1240 | if (len < 0) | |
1241 | return len; | |
1242 | } | |
1243 | } | |
1244 | ||
1245 | if (!len) { | |
1246 | pr_info("%s: %s hiding cap 0x%x\n", | |
1247 | __func__, dev_name(&pdev->dev), cap); | |
1248 | *prev = next; | |
1249 | pos = next; | |
1250 | continue; | |
1251 | } | |
1252 | ||
1253 | /* Sanity check, do we overlap other capabilities? */ | |
180b1381 AW |
1254 | for (i = 0; i < len; i++) { |
1255 | if (likely(map[pos + i] == PCI_CAP_ID_INVALID)) | |
89e1f7d4 AW |
1256 | continue; |
1257 | ||
1258 | pr_warn("%s: %s pci config conflict @0x%x, was cap 0x%x now cap 0x%x\n", | |
1259 | __func__, dev_name(&pdev->dev), | |
1260 | pos + i, map[pos + i], cap); | |
1261 | } | |
1262 | ||
180b1381 | 1263 | memset(map + pos, cap, len); |
89e1f7d4 AW |
1264 | ret = vfio_fill_vconfig_bytes(vdev, pos, len); |
1265 | if (ret) | |
1266 | return ret; | |
1267 | ||
1268 | prev = &vdev->vconfig[pos + PCI_CAP_LIST_NEXT]; | |
1269 | pos = next; | |
1270 | caps++; | |
1271 | } | |
1272 | ||
1273 | /* If we didn't fill any capabilities, clear the status flag */ | |
1274 | if (!caps) { | |
1275 | __le16 *vstatus = (__le16 *)&vdev->vconfig[PCI_STATUS]; | |
1276 | *vstatus &= ~cpu_to_le16(PCI_STATUS_CAP_LIST); | |
1277 | } | |
1278 | ||
1279 | return 0; | |
1280 | } | |
1281 | ||
1282 | static int vfio_ecap_init(struct vfio_pci_device *vdev) | |
1283 | { | |
1284 | struct pci_dev *pdev = vdev->pdev; | |
1285 | u8 *map = vdev->pci_config_map; | |
1286 | u16 epos; | |
1287 | __le32 *prev = NULL; | |
1288 | int loops, ret, ecaps = 0; | |
1289 | ||
1290 | if (!vdev->extended_caps) | |
1291 | return 0; | |
1292 | ||
1293 | epos = PCI_CFG_SPACE_SIZE; | |
1294 | ||
1295 | loops = (pdev->cfg_size - PCI_CFG_SPACE_SIZE) / PCI_CAP_SIZEOF; | |
1296 | ||
1297 | while (loops-- && epos >= PCI_CFG_SPACE_SIZE) { | |
1298 | u32 header; | |
1299 | u16 ecap; | |
1300 | int i, len = 0; | |
1301 | bool hidden = false; | |
1302 | ||
1303 | ret = pci_read_config_dword(pdev, epos, &header); | |
1304 | if (ret) | |
1305 | return ret; | |
1306 | ||
1307 | ecap = PCI_EXT_CAP_ID(header); | |
1308 | ||
1309 | if (ecap <= PCI_EXT_CAP_ID_MAX) { | |
1310 | len = pci_ext_cap_length[ecap]; | |
1311 | if (len == 0xFF) { | |
1312 | len = vfio_ext_cap_len(vdev, ecap, epos); | |
1313 | if (len < 0) | |
1314 | return ret; | |
1315 | } | |
1316 | } | |
1317 | ||
1318 | if (!len) { | |
1319 | pr_info("%s: %s hiding ecap 0x%x@0x%x\n", | |
1320 | __func__, dev_name(&pdev->dev), ecap, epos); | |
1321 | ||
1322 | /* If not the first in the chain, we can skip over it */ | |
1323 | if (prev) { | |
1324 | u32 val = epos = PCI_EXT_CAP_NEXT(header); | |
1325 | *prev &= cpu_to_le32(~(0xffcU << 20)); | |
1326 | *prev |= cpu_to_le32(val << 20); | |
1327 | continue; | |
1328 | } | |
1329 | ||
1330 | /* | |
1331 | * Otherwise, fill in a placeholder, the direct | |
1332 | * readfn will virtualize this automatically | |
1333 | */ | |
1334 | len = PCI_CAP_SIZEOF; | |
1335 | hidden = true; | |
1336 | } | |
1337 | ||
180b1381 AW |
1338 | for (i = 0; i < len; i++) { |
1339 | if (likely(map[epos + i] == PCI_CAP_ID_INVALID)) | |
89e1f7d4 AW |
1340 | continue; |
1341 | ||
1342 | pr_warn("%s: %s pci config conflict @0x%x, was ecap 0x%x now ecap 0x%x\n", | |
1343 | __func__, dev_name(&pdev->dev), | |
1344 | epos + i, map[epos + i], ecap); | |
1345 | } | |
1346 | ||
1347 | /* | |
1348 | * Even though ecap is 2 bytes, we're currently a long way | |
1349 | * from exceeding 1 byte capabilities. If we ever make it | |
1350 | * up to 0xFF we'll need to up this to a two-byte, byte map. | |
1351 | */ | |
1352 | BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); | |
1353 | ||
180b1381 | 1354 | memset(map + epos, ecap, len); |
89e1f7d4 AW |
1355 | ret = vfio_fill_vconfig_bytes(vdev, epos, len); |
1356 | if (ret) | |
1357 | return ret; | |
1358 | ||
1359 | /* | |
1360 | * If we're just using this capability to anchor the list, | |
1361 | * hide the real ID. Only count real ecaps. XXX PCI spec | |
1362 | * indicates to use cap id = 0, version = 0, next = 0 if | |
1363 | * ecaps are absent, hope users check all the way to next. | |
1364 | */ | |
1365 | if (hidden) | |
1366 | *(__le32 *)&vdev->vconfig[epos] &= | |
1367 | cpu_to_le32((0xffcU << 20)); | |
1368 | else | |
1369 | ecaps++; | |
1370 | ||
1371 | prev = (__le32 *)&vdev->vconfig[epos]; | |
1372 | epos = PCI_EXT_CAP_NEXT(header); | |
1373 | } | |
1374 | ||
1375 | if (!ecaps) | |
1376 | *(u32 *)&vdev->vconfig[PCI_CFG_SPACE_SIZE] = 0; | |
1377 | ||
1378 | return 0; | |
1379 | } | |
1380 | ||
1381 | /* | |
1382 | * For each device we allocate a pci_config_map that indicates the | |
1383 | * capability occupying each dword and thus the struct perm_bits we | |
1384 | * use for read and write. We also allocate a virtualized config | |
1385 | * space which tracks reads and writes to bits that we emulate for | |
1386 | * the user. Initial values filled from device. | |
1387 | * | |
1388 | * Using shared stuct perm_bits between all vfio-pci devices saves | |
1389 | * us from allocating cfg_size buffers for virt and write for every | |
1390 | * device. We could remove vconfig and allocate individual buffers | |
1391 | * for each area requring emulated bits, but the array of pointers | |
1392 | * would be comparable in size (at least for standard config space). | |
1393 | */ | |
1394 | int vfio_config_init(struct vfio_pci_device *vdev) | |
1395 | { | |
1396 | struct pci_dev *pdev = vdev->pdev; | |
1397 | u8 *map, *vconfig; | |
1398 | int ret; | |
1399 | ||
1400 | /* | |
180b1381 AW |
1401 | * Config space, caps and ecaps are all dword aligned, so we could |
1402 | * use one byte per dword to record the type. However, there are | |
1403 | * no requiremenst on the length of a capability, so the gap between | |
1404 | * capabilities needs byte granularity. | |
89e1f7d4 | 1405 | */ |
180b1381 | 1406 | map = kmalloc(pdev->cfg_size, GFP_KERNEL); |
89e1f7d4 AW |
1407 | if (!map) |
1408 | return -ENOMEM; | |
1409 | ||
1410 | vconfig = kmalloc(pdev->cfg_size, GFP_KERNEL); | |
1411 | if (!vconfig) { | |
1412 | kfree(map); | |
1413 | return -ENOMEM; | |
1414 | } | |
1415 | ||
1416 | vdev->pci_config_map = map; | |
1417 | vdev->vconfig = vconfig; | |
1418 | ||
180b1381 AW |
1419 | memset(map, PCI_CAP_ID_BASIC, PCI_STD_HEADER_SIZEOF); |
1420 | memset(map + PCI_STD_HEADER_SIZEOF, PCI_CAP_ID_INVALID, | |
1421 | pdev->cfg_size - PCI_STD_HEADER_SIZEOF); | |
89e1f7d4 AW |
1422 | |
1423 | ret = vfio_fill_vconfig_bytes(vdev, 0, PCI_STD_HEADER_SIZEOF); | |
1424 | if (ret) | |
1425 | goto out; | |
1426 | ||
1427 | vdev->bardirty = true; | |
1428 | ||
1429 | /* | |
1430 | * XXX can we just pci_load_saved_state/pci_restore_state? | |
1431 | * may need to rebuild vconfig after that | |
1432 | */ | |
1433 | ||
1434 | /* For restore after reset */ | |
1435 | vdev->rbar[0] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_0]); | |
1436 | vdev->rbar[1] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_1]); | |
1437 | vdev->rbar[2] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_2]); | |
1438 | vdev->rbar[3] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_3]); | |
1439 | vdev->rbar[4] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_4]); | |
1440 | vdev->rbar[5] = le32_to_cpu(*(__le32 *)&vconfig[PCI_BASE_ADDRESS_5]); | |
1441 | vdev->rbar[6] = le32_to_cpu(*(__le32 *)&vconfig[PCI_ROM_ADDRESS]); | |
1442 | ||
1443 | if (pdev->is_virtfn) { | |
1444 | *(__le16 *)&vconfig[PCI_VENDOR_ID] = cpu_to_le16(pdev->vendor); | |
1445 | *(__le16 *)&vconfig[PCI_DEVICE_ID] = cpu_to_le16(pdev->device); | |
1446 | } | |
1447 | ||
1448 | ret = vfio_cap_init(vdev); | |
1449 | if (ret) | |
1450 | goto out; | |
1451 | ||
1452 | ret = vfio_ecap_init(vdev); | |
1453 | if (ret) | |
1454 | goto out; | |
1455 | ||
1456 | return 0; | |
1457 | ||
1458 | out: | |
1459 | kfree(map); | |
1460 | vdev->pci_config_map = NULL; | |
1461 | kfree(vconfig); | |
1462 | vdev->vconfig = NULL; | |
1463 | return pcibios_err_to_errno(ret); | |
1464 | } | |
1465 | ||
1466 | void vfio_config_free(struct vfio_pci_device *vdev) | |
1467 | { | |
1468 | kfree(vdev->vconfig); | |
1469 | vdev->vconfig = NULL; | |
1470 | kfree(vdev->pci_config_map); | |
1471 | vdev->pci_config_map = NULL; | |
1472 | kfree(vdev->msi_perm); | |
1473 | vdev->msi_perm = NULL; | |
1474 | } | |
1475 | ||
180b1381 AW |
1476 | /* |
1477 | * Find the remaining number of bytes in a dword that match the given | |
1478 | * position. Stop at either the end of the capability or the dword boundary. | |
1479 | */ | |
1480 | static size_t vfio_pci_cap_remaining_dword(struct vfio_pci_device *vdev, | |
1481 | loff_t pos) | |
1482 | { | |
1483 | u8 cap = vdev->pci_config_map[pos]; | |
1484 | size_t i; | |
1485 | ||
1486 | for (i = 1; (pos + i) % 4 && vdev->pci_config_map[pos + i] == cap; i++) | |
1487 | /* nop */; | |
1488 | ||
1489 | return i; | |
1490 | } | |
1491 | ||
89e1f7d4 AW |
1492 | static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, |
1493 | size_t count, loff_t *ppos, bool iswrite) | |
1494 | { | |
1495 | struct pci_dev *pdev = vdev->pdev; | |
1496 | struct perm_bits *perm; | |
1497 | __le32 val = 0; | |
1498 | int cap_start = 0, offset; | |
1499 | u8 cap_id; | |
180b1381 | 1500 | ssize_t ret; |
89e1f7d4 | 1501 | |
180b1381 AW |
1502 | if (*ppos < 0 || *ppos >= pdev->cfg_size || |
1503 | *ppos + count > pdev->cfg_size) | |
89e1f7d4 AW |
1504 | return -EFAULT; |
1505 | ||
1506 | /* | |
180b1381 AW |
1507 | * Chop accesses into aligned chunks containing no more than a |
1508 | * single capability. Caller increments to the next chunk. | |
89e1f7d4 | 1509 | */ |
180b1381 AW |
1510 | count = min(count, vfio_pci_cap_remaining_dword(vdev, *ppos)); |
1511 | if (count >= 4 && !(*ppos % 4)) | |
1512 | count = 4; | |
1513 | else if (count >= 2 && !(*ppos % 2)) | |
1514 | count = 2; | |
1515 | else | |
1516 | count = 1; | |
89e1f7d4 | 1517 | |
180b1381 | 1518 | ret = count; |
89e1f7d4 | 1519 | |
180b1381 | 1520 | cap_id = vdev->pci_config_map[*ppos]; |
89e1f7d4 | 1521 | |
89e1f7d4 | 1522 | if (cap_id == PCI_CAP_ID_INVALID) { |
a7d1ea1c AW |
1523 | perm = &unassigned_perms; |
1524 | cap_start = *ppos; | |
89e1f7d4 | 1525 | } else { |
a7d1ea1c AW |
1526 | if (*ppos >= PCI_CFG_SPACE_SIZE) { |
1527 | WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); | |
89e1f7d4 | 1528 | |
a7d1ea1c AW |
1529 | perm = &ecap_perms[cap_id]; |
1530 | cap_start = vfio_find_cap_start(vdev, *ppos); | |
1531 | } else { | |
1532 | WARN_ON(cap_id > PCI_CAP_ID_MAX); | |
89e1f7d4 | 1533 | |
a7d1ea1c | 1534 | perm = &cap_perms[cap_id]; |
89e1f7d4 | 1535 | |
a7d1ea1c AW |
1536 | if (cap_id == PCI_CAP_ID_MSI) |
1537 | perm = vdev->msi_perm; | |
89e1f7d4 | 1538 | |
a7d1ea1c AW |
1539 | if (cap_id > PCI_CAP_ID_BASIC) |
1540 | cap_start = vfio_find_cap_start(vdev, *ppos); | |
1541 | } | |
89e1f7d4 AW |
1542 | } |
1543 | ||
1544 | WARN_ON(!cap_start && cap_id != PCI_CAP_ID_BASIC); | |
1545 | WARN_ON(cap_start > *ppos); | |
1546 | ||
1547 | offset = *ppos - cap_start; | |
1548 | ||
1549 | if (iswrite) { | |
1550 | if (!perm->writefn) | |
1551 | return ret; | |
1552 | ||
1553 | if (copy_from_user(&val, buf, count)) | |
1554 | return -EFAULT; | |
1555 | ||
1556 | ret = perm->writefn(vdev, *ppos, count, perm, offset, val); | |
1557 | } else { | |
1558 | if (perm->readfn) { | |
1559 | ret = perm->readfn(vdev, *ppos, count, | |
1560 | perm, offset, &val); | |
1561 | if (ret < 0) | |
1562 | return ret; | |
1563 | } | |
1564 | ||
1565 | if (copy_to_user(buf, &val, count)) | |
1566 | return -EFAULT; | |
1567 | } | |
1568 | ||
1569 | return ret; | |
1570 | } | |
1571 | ||
906ee99d AW |
1572 | ssize_t vfio_pci_config_rw(struct vfio_pci_device *vdev, char __user *buf, |
1573 | size_t count, loff_t *ppos, bool iswrite) | |
89e1f7d4 AW |
1574 | { |
1575 | size_t done = 0; | |
1576 | int ret = 0; | |
1577 | loff_t pos = *ppos; | |
1578 | ||
1579 | pos &= VFIO_PCI_OFFSET_MASK; | |
1580 | ||
89e1f7d4 | 1581 | while (count) { |
180b1381 | 1582 | ret = vfio_config_do_rw(vdev, buf, count, &pos, iswrite); |
89e1f7d4 AW |
1583 | if (ret < 0) |
1584 | return ret; | |
1585 | ||
1586 | count -= ret; | |
1587 | done += ret; | |
1588 | buf += ret; | |
1589 | pos += ret; | |
1590 | } | |
1591 | ||
1592 | *ppos += done; | |
1593 | ||
1594 | return done; | |
1595 | } |