drivers/iommu/intel-iommu.c
1 /*
2 * Copyright © 2006-2014 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
18 */
19
20 #include <linux/init.h>
21 #include <linux/bitmap.h>
22 #include <linux/debugfs.h>
23 #include <linux/export.h>
24 #include <linux/slab.h>
25 #include <linux/irq.h>
26 #include <linux/interrupt.h>
27 #include <linux/spinlock.h>
28 #include <linux/pci.h>
29 #include <linux/dmar.h>
30 #include <linux/dma-mapping.h>
31 #include <linux/mempool.h>
32 #include <linux/memory.h>
33 #include <linux/timer.h>
34 #include <linux/iova.h>
35 #include <linux/iommu.h>
36 #include <linux/intel-iommu.h>
37 #include <linux/syscore_ops.h>
38 #include <linux/tboot.h>
39 #include <linux/dmi.h>
40 #include <linux/pci-ats.h>
41 #include <linux/memblock.h>
42 #include <linux/dma-contiguous.h>
43 #include <asm/irq_remapping.h>
44 #include <asm/cacheflush.h>
45 #include <asm/iommu.h>
46
47 #include "irq_remapping.h"
48
49 #define ROOT_SIZE VTD_PAGE_SIZE
50 #define CONTEXT_SIZE VTD_PAGE_SIZE
51
52 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
54 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
55
56 #define IOAPIC_RANGE_START (0xfee00000)
57 #define IOAPIC_RANGE_END (0xfeefffff)
58 #define IOVA_START_ADDR (0x1000)
59
60 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
62 #define MAX_AGAW_WIDTH 64
63 #define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
64
65 #define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66 #define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68 /* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70 #define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
71 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72 #define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
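/*
 * For example, with the default 48-bit gaw, __DOMAIN_MAX_PFN(48) is
 * 2^36 - 1.  On 64-bit builds DOMAIN_MAX_PFN(48) keeps that value; on
 * 32-bit builds the min_t() clamps it to ULONG_MAX so that PFNs always
 * fit in an unsigned long.
 */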
73
74 /* IO virtual address start page frame number */
75 #define IOVA_START_PFN (1)
76
77 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
78 #define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
79 #define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
80
81 /* page table handling */
82 #define LEVEL_STRIDE (9)
83 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
84
85 /*
86 * This bitmap is used to advertise the page sizes our hardware supports
87 * to the IOMMU core, which will then use this information to split
88 * physically contiguous memory regions it is mapping into page sizes
89 * that we support.
90 *
91 * Traditionally the IOMMU core just handed us the mappings directly,
92 * after making sure the size is an order of a 4KiB page and that the
93 * mapping has natural alignment.
94 *
95 * To retain this behavior, we currently advertise that we support
96 * all page sizes that are an order of 4KiB.
97 *
98 * If at some point we'd like to utilize the IOMMU core's new behavior,
99 * we could change this to advertise the real page sizes we support.
100 */
101 #define INTEL_IOMMU_PGSIZES (~0xFFFUL)
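/*
 * ~0xFFFUL clears bits 0-11 and sets every bit from 12 upwards, i.e. it
 * advertises 4KiB (bit 12), 8KiB (bit 13), ... 2MiB (bit 21), 1GiB
 * (bit 30) and every other power-of-two size >= 4KiB to the IOMMU core.
 */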
102
103 static inline int agaw_to_level(int agaw)
104 {
105 return agaw + 2;
106 }
107
108 static inline int agaw_to_width(int agaw)
109 {
110 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111 }
112
113 static inline int width_to_agaw(int width)
114 {
115 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116 }
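/*
 * The AGAW encoding steps in units of one page-table level (9 bits of
 * address) on top of a 30-bit, 2-level base.  For example:
 *   agaw 1 -> 39-bit width, 3-level page table
 *   agaw 2 -> 48-bit width, 4-level page table (the default, see
 *             DEFAULT_DOMAIN_ADDRESS_WIDTH: width_to_agaw(48) == 2)
 *   agaw 3 -> 57-bit width, 5-level page table
 */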
117
118 static inline unsigned int level_to_offset_bits(int level)
119 {
120 return (level - 1) * LEVEL_STRIDE;
121 }
122
123 static inline int pfn_level_offset(unsigned long pfn, int level)
124 {
125 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126 }
127
128 static inline unsigned long level_mask(int level)
129 {
130 return -1UL << level_to_offset_bits(level);
131 }
132
133 static inline unsigned long level_size(int level)
134 {
135 return 1UL << level_to_offset_bits(level);
136 }
137
138 static inline unsigned long align_to_level(unsigned long pfn, int level)
139 {
140 return (pfn + level_size(level) - 1) & level_mask(level);
141 }
142
143 static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144 {
145 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146 }
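/*
 * Each page-table level covers 512x more address space than the one
 * below it, so lvl_to_nr_pages(1) == 1 (a 4KiB page),
 * lvl_to_nr_pages(2) == 512 (a 2MiB superpage) and
 * lvl_to_nr_pages(3) == 262144 (a 1GiB superpage).
 */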
147
148 /* VT-d pages must always be _smaller_ than MM pages. Otherwise things
149 are never going to work. */
150 static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151 {
152 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153 }
154
155 static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156 {
157 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158 }
159 static inline unsigned long page_to_dma_pfn(struct page *pg)
160 {
161 return mm_to_dma_pfn(page_to_pfn(pg));
162 }
163 static inline unsigned long virt_to_dma_pfn(void *p)
164 {
165 return page_to_dma_pfn(virt_to_page(p));
166 }
167
168 /* global iommu list, set NULL for ignored DMAR units */
169 static struct intel_iommu **g_iommus;
170
171 static void __init check_tylersburg_isoch(void);
172 static int rwbf_quirk;
173
174 /*
175 * set to 1 to panic the kernel if VT-d cannot be successfully enabled
176 * (used when the kernel is launched with TXT)
177 */
178 static int force_on = 0;
179
180 /*
181 * 0: Present
182 * 1-11: Reserved
183 * 12-63: Context Ptr (12 - (haw-1))
184 * 64-127: Reserved
185 */
186 struct root_entry {
187 u64 val;
188 u64 rsvd1;
189 };
190 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
191 static inline bool root_present(struct root_entry *root)
192 {
193 return (root->val & 1);
194 }
195 static inline void set_root_present(struct root_entry *root)
196 {
197 root->val |= 1;
198 }
199 static inline void set_root_value(struct root_entry *root, unsigned long value)
200 {
201 root->val &= ~VTD_PAGE_MASK;
202 root->val |= value & VTD_PAGE_MASK;
203 }
204
205 static inline struct context_entry *
206 get_context_addr_from_root(struct root_entry *root)
207 {
208 return (struct context_entry *)
209 (root_present(root)?phys_to_virt(
210 root->val & VTD_PAGE_MASK) :
211 NULL);
212 }
213
214 /*
215 * low 64 bits:
216 * 0: present
217 * 1: fault processing disable
218 * 2-3: translation type
219 * 12-63: address space root
220 * high 64 bits:
221 * 0-2: address width
222 * 3-6: aval
223 * 8-23: domain id
224 */
225 struct context_entry {
226 u64 lo;
227 u64 hi;
228 };
229
230 static inline bool context_present(struct context_entry *context)
231 {
232 return (context->lo & 1);
233 }
234 static inline void context_set_present(struct context_entry *context)
235 {
236 context->lo |= 1;
237 }
238
239 static inline void context_set_fault_enable(struct context_entry *context)
240 {
241 context->lo &= (((u64)-1) << 2) | 1;
242 }
243
244 static inline void context_set_translation_type(struct context_entry *context,
245 unsigned long value)
246 {
247 context->lo &= (((u64)-1) << 4) | 3;
248 context->lo |= (value & 3) << 2;
249 }
250
251 static inline void context_set_address_root(struct context_entry *context,
252 unsigned long value)
253 {
254 context->lo &= ~VTD_PAGE_MASK;
255 context->lo |= value & VTD_PAGE_MASK;
256 }
257
258 static inline void context_set_address_width(struct context_entry *context,
259 unsigned long value)
260 {
261 context->hi |= value & 7;
262 }
263
264 static inline void context_set_domain_id(struct context_entry *context,
265 unsigned long value)
266 {
267 context->hi |= (value & ((1 << 16) - 1)) << 8;
268 }
269
270 static inline void context_clear_entry(struct context_entry *context)
271 {
272 context->lo = 0;
273 context->hi = 0;
274 }
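/*
 * A context entry is normally programmed with the sequence used in
 * domain_context_mapping_one(): set the domain id, the address root and
 * width (or just the width in pass-through mode), the translation type,
 * enable fault processing, and finally set the present bit, followed by
 * a cache flush of the entry.
 */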
275
276 /*
277 * 0: readable
278 * 1: writable
279 * 2-6: reserved
280 * 7: super page
281 * 8-10: available
282 * 11: snoop behavior
283 * 12-63: Host physical address
284 */
285 struct dma_pte {
286 u64 val;
287 };
288
289 static inline void dma_clear_pte(struct dma_pte *pte)
290 {
291 pte->val = 0;
292 }
293
294 static inline u64 dma_pte_addr(struct dma_pte *pte)
295 {
296 #ifdef CONFIG_64BIT
297 return pte->val & VTD_PAGE_MASK;
298 #else
299 /* Must have a full atomic 64-bit read */
300 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
301 #endif
302 }
303
304 static inline bool dma_pte_present(struct dma_pte *pte)
305 {
306 return (pte->val & 3) != 0;
307 }
308
309 static inline bool dma_pte_superpage(struct dma_pte *pte)
310 {
311 return (pte->val & DMA_PTE_LARGE_PAGE);
312 }
313
314 static inline int first_pte_in_page(struct dma_pte *pte)
315 {
316 return !((unsigned long)pte & ~VTD_PAGE_MASK);
317 }
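/*
 * A page-table page is VTD_PAGE_SIZE bytes and holds 512 eight-byte
 * PTEs, so first_pte_in_page() is true exactly when the pte pointer is
 * 4KiB-aligned, i.e. when it points at entry 0 of its table.  The
 * mapping/unmapping loops below use it to detect table boundaries.
 */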
318
319 /*
320 * This domain is a static identity mapping domain.
321 * 1. This domain creates a static 1:1 mapping to all usable memory.
322 * 2. It maps to each iommu if successful.
323 * 3. Each iommu maps to this domain if successful.
324 */
325 static struct dmar_domain *si_domain;
326 static int hw_pass_through = 1;
327
328 /* domain represents a virtual machine; more than one device
329 * across iommus may be owned by one domain, e.g. a kvm guest.
330 */
331 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
332
333 /* si_domain contains multiple devices */
334 #define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
335
336 struct dmar_domain {
337 int id; /* domain id */
338 int nid; /* node id */
339 DECLARE_BITMAP(iommu_bmp, DMAR_UNITS_SUPPORTED);
340 /* bitmap of iommus this domain uses */
341
342 struct list_head devices; /* all devices' list */
343 struct iova_domain iovad; /* iova's that belong to this domain */
344
345 struct dma_pte *pgd; /* virtual address */
346 int gaw; /* max guest address width */
347
348 /* adjusted guest address width, 0 is level 2 30-bit */
349 int agaw;
350
351 int flags; /* flags to find out type of domain */
352
353 int iommu_coherency;/* indicate coherency of iommu access */
354 int iommu_snooping; /* indicate snooping control feature*/
355 int iommu_count; /* reference count of iommu */
356 int iommu_superpage;/* Level of superpages supported:
357 0 == 4KiB (no superpages), 1 == 2MiB,
358 2 == 1GiB, 3 == 512GiB, 4 == 256TiB */
359 spinlock_t iommu_lock; /* protect iommu set in domain */
360 u64 max_addr; /* maximum mapped address */
361
362 struct iommu_domain domain; /* generic domain data structure for
363 iommu core */
364 };
365
366 /* PCI domain-device relationship */
367 struct device_domain_info {
368 struct list_head link; /* link to domain siblings */
369 struct list_head global; /* link to global list */
370 u8 bus; /* PCI bus number */
371 u8 devfn; /* PCI devfn number */
372 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
373 struct intel_iommu *iommu; /* IOMMU used by this device */
374 struct dmar_domain *domain; /* pointer to domain */
375 };
376
377 struct dmar_rmrr_unit {
378 struct list_head list; /* list of rmrr units */
379 struct acpi_dmar_header *hdr; /* ACPI header */
380 u64 base_address; /* reserved base address*/
381 u64 end_address; /* reserved end address */
382 struct dmar_dev_scope *devices; /* target devices */
383 int devices_cnt; /* target device count */
384 };
385
386 struct dmar_atsr_unit {
387 struct list_head list; /* list of ATSR units */
388 struct acpi_dmar_header *hdr; /* ACPI header */
389 struct dmar_dev_scope *devices; /* target devices */
390 int devices_cnt; /* target device count */
391 u8 include_all:1; /* include all ports */
392 };
393
394 static LIST_HEAD(dmar_atsr_units);
395 static LIST_HEAD(dmar_rmrr_units);
396
397 #define for_each_rmrr_units(rmrr) \
398 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
399
400 static void flush_unmaps_timeout(unsigned long data);
401
402 static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
403
404 #define HIGH_WATER_MARK 250
405 struct deferred_flush_tables {
406 int next;
407 struct iova *iova[HIGH_WATER_MARK];
408 struct dmar_domain *domain[HIGH_WATER_MARK];
409 struct page *freelist[HIGH_WATER_MARK];
410 };
411
412 static struct deferred_flush_tables *deferred_flush;
413
414 /* number of IOMMUs, used to size g_iommus[] and bound iommu bitmap searches */
415 static int g_num_of_iommus;
416
417 static DEFINE_SPINLOCK(async_umap_flush_lock);
418 static LIST_HEAD(unmaps_to_do);
419
420 static int timer_on;
421 static long list_size;
422
423 static void domain_exit(struct dmar_domain *domain);
424 static void domain_remove_dev_info(struct dmar_domain *domain);
425 static void domain_remove_one_dev_info(struct dmar_domain *domain,
426 struct device *dev);
427 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
428 struct device *dev);
429 static int domain_detach_iommu(struct dmar_domain *domain,
430 struct intel_iommu *iommu);
431
432 #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
433 int dmar_disabled = 0;
434 #else
435 int dmar_disabled = 1;
436 #endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
437
438 int intel_iommu_enabled = 0;
439 EXPORT_SYMBOL_GPL(intel_iommu_enabled);
440
441 static int dmar_map_gfx = 1;
442 static int dmar_forcedac;
443 static int intel_iommu_strict;
444 static int intel_iommu_superpage = 1;
445
446 int intel_iommu_gfx_mapped;
447 EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
448
449 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
450 static DEFINE_SPINLOCK(device_domain_lock);
451 static LIST_HEAD(device_domain_list);
452
453 static const struct iommu_ops intel_iommu_ops;
454
455 /* Convert a generic 'struct iommu_domain' to the private 'struct dmar_domain' */
456 static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
457 {
458 return container_of(dom, struct dmar_domain, domain);
459 }
460
461 static int __init intel_iommu_setup(char *str)
462 {
463 if (!str)
464 return -EINVAL;
465 while (*str) {
466 if (!strncmp(str, "on", 2)) {
467 dmar_disabled = 0;
468 printk(KERN_INFO "Intel-IOMMU: enabled\n");
469 } else if (!strncmp(str, "off", 3)) {
470 dmar_disabled = 1;
471 printk(KERN_INFO "Intel-IOMMU: disabled\n");
472 } else if (!strncmp(str, "igfx_off", 8)) {
473 dmar_map_gfx = 0;
474 printk(KERN_INFO
475 "Intel-IOMMU: disable GFX device mapping\n");
476 } else if (!strncmp(str, "forcedac", 8)) {
477 printk(KERN_INFO
478 "Intel-IOMMU: Forcing DAC for PCI devices\n");
479 dmar_forcedac = 1;
480 } else if (!strncmp(str, "strict", 6)) {
481 printk(KERN_INFO
482 "Intel-IOMMU: disable batched IOTLB flush\n");
483 intel_iommu_strict = 1;
484 } else if (!strncmp(str, "sp_off", 6)) {
485 printk(KERN_INFO
486 "Intel-IOMMU: disable supported super page\n");
487 intel_iommu_superpage = 0;
488 }
489
490 str += strcspn(str, ",");
491 while (*str == ',')
492 str++;
493 }
494 return 0;
495 }
496 __setup("intel_iommu=", intel_iommu_setup);
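/*
 * Example usage on the kernel command line, combining several of the
 * options parsed above:
 *
 *     intel_iommu=on,strict,sp_off
 *
 * which enables the IOMMU, disables batched IOTLB flushing and disables
 * superpage support.
 */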
497
498 static struct kmem_cache *iommu_domain_cache;
499 static struct kmem_cache *iommu_devinfo_cache;
500
501 static inline void *alloc_pgtable_page(int node)
502 {
503 struct page *page;
504 void *vaddr = NULL;
505
506 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
507 if (page)
508 vaddr = page_address(page);
509 return vaddr;
510 }
511
512 static inline void free_pgtable_page(void *vaddr)
513 {
514 free_page((unsigned long)vaddr);
515 }
516
517 static inline void *alloc_domain_mem(void)
518 {
519 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
520 }
521
522 static void free_domain_mem(void *vaddr)
523 {
524 kmem_cache_free(iommu_domain_cache, vaddr);
525 }
526
527 static inline void * alloc_devinfo_mem(void)
528 {
529 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
530 }
531
532 static inline void free_devinfo_mem(void *vaddr)
533 {
534 kmem_cache_free(iommu_devinfo_cache, vaddr);
535 }
536
537 static inline int domain_type_is_vm(struct dmar_domain *domain)
538 {
539 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
540 }
541
542 static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
543 {
544 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
545 DOMAIN_FLAG_STATIC_IDENTITY);
546 }
547
548 static inline int domain_pfn_supported(struct dmar_domain *domain,
549 unsigned long pfn)
550 {
551 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
552
553 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
554 }
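/*
 * For example, a domain with agaw 2 has a 48-bit address width, so
 * addr_width is 36 and any pfn at or above 1 << 36 is rejected as being
 * outside the domain's addressable range.
 */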
555
556 static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
557 {
558 unsigned long sagaw;
559 int agaw = -1;
560
561 sagaw = cap_sagaw(iommu->cap);
562 for (agaw = width_to_agaw(max_gaw);
563 agaw >= 0; agaw--) {
564 if (test_bit(agaw, &sagaw))
565 break;
566 }
567
568 return agaw;
569 }
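/*
 * For example, if cap_sagaw() only reports support for 4-level tables
 * (bit 2 set), then a request for a 48-bit max_gaw starts at agaw 2 and
 * matches immediately, while a 64-bit request starts at
 * width_to_agaw(64) == 4 and walks down until it reaches the supported
 * agaw 2.
 */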
570
571 /*
572 * Calculate max SAGAW for each iommu.
573 */
574 int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
575 {
576 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
577 }
578
579 /*
580 * Calculate the agaw for each iommu.
581 * "SAGAW" may differ across iommus, so use a default agaw and fall back
582 * to a smaller supported agaw for iommus that don't support the default.
583 */
584 int iommu_calculate_agaw(struct intel_iommu *iommu)
585 {
586 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
587 }
588
589 /* This function only returns a single iommu for a domain */
590 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
591 {
592 int iommu_id;
593
594 /* si_domain and vm domain should not get here. */
595 BUG_ON(domain_type_is_vm_or_si(domain));
596 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
597 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
598 return NULL;
599
600 return g_iommus[iommu_id];
601 }
602
603 static void domain_update_iommu_coherency(struct dmar_domain *domain)
604 {
605 struct dmar_drhd_unit *drhd;
606 struct intel_iommu *iommu;
607 bool found = false;
608 int i;
609
610 domain->iommu_coherency = 1;
611
612 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
613 found = true;
614 if (!ecap_coherent(g_iommus[i]->ecap)) {
615 domain->iommu_coherency = 0;
616 break;
617 }
618 }
619 if (found)
620 return;
621
622 /* No hardware attached; use lowest common denominator */
623 rcu_read_lock();
624 for_each_active_iommu(iommu, drhd) {
625 if (!ecap_coherent(iommu->ecap)) {
626 domain->iommu_coherency = 0;
627 break;
628 }
629 }
630 rcu_read_unlock();
631 }
632
633 static int domain_update_iommu_snooping(struct intel_iommu *skip)
634 {
635 struct dmar_drhd_unit *drhd;
636 struct intel_iommu *iommu;
637 int ret = 1;
638
639 rcu_read_lock();
640 for_each_active_iommu(iommu, drhd) {
641 if (iommu != skip) {
642 if (!ecap_sc_support(iommu->ecap)) {
643 ret = 0;
644 break;
645 }
646 }
647 }
648 rcu_read_unlock();
649
650 return ret;
651 }
652
653 static int domain_update_iommu_superpage(struct intel_iommu *skip)
654 {
655 struct dmar_drhd_unit *drhd;
656 struct intel_iommu *iommu;
657 int mask = 0xf;
658
659 if (!intel_iommu_superpage) {
660 return 0;
661 }
662
663 /* set iommu_superpage to the smallest common denominator */
664 rcu_read_lock();
665 for_each_active_iommu(iommu, drhd) {
666 if (iommu != skip) {
667 mask &= cap_super_page_val(iommu->cap);
668 if (!mask)
669 break;
670 }
671 }
672 rcu_read_unlock();
673
674 return fls(mask);
675 }
676
677 /* Some capabilities may be different across iommus */
678 static void domain_update_iommu_cap(struct dmar_domain *domain)
679 {
680 domain_update_iommu_coherency(domain);
681 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
682 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
683 }
684
685 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
686 {
687 struct dmar_drhd_unit *drhd = NULL;
688 struct intel_iommu *iommu;
689 struct device *tmp;
690 struct pci_dev *ptmp, *pdev = NULL;
691 u16 segment = 0;
692 int i;
693
694 if (dev_is_pci(dev)) {
695 pdev = to_pci_dev(dev);
696 segment = pci_domain_nr(pdev->bus);
697 } else if (has_acpi_companion(dev))
698 dev = &ACPI_COMPANION(dev)->dev;
699
700 rcu_read_lock();
701 for_each_active_iommu(iommu, drhd) {
702 if (pdev && segment != drhd->segment)
703 continue;
704
705 for_each_active_dev_scope(drhd->devices,
706 drhd->devices_cnt, i, tmp) {
707 if (tmp == dev) {
708 *bus = drhd->devices[i].bus;
709 *devfn = drhd->devices[i].devfn;
710 goto out;
711 }
712
713 if (!pdev || !dev_is_pci(tmp))
714 continue;
715
716 ptmp = to_pci_dev(tmp);
717 if (ptmp->subordinate &&
718 ptmp->subordinate->number <= pdev->bus->number &&
719 ptmp->subordinate->busn_res.end >= pdev->bus->number)
720 goto got_pdev;
721 }
722
723 if (pdev && drhd->include_all) {
724 got_pdev:
725 *bus = pdev->bus->number;
726 *devfn = pdev->devfn;
727 goto out;
728 }
729 }
730 iommu = NULL;
731 out:
732 rcu_read_unlock();
733
734 return iommu;
735 }
736
737 static void domain_flush_cache(struct dmar_domain *domain,
738 void *addr, int size)
739 {
740 if (!domain->iommu_coherency)
741 clflush_cache_range(addr, size);
742 }
743
744 /* Gets context entry for a given bus and devfn */
745 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
746 u8 bus, u8 devfn)
747 {
748 struct root_entry *root;
749 struct context_entry *context;
750 unsigned long phy_addr;
751 unsigned long flags;
752
753 spin_lock_irqsave(&iommu->lock, flags);
754 root = &iommu->root_entry[bus];
755 context = get_context_addr_from_root(root);
756 if (!context) {
757 context = (struct context_entry *)
758 alloc_pgtable_page(iommu->node);
759 if (!context) {
760 spin_unlock_irqrestore(&iommu->lock, flags);
761 return NULL;
762 }
763 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
764 phy_addr = virt_to_phys((void *)context);
765 set_root_value(root, phy_addr);
766 set_root_present(root);
767 __iommu_flush_cache(iommu, root, sizeof(*root));
768 }
769 spin_unlock_irqrestore(&iommu->lock, flags);
770 return &context[devfn];
771 }
772
773 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
774 {
775 struct root_entry *root;
776 struct context_entry *context;
777 int ret;
778 unsigned long flags;
779
780 spin_lock_irqsave(&iommu->lock, flags);
781 root = &iommu->root_entry[bus];
782 context = get_context_addr_from_root(root);
783 if (!context) {
784 ret = 0;
785 goto out;
786 }
787 ret = context_present(&context[devfn]);
788 out:
789 spin_unlock_irqrestore(&iommu->lock, flags);
790 return ret;
791 }
792
793 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
794 {
795 struct root_entry *root;
796 struct context_entry *context;
797 unsigned long flags;
798
799 spin_lock_irqsave(&iommu->lock, flags);
800 root = &iommu->root_entry[bus];
801 context = get_context_addr_from_root(root);
802 if (context) {
803 context_clear_entry(&context[devfn]);
804 __iommu_flush_cache(iommu, &context[devfn], \
805 sizeof(*context));
806 }
807 spin_unlock_irqrestore(&iommu->lock, flags);
808 }
809
810 static void free_context_table(struct intel_iommu *iommu)
811 {
812 struct root_entry *root;
813 int i;
814 unsigned long flags;
815 struct context_entry *context;
816
817 spin_lock_irqsave(&iommu->lock, flags);
818 if (!iommu->root_entry) {
819 goto out;
820 }
821 for (i = 0; i < ROOT_ENTRY_NR; i++) {
822 root = &iommu->root_entry[i];
823 context = get_context_addr_from_root(root);
824 if (context)
825 free_pgtable_page(context);
826 }
827 free_pgtable_page(iommu->root_entry);
828 iommu->root_entry = NULL;
829 out:
830 spin_unlock_irqrestore(&iommu->lock, flags);
831 }
832
833 static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
834 unsigned long pfn, int *target_level)
835 {
836 struct dma_pte *parent, *pte = NULL;
837 int level = agaw_to_level(domain->agaw);
838 int offset;
839
840 BUG_ON(!domain->pgd);
841
842 if (!domain_pfn_supported(domain, pfn))
843 /* Address beyond IOMMU's addressing capabilities. */
844 return NULL;
845
846 parent = domain->pgd;
847
848 while (1) {
849 void *tmp_page;
850
851 offset = pfn_level_offset(pfn, level);
852 pte = &parent[offset];
853 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
854 break;
855 if (level == *target_level)
856 break;
857
858 if (!dma_pte_present(pte)) {
859 uint64_t pteval;
860
861 tmp_page = alloc_pgtable_page(domain->nid);
862
863 if (!tmp_page)
864 return NULL;
865
866 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
867 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
868 if (cmpxchg64(&pte->val, 0ULL, pteval))
869 /* Someone else set it while we were thinking; use theirs. */
870 free_pgtable_page(tmp_page);
871 else
872 domain_flush_cache(domain, pte, sizeof(*pte));
873 }
874 if (level == 1)
875 break;
876
877 parent = phys_to_virt(dma_pte_addr(pte));
878 level--;
879 }
880
881 if (!*target_level)
882 *target_level = level;
883
884 return pte;
885 }
886
887
888 /* return the pte for an address at a specific level */
889 static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
890 unsigned long pfn,
891 int level, int *large_page)
892 {
893 struct dma_pte *parent, *pte = NULL;
894 int total = agaw_to_level(domain->agaw);
895 int offset;
896
897 parent = domain->pgd;
898 while (level <= total) {
899 offset = pfn_level_offset(pfn, total);
900 pte = &parent[offset];
901 if (level == total)
902 return pte;
903
904 if (!dma_pte_present(pte)) {
905 *large_page = total;
906 break;
907 }
908
909 if (dma_pte_superpage(pte)) {
910 *large_page = total;
911 return pte;
912 }
913
914 parent = phys_to_virt(dma_pte_addr(pte));
915 total--;
916 }
917 return NULL;
918 }
919
920 /* clear last level pte; a TLB flush should follow */
921 static void dma_pte_clear_range(struct dmar_domain *domain,
922 unsigned long start_pfn,
923 unsigned long last_pfn)
924 {
925 unsigned int large_page = 1;
926 struct dma_pte *first_pte, *pte;
927
928 BUG_ON(!domain_pfn_supported(domain, start_pfn));
929 BUG_ON(!domain_pfn_supported(domain, last_pfn));
930 BUG_ON(start_pfn > last_pfn);
931
932 /* we don't need lock here; nobody else touches the iova range */
933 do {
934 large_page = 1;
935 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
936 if (!pte) {
937 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
938 continue;
939 }
940 do {
941 dma_clear_pte(pte);
942 start_pfn += lvl_to_nr_pages(large_page);
943 pte++;
944 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
945
946 domain_flush_cache(domain, first_pte,
947 (void *)pte - (void *)first_pte);
948
949 } while (start_pfn && start_pfn <= last_pfn);
950 }
951
952 static void dma_pte_free_level(struct dmar_domain *domain, int level,
953 struct dma_pte *pte, unsigned long pfn,
954 unsigned long start_pfn, unsigned long last_pfn)
955 {
956 pfn = max(start_pfn, pfn);
957 pte = &pte[pfn_level_offset(pfn, level)];
958
959 do {
960 unsigned long level_pfn;
961 struct dma_pte *level_pte;
962
963 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
964 goto next;
965
966 level_pfn = pfn & level_mask(level - 1);
967 level_pte = phys_to_virt(dma_pte_addr(pte));
968
969 if (level > 2)
970 dma_pte_free_level(domain, level - 1, level_pte,
971 level_pfn, start_pfn, last_pfn);
972
973 /* If range covers entire pagetable, free it */
974 if (!(start_pfn > level_pfn ||
975 last_pfn < level_pfn + level_size(level) - 1)) {
976 dma_clear_pte(pte);
977 domain_flush_cache(domain, pte, sizeof(*pte));
978 free_pgtable_page(level_pte);
979 }
980 next:
981 pfn += level_size(level);
982 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
983 }
984
985 /* free page table pages. last level pte should already be cleared */
986 static void dma_pte_free_pagetable(struct dmar_domain *domain,
987 unsigned long start_pfn,
988 unsigned long last_pfn)
989 {
990 BUG_ON(!domain_pfn_supported(domain, start_pfn));
991 BUG_ON(!domain_pfn_supported(domain, last_pfn));
992 BUG_ON(start_pfn > last_pfn);
993
994 dma_pte_clear_range(domain, start_pfn, last_pfn);
995
996 /* We don't need lock here; nobody else touches the iova range */
997 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
998 domain->pgd, 0, start_pfn, last_pfn);
999
1000 /* free pgd */
1001 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1002 free_pgtable_page(domain->pgd);
1003 domain->pgd = NULL;
1004 }
1005 }
1006
1007 /* When a page at a given level is being unlinked from its parent, we don't
1008 need to *modify* it at all. All we need to do is make a list of all the
1009 pages which can be freed just as soon as we've flushed the IOTLB and we
1010 know the hardware page-walk will no longer touch them.
1011 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1012 be freed. */
1013 static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1014 int level, struct dma_pte *pte,
1015 struct page *freelist)
1016 {
1017 struct page *pg;
1018
1019 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1020 pg->freelist = freelist;
1021 freelist = pg;
1022
1023 if (level == 1)
1024 return freelist;
1025
1026 pte = page_address(pg);
1027 do {
1028 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1029 freelist = dma_pte_list_pagetables(domain, level - 1,
1030 pte, freelist);
1031 pte++;
1032 } while (!first_pte_in_page(pte));
1033
1034 return freelist;
1035 }
1036
1037 static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1038 struct dma_pte *pte, unsigned long pfn,
1039 unsigned long start_pfn,
1040 unsigned long last_pfn,
1041 struct page *freelist)
1042 {
1043 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1044
1045 pfn = max(start_pfn, pfn);
1046 pte = &pte[pfn_level_offset(pfn, level)];
1047
1048 do {
1049 unsigned long level_pfn;
1050
1051 if (!dma_pte_present(pte))
1052 goto next;
1053
1054 level_pfn = pfn & level_mask(level);
1055
1056 /* If range covers entire pagetable, free it */
1057 if (start_pfn <= level_pfn &&
1058 last_pfn >= level_pfn + level_size(level) - 1) {
1059 /* These subordinate page tables are going away entirely. Don't
1060 bother to clear them; we're just going to *free* them. */
1061 if (level > 1 && !dma_pte_superpage(pte))
1062 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1063
1064 dma_clear_pte(pte);
1065 if (!first_pte)
1066 first_pte = pte;
1067 last_pte = pte;
1068 } else if (level > 1) {
1069 /* Recurse down into a level that isn't *entirely* obsolete */
1070 freelist = dma_pte_clear_level(domain, level - 1,
1071 phys_to_virt(dma_pte_addr(pte)),
1072 level_pfn, start_pfn, last_pfn,
1073 freelist);
1074 }
1075 next:
1076 pfn += level_size(level);
1077 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1078
1079 if (first_pte)
1080 domain_flush_cache(domain, first_pte,
1081 (void *)++last_pte - (void *)first_pte);
1082
1083 return freelist;
1084 }
1085
1086 /* We can't just free the pages because the IOMMU may still be walking
1087 the page tables, and may have cached the intermediate levels. The
1088 pages can only be freed after the IOTLB flush has been done. */
1089 struct page *domain_unmap(struct dmar_domain *domain,
1090 unsigned long start_pfn,
1091 unsigned long last_pfn)
1092 {
1093 struct page *freelist = NULL;
1094
1095 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1096 BUG_ON(!domain_pfn_supported(domain, last_pfn));
1097 BUG_ON(start_pfn > last_pfn);
1098
1099 /* we don't need lock here; nobody else touches the iova range */
1100 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1101 domain->pgd, 0, start_pfn, last_pfn, NULL);
1102
1103 /* free pgd */
1104 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1105 struct page *pgd_page = virt_to_page(domain->pgd);
1106 pgd_page->freelist = freelist;
1107 freelist = pgd_page;
1108
1109 domain->pgd = NULL;
1110 }
1111
1112 return freelist;
1113 }
1114
1115 void dma_free_pagelist(struct page *freelist)
1116 {
1117 struct page *pg;
1118
1119 while ((pg = freelist)) {
1120 freelist = pg->freelist;
1121 free_pgtable_page(page_address(pg));
1122 }
1123 }
1124
1125 /* iommu handling */
1126 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1127 {
1128 struct root_entry *root;
1129 unsigned long flags;
1130
1131 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
1132 if (!root) {
1133 pr_err("IOMMU: allocating root entry for %s failed\n",
1134 iommu->name);
1135 return -ENOMEM;
1136 }
1137
1138 __iommu_flush_cache(iommu, root, ROOT_SIZE);
1139
1140 spin_lock_irqsave(&iommu->lock, flags);
1141 iommu->root_entry = root;
1142 spin_unlock_irqrestore(&iommu->lock, flags);
1143
1144 return 0;
1145 }
1146
1147 static void iommu_set_root_entry(struct intel_iommu *iommu)
1148 {
1149 void *addr;
1150 u32 sts;
1151 unsigned long flag;
1152
1153 addr = iommu->root_entry;
1154
1155 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1156 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1157
1158 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
1159
1160 /* Make sure hardware completes it */
1161 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1162 readl, (sts & DMA_GSTS_RTPS), sts);
1163
1164 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1165 }
1166
1167 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1168 {
1169 u32 val;
1170 unsigned long flag;
1171
1172 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
1173 return;
1174
1175 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1176 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
1177
1178 /* Make sure hardware completes it */
1179 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1180 readl, (!(val & DMA_GSTS_WBFS)), val);
1181
1182 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1183 }
1184
1185 /* return value determines if we need a write buffer flush */
1186 static void __iommu_flush_context(struct intel_iommu *iommu,
1187 u16 did, u16 source_id, u8 function_mask,
1188 u64 type)
1189 {
1190 u64 val = 0;
1191 unsigned long flag;
1192
1193 switch (type) {
1194 case DMA_CCMD_GLOBAL_INVL:
1195 val = DMA_CCMD_GLOBAL_INVL;
1196 break;
1197 case DMA_CCMD_DOMAIN_INVL:
1198 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1199 break;
1200 case DMA_CCMD_DEVICE_INVL:
1201 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1202 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1203 break;
1204 default:
1205 BUG();
1206 }
1207 val |= DMA_CCMD_ICC;
1208
1209 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1210 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1211
1212 /* Make sure hardware completes it */
1213 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1214 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1215
1216 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1217 }
1218
1219 /* return value determines if we need a write buffer flush */
1220 static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1221 u64 addr, unsigned int size_order, u64 type)
1222 {
1223 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1224 u64 val = 0, val_iva = 0;
1225 unsigned long flag;
1226
1227 switch (type) {
1228 case DMA_TLB_GLOBAL_FLUSH:
1229 /* a global flush doesn't need to set IVA_REG */
1230 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1231 break;
1232 case DMA_TLB_DSI_FLUSH:
1233 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1234 break;
1235 case DMA_TLB_PSI_FLUSH:
1236 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1237 /* IH bit is passed in as part of address */
1238 val_iva = size_order | addr;
1239 break;
1240 default:
1241 BUG();
1242 }
1243 /* Note: set drain read/write */
1244 #if 0
1245 /*
1246 * This is probably just extra caution; it looks like we can
1247 * ignore it without any impact.
1248 */
1249 if (cap_read_drain(iommu->cap))
1250 val |= DMA_TLB_READ_DRAIN;
1251 #endif
1252 if (cap_write_drain(iommu->cap))
1253 val |= DMA_TLB_WRITE_DRAIN;
1254
1255 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1256 /* Note: Only uses first TLB reg currently */
1257 if (val_iva)
1258 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1259 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1260
1261 /* Make sure hardware completes it */
1262 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1263 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1264
1265 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1266
1267 /* check IOTLB invalidation granularity */
1268 if (DMA_TLB_IAIG(val) == 0)
1269 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1270 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1271 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1272 (unsigned long long)DMA_TLB_IIRG(type),
1273 (unsigned long long)DMA_TLB_IAIG(val));
1274 }
1275
1276 static struct device_domain_info *
1277 iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1278 u8 bus, u8 devfn)
1279 {
1280 bool found = false;
1281 unsigned long flags;
1282 struct device_domain_info *info;
1283 struct pci_dev *pdev;
1284
1285 if (!ecap_dev_iotlb_support(iommu->ecap))
1286 return NULL;
1287
1288 if (!iommu->qi)
1289 return NULL;
1290
1291 spin_lock_irqsave(&device_domain_lock, flags);
1292 list_for_each_entry(info, &domain->devices, link)
1293 if (info->iommu == iommu && info->bus == bus &&
1294 info->devfn == devfn) {
1295 found = true;
1296 break;
1297 }
1298 spin_unlock_irqrestore(&device_domain_lock, flags);
1299
1300 if (!found || !info->dev || !dev_is_pci(info->dev))
1301 return NULL;
1302
1303 pdev = to_pci_dev(info->dev);
1304
1305 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
1306 return NULL;
1307
1308 if (!dmar_find_matched_atsr_unit(pdev))
1309 return NULL;
1310
1311 return info;
1312 }
1313
1314 static void iommu_enable_dev_iotlb(struct device_domain_info *info)
1315 {
1316 if (!info || !dev_is_pci(info->dev))
1317 return;
1318
1319 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
1320 }
1321
1322 static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1323 {
1324 if (!info->dev || !dev_is_pci(info->dev) ||
1325 !pci_ats_enabled(to_pci_dev(info->dev)))
1326 return;
1327
1328 pci_disable_ats(to_pci_dev(info->dev));
1329 }
1330
1331 static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1332 u64 addr, unsigned mask)
1333 {
1334 u16 sid, qdep;
1335 unsigned long flags;
1336 struct device_domain_info *info;
1337
1338 spin_lock_irqsave(&device_domain_lock, flags);
1339 list_for_each_entry(info, &domain->devices, link) {
1340 struct pci_dev *pdev;
1341 if (!info->dev || !dev_is_pci(info->dev))
1342 continue;
1343
1344 pdev = to_pci_dev(info->dev);
1345 if (!pci_ats_enabled(pdev))
1346 continue;
1347
1348 sid = info->bus << 8 | info->devfn;
1349 qdep = pci_ats_queue_depth(pdev);
1350 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1351 }
1352 spin_unlock_irqrestore(&device_domain_lock, flags);
1353 }
1354
1355 static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
1356 unsigned long pfn, unsigned int pages, int ih, int map)
1357 {
1358 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
1359 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
1360
1361 BUG_ON(pages == 0);
1362
1363 if (ih)
1364 ih = 1 << 6;
1365 /*
1366 * Fall back to domain-selective flush if there is no PSI support or the
1367 * size is too big.
1368 * PSI requires the number of pages to be a power of two, and the base
1369 * address to be naturally aligned to that size.
1370 */
1371 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1372 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1373 DMA_TLB_DSI_FLUSH);
1374 else
1375 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
1376 DMA_TLB_PSI_FLUSH);
1377
1378 /*
1379 * In caching mode, changes of pages from non-present to present require
1380 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
1381 */
1382 if (!cap_caching_mode(iommu->cap) || !map)
1383 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1384 }
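/*
 * For example, flushing 300 pages gives mask = ilog2(512) = 9, i.e. a
 * 512-page (2MiB) naturally aligned invalidation.  If that mask exceeds
 * cap_max_amask_val() the code above falls back to a domain-selective
 * flush instead.
 */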
1385
1386 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1387 {
1388 u32 pmen;
1389 unsigned long flags;
1390
1391 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1392 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1393 pmen &= ~DMA_PMEN_EPM;
1394 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1395
1396 /* wait for the protected region status bit to clear */
1397 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1398 readl, !(pmen & DMA_PMEN_PRS), pmen);
1399
1400 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1401 }
1402
1403 static void iommu_enable_translation(struct intel_iommu *iommu)
1404 {
1405 u32 sts;
1406 unsigned long flags;
1407
1408 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1409 iommu->gcmd |= DMA_GCMD_TE;
1410 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1411
1412 /* Make sure hardware completes it */
1413 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1414 readl, (sts & DMA_GSTS_TES), sts);
1415
1416 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1417 }
1418
1419 static void iommu_disable_translation(struct intel_iommu *iommu)
1420 {
1421 u32 sts;
1422 unsigned long flag;
1423
1424 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1425 iommu->gcmd &= ~DMA_GCMD_TE;
1426 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1427
1428 /* Make sure hardware completes it */
1429 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
1430 readl, (!(sts & DMA_GSTS_TES)), sts);
1431
1432 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1433 }
1434
1435
1436 static int iommu_init_domains(struct intel_iommu *iommu)
1437 {
1438 unsigned long ndomains;
1439 unsigned long nlongs;
1440
1441 ndomains = cap_ndoms(iommu->cap);
1442 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1443 iommu->seq_id, ndomains);
1444 nlongs = BITS_TO_LONGS(ndomains);
1445
1446 spin_lock_init(&iommu->lock);
1447
1448 /* TBD: there might be 64K domains,
1449 * consider another allocation scheme for future chips
1450 */
1451 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1452 if (!iommu->domain_ids) {
1453 pr_err("IOMMU%d: allocating domain id array failed\n",
1454 iommu->seq_id);
1455 return -ENOMEM;
1456 }
1457 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1458 GFP_KERNEL);
1459 if (!iommu->domains) {
1460 pr_err("IOMMU%d: allocating domain array failed\n",
1461 iommu->seq_id);
1462 kfree(iommu->domain_ids);
1463 iommu->domain_ids = NULL;
1464 return -ENOMEM;
1465 }
1466
1467 /*
1468 * if Caching mode is set, then invalid translations are tagged
1469 * with domainid 0. Hence we need to pre-allocate it.
1470 */
1471 if (cap_caching_mode(iommu->cap))
1472 set_bit(0, iommu->domain_ids);
1473 return 0;
1474 }
1475
1476 static void disable_dmar_iommu(struct intel_iommu *iommu)
1477 {
1478 struct dmar_domain *domain;
1479 int i;
1480
1481 if ((iommu->domains) && (iommu->domain_ids)) {
1482 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1483 /*
1484 * Domain id 0 is reserved for invalid translation
1485 * if hardware supports caching mode.
1486 */
1487 if (cap_caching_mode(iommu->cap) && i == 0)
1488 continue;
1489
1490 domain = iommu->domains[i];
1491 clear_bit(i, iommu->domain_ids);
1492 if (domain_detach_iommu(domain, iommu) == 0 &&
1493 !domain_type_is_vm(domain))
1494 domain_exit(domain);
1495 }
1496 }
1497
1498 if (iommu->gcmd & DMA_GCMD_TE)
1499 iommu_disable_translation(iommu);
1500 }
1501
1502 static void free_dmar_iommu(struct intel_iommu *iommu)
1503 {
1504 if ((iommu->domains) && (iommu->domain_ids)) {
1505 kfree(iommu->domains);
1506 kfree(iommu->domain_ids);
1507 iommu->domains = NULL;
1508 iommu->domain_ids = NULL;
1509 }
1510
1511 g_iommus[iommu->seq_id] = NULL;
1512
1513 /* free context mapping */
1514 free_context_table(iommu);
1515 }
1516
1517 static struct dmar_domain *alloc_domain(int flags)
1518 {
1519 /* domain id for virtual machine domains; it won't be set in a context entry */
1520 static atomic_t vm_domid = ATOMIC_INIT(0);
1521 struct dmar_domain *domain;
1522
1523 domain = alloc_domain_mem();
1524 if (!domain)
1525 return NULL;
1526
1527 memset(domain, 0, sizeof(*domain));
1528 domain->nid = -1;
1529 domain->flags = flags;
1530 spin_lock_init(&domain->iommu_lock);
1531 INIT_LIST_HEAD(&domain->devices);
1532 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1533 domain->id = atomic_inc_return(&vm_domid);
1534
1535 return domain;
1536 }
1537
1538 static int __iommu_attach_domain(struct dmar_domain *domain,
1539 struct intel_iommu *iommu)
1540 {
1541 int num;
1542 unsigned long ndomains;
1543
1544 ndomains = cap_ndoms(iommu->cap);
1545 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1546 if (num < ndomains) {
1547 set_bit(num, iommu->domain_ids);
1548 iommu->domains[num] = domain;
1549 } else {
1550 num = -ENOSPC;
1551 }
1552
1553 return num;
1554 }
1555
1556 static int iommu_attach_domain(struct dmar_domain *domain,
1557 struct intel_iommu *iommu)
1558 {
1559 int num;
1560 unsigned long flags;
1561
1562 spin_lock_irqsave(&iommu->lock, flags);
1563 num = __iommu_attach_domain(domain, iommu);
1564 spin_unlock_irqrestore(&iommu->lock, flags);
1565 if (num < 0)
1566 pr_err("IOMMU: no free domain ids\n");
1567
1568 return num;
1569 }
1570
1571 static int iommu_attach_vm_domain(struct dmar_domain *domain,
1572 struct intel_iommu *iommu)
1573 {
1574 int num;
1575 unsigned long ndomains;
1576
1577 ndomains = cap_ndoms(iommu->cap);
1578 for_each_set_bit(num, iommu->domain_ids, ndomains)
1579 if (iommu->domains[num] == domain)
1580 return num;
1581
1582 return __iommu_attach_domain(domain, iommu);
1583 }
1584
1585 static void iommu_detach_domain(struct dmar_domain *domain,
1586 struct intel_iommu *iommu)
1587 {
1588 unsigned long flags;
1589 int num, ndomains;
1590
1591 spin_lock_irqsave(&iommu->lock, flags);
1592 if (domain_type_is_vm_or_si(domain)) {
1593 ndomains = cap_ndoms(iommu->cap);
1594 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1595 if (iommu->domains[num] == domain) {
1596 clear_bit(num, iommu->domain_ids);
1597 iommu->domains[num] = NULL;
1598 break;
1599 }
1600 }
1601 } else {
1602 clear_bit(domain->id, iommu->domain_ids);
1603 iommu->domains[domain->id] = NULL;
1604 }
1605 spin_unlock_irqrestore(&iommu->lock, flags);
1606 }
1607
1608 static void domain_attach_iommu(struct dmar_domain *domain,
1609 struct intel_iommu *iommu)
1610 {
1611 unsigned long flags;
1612
1613 spin_lock_irqsave(&domain->iommu_lock, flags);
1614 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1615 domain->iommu_count++;
1616 if (domain->iommu_count == 1)
1617 domain->nid = iommu->node;
1618 domain_update_iommu_cap(domain);
1619 }
1620 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1621 }
1622
1623 static int domain_detach_iommu(struct dmar_domain *domain,
1624 struct intel_iommu *iommu)
1625 {
1626 unsigned long flags;
1627 int count = INT_MAX;
1628
1629 spin_lock_irqsave(&domain->iommu_lock, flags);
1630 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1631 count = --domain->iommu_count;
1632 domain_update_iommu_cap(domain);
1633 }
1634 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1635
1636 return count;
1637 }
1638
1639 static struct iova_domain reserved_iova_list;
1640 static struct lock_class_key reserved_rbtree_key;
1641
1642 static int dmar_init_reserved_ranges(void)
1643 {
1644 struct pci_dev *pdev = NULL;
1645 struct iova *iova;
1646 int i;
1647
1648 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1649 DMA_32BIT_PFN);
1650
1651 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1652 &reserved_rbtree_key);
1653
1654 /* IOAPIC ranges shouldn't be accessed by DMA */
1655 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1656 IOVA_PFN(IOAPIC_RANGE_END));
1657 if (!iova) {
1658 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1659 return -ENODEV;
1660 }
1661
1662 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1663 for_each_pci_dev(pdev) {
1664 struct resource *r;
1665
1666 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1667 r = &pdev->resource[i];
1668 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1669 continue;
1670 iova = reserve_iova(&reserved_iova_list,
1671 IOVA_PFN(r->start),
1672 IOVA_PFN(r->end));
1673 if (!iova) {
1674 printk(KERN_ERR "Reserve iova failed\n");
1675 return -ENODEV;
1676 }
1677 }
1678 }
1679 return 0;
1680 }
1681
1682 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1683 {
1684 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1685 }
1686
1687 static inline int guestwidth_to_adjustwidth(int gaw)
1688 {
1689 int agaw;
1690 int r = (gaw - 12) % 9;
1691
1692 if (r == 0)
1693 agaw = gaw;
1694 else
1695 agaw = gaw + 9 - r;
1696 if (agaw > 64)
1697 agaw = 64;
1698 return agaw;
1699 }
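/*
 * This rounds the guest address width up to the next value of the form
 * 12 + 9*n that the page-table format can represent, e.g. a gaw of 36
 * becomes 39, while 39 and 48 are already exact and stay unchanged.
 */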
1700
1701 static int domain_init(struct dmar_domain *domain, int guest_width)
1702 {
1703 struct intel_iommu *iommu;
1704 int adjust_width, agaw;
1705 unsigned long sagaw;
1706
1707 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1708 DMA_32BIT_PFN);
1709 domain_reserve_special_ranges(domain);
1710
1711 /* calculate AGAW */
1712 iommu = domain_get_iommu(domain);
1713 if (guest_width > cap_mgaw(iommu->cap))
1714 guest_width = cap_mgaw(iommu->cap);
1715 domain->gaw = guest_width;
1716 adjust_width = guestwidth_to_adjustwidth(guest_width);
1717 agaw = width_to_agaw(adjust_width);
1718 sagaw = cap_sagaw(iommu->cap);
1719 if (!test_bit(agaw, &sagaw)) {
1720 /* hardware doesn't support it, choose a bigger one */
1721 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1722 agaw = find_next_bit(&sagaw, 5, agaw);
1723 if (agaw >= 5)
1724 return -ENODEV;
1725 }
1726 domain->agaw = agaw;
1727
1728 if (ecap_coherent(iommu->ecap))
1729 domain->iommu_coherency = 1;
1730 else
1731 domain->iommu_coherency = 0;
1732
1733 if (ecap_sc_support(iommu->ecap))
1734 domain->iommu_snooping = 1;
1735 else
1736 domain->iommu_snooping = 0;
1737
1738 if (intel_iommu_superpage)
1739 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1740 else
1741 domain->iommu_superpage = 0;
1742
1743 domain->nid = iommu->node;
1744
1745 /* always allocate the top pgd */
1746 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1747 if (!domain->pgd)
1748 return -ENOMEM;
1749 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1750 return 0;
1751 }
1752
1753 static void domain_exit(struct dmar_domain *domain)
1754 {
1755 struct page *freelist = NULL;
1756 int i;
1757
1758 /* Domain 0 is reserved, so don't process it */
1759 if (!domain)
1760 return;
1761
1762 /* Flush any lazy unmaps that may reference this domain */
1763 if (!intel_iommu_strict)
1764 flush_unmaps_timeout(0);
1765
1766 /* remove associated devices */
1767 domain_remove_dev_info(domain);
1768
1769 /* destroy iovas */
1770 put_iova_domain(&domain->iovad);
1771
1772 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1773
1774 /* clear attached or cached domains */
1775 rcu_read_lock();
1776 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus)
1777 iommu_detach_domain(domain, g_iommus[i]);
1778 rcu_read_unlock();
1779
1780 dma_free_pagelist(freelist);
1781
1782 free_domain_mem(domain);
1783 }
1784
1785 static int domain_context_mapping_one(struct dmar_domain *domain,
1786 struct intel_iommu *iommu,
1787 u8 bus, u8 devfn, int translation)
1788 {
1789 struct context_entry *context;
1790 unsigned long flags;
1791 struct dma_pte *pgd;
1792 int id;
1793 int agaw;
1794 struct device_domain_info *info = NULL;
1795
1796 pr_debug("Set context mapping for %02x:%02x.%d\n",
1797 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1798
1799 BUG_ON(!domain->pgd);
1800 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1801 translation != CONTEXT_TT_MULTI_LEVEL);
1802
1803 context = device_to_context_entry(iommu, bus, devfn);
1804 if (!context)
1805 return -ENOMEM;
1806 spin_lock_irqsave(&iommu->lock, flags);
1807 if (context_present(context)) {
1808 spin_unlock_irqrestore(&iommu->lock, flags);
1809 return 0;
1810 }
1811
1812 id = domain->id;
1813 pgd = domain->pgd;
1814
1815 if (domain_type_is_vm_or_si(domain)) {
1816 if (domain_type_is_vm(domain)) {
1817 id = iommu_attach_vm_domain(domain, iommu);
1818 if (id < 0) {
1819 spin_unlock_irqrestore(&iommu->lock, flags);
1820 pr_err("IOMMU: no free domain ids\n");
1821 return -EFAULT;
1822 }
1823 }
1824
1825 /* Skip top levels of page tables for
1826 * iommus which have a smaller agaw than the default.
1827 * Unnecessary for PT mode.
1828 */
1829 if (translation != CONTEXT_TT_PASS_THROUGH) {
1830 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1831 pgd = phys_to_virt(dma_pte_addr(pgd));
1832 if (!dma_pte_present(pgd)) {
1833 spin_unlock_irqrestore(&iommu->lock, flags);
1834 return -ENOMEM;
1835 }
1836 }
1837 }
1838 }
1839
1840 context_set_domain_id(context, id);
1841
1842 if (translation != CONTEXT_TT_PASS_THROUGH) {
1843 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
1844 translation = info ? CONTEXT_TT_DEV_IOTLB :
1845 CONTEXT_TT_MULTI_LEVEL;
1846 }
1847 /*
1848 * In pass-through mode, AW must be programmed to indicate the largest
1849 * AGAW value supported by hardware, and ASR is ignored by hardware.
1850 */
1851 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
1852 context_set_address_width(context, iommu->msagaw);
1853 else {
1854 context_set_address_root(context, virt_to_phys(pgd));
1855 context_set_address_width(context, iommu->agaw);
1856 }
1857
1858 context_set_translation_type(context, translation);
1859 context_set_fault_enable(context);
1860 context_set_present(context);
1861 domain_flush_cache(domain, context, sizeof(*context));
1862
1863 /*
1864 * It's a non-present to present mapping. If the hardware doesn't cache
1865 * non-present entries we only need to flush the write-buffer. If it
1866 * _does_ cache non-present entries, then it does so in the special
1867 * domain #0, which we have to flush:
1868 */
1869 if (cap_caching_mode(iommu->cap)) {
1870 iommu->flush.flush_context(iommu, 0,
1871 (((u16)bus) << 8) | devfn,
1872 DMA_CCMD_MASK_NOBIT,
1873 DMA_CCMD_DEVICE_INVL);
1874 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
1875 } else {
1876 iommu_flush_write_buffer(iommu);
1877 }
1878 iommu_enable_dev_iotlb(info);
1879 spin_unlock_irqrestore(&iommu->lock, flags);
1880
1881 domain_attach_iommu(domain, iommu);
1882
1883 return 0;
1884 }
1885
1886 struct domain_context_mapping_data {
1887 struct dmar_domain *domain;
1888 struct intel_iommu *iommu;
1889 int translation;
1890 };
1891
1892 static int domain_context_mapping_cb(struct pci_dev *pdev,
1893 u16 alias, void *opaque)
1894 {
1895 struct domain_context_mapping_data *data = opaque;
1896
1897 return domain_context_mapping_one(data->domain, data->iommu,
1898 PCI_BUS_NUM(alias), alias & 0xff,
1899 data->translation);
1900 }
1901
1902 static int
1903 domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1904 int translation)
1905 {
1906 struct intel_iommu *iommu;
1907 u8 bus, devfn;
1908 struct domain_context_mapping_data data;
1909
1910 iommu = device_to_iommu(dev, &bus, &devfn);
1911 if (!iommu)
1912 return -ENODEV;
1913
1914 if (!dev_is_pci(dev))
1915 return domain_context_mapping_one(domain, iommu, bus, devfn,
1916 translation);
1917
1918 data.domain = domain;
1919 data.iommu = iommu;
1920 data.translation = translation;
1921
1922 return pci_for_each_dma_alias(to_pci_dev(dev),
1923 &domain_context_mapping_cb, &data);
1924 }
1925
1926 static int domain_context_mapped_cb(struct pci_dev *pdev,
1927 u16 alias, void *opaque)
1928 {
1929 struct intel_iommu *iommu = opaque;
1930
1931 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
1932 }
1933
1934 static int domain_context_mapped(struct device *dev)
1935 {
1936 struct intel_iommu *iommu;
1937 u8 bus, devfn;
1938
1939 iommu = device_to_iommu(dev, &bus, &devfn);
1940 if (!iommu)
1941 return -ENODEV;
1942
1943 if (!dev_is_pci(dev))
1944 return device_context_mapped(iommu, bus, devfn);
1945
1946 return !pci_for_each_dma_alias(to_pci_dev(dev),
1947 domain_context_mapped_cb, iommu);
1948 }
1949
1950 /* Returns the number of VT-d pages, but aligned to the MM page size */
1951 static inline unsigned long aligned_nrpages(unsigned long host_addr,
1952 size_t size)
1953 {
1954 host_addr &= ~PAGE_MASK;
1955 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1956 }
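/*
 * For example, with 4KiB MM pages a buffer starting at offset 0x234
 * within its page and 0x100 bytes long spans PAGE_ALIGN(0x334) = 0x1000
 * bytes, i.e. one VT-d page; the same buffer straddling a page boundary
 * would count as two.
 */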
1957
1958 /* Return largest possible superpage level for a given mapping */
1959 static inline int hardware_largepage_caps(struct dmar_domain *domain,
1960 unsigned long iov_pfn,
1961 unsigned long phy_pfn,
1962 unsigned long pages)
1963 {
1964 int support, level = 1;
1965 unsigned long pfnmerge;
1966
1967 support = domain->iommu_superpage;
1968
1969 /* To use a large page, the virtual *and* physical addresses
1970 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1971 of them will mean we have to use smaller pages. So just
1972 merge them and check both at once. */
1973 pfnmerge = iov_pfn | phy_pfn;
1974
1975 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1976 pages >>= VTD_STRIDE_SHIFT;
1977 if (!pages)
1978 break;
1979 pfnmerge >>= VTD_STRIDE_SHIFT;
1980 level++;
1981 support--;
1982 }
1983 return level;
1984 }
1985
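/*
 * Core mapping routine: populate the page tables of @domain for the IOVA
 * range starting at @iov_pfn.  The source is either a scatterlist (@sg)
 * or a contiguous physical range starting at @phys_pfn; superpages are
 * used when the hardware supports them and the alignment allows it.
 */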
1986 static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1987 struct scatterlist *sg, unsigned long phys_pfn,
1988 unsigned long nr_pages, int prot)
1989 {
1990 struct dma_pte *first_pte = NULL, *pte = NULL;
1991 phys_addr_t uninitialized_var(pteval);
1992 unsigned long sg_res = 0;
1993 unsigned int largepage_lvl = 0;
1994 unsigned long lvl_pages = 0;
1995
1996 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
1997
1998 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1999 return -EINVAL;
2000
2001 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2002
2003 if (!sg) {
2004 sg_res = nr_pages;
2005 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2006 }
2007
2008 while (nr_pages > 0) {
2009 uint64_t tmp;
2010
2011 if (!sg_res) {
2012 sg_res = aligned_nrpages(sg->offset, sg->length);
2013 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2014 sg->dma_length = sg->length;
2015 pteval = page_to_phys(sg_page(sg)) | prot;
2016 phys_pfn = pteval >> VTD_PAGE_SHIFT;
2017 }
2018
2019 if (!pte) {
2020 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2021
2022 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
2023 if (!pte)
2024 return -ENOMEM;
2025 /* It is a large page */
2026 if (largepage_lvl > 1) {
2027 pteval |= DMA_PTE_LARGE_PAGE;
2028 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2029 /*
2030 * Ensure that old small page tables are
2031 * removed to make room for superpage,
2032 * if they exist.
2033 */
2034 dma_pte_free_pagetable(domain, iov_pfn,
2035 iov_pfn + lvl_pages - 1);
2036 } else {
2037 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
2038 }
2039
2040 }
2041 /* We don't need a lock here; nobody else
2042 * touches this iova range.
2043 */
2044 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
2045 if (tmp) {
2046 static int dumps = 5;
2047 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2048 iov_pfn, tmp, (unsigned long long)pteval);
2049 if (dumps) {
2050 dumps--;
2051 debug_dma_dump_mappings(NULL);
2052 }
2053 WARN_ON(1);
2054 }
2055
2056 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2057
2058 BUG_ON(nr_pages < lvl_pages);
2059 BUG_ON(sg_res < lvl_pages);
2060
2061 nr_pages -= lvl_pages;
2062 iov_pfn += lvl_pages;
2063 phys_pfn += lvl_pages;
2064 pteval += lvl_pages * VTD_PAGE_SIZE;
2065 sg_res -= lvl_pages;
2066
2067 /* If the next PTE would be the first in a new page, then we
2068 need to flush the cache on the entries we've just written.
2069 And then we'll need to recalculate 'pte', so clear it and
2070 let it get set again in the if (!pte) block above.
2071
2072 If we're done (!nr_pages) we need to flush the cache too.
2073
2074 Also if we've been setting superpages, we may need to
2075 recalculate 'pte' and switch back to smaller pages for the
2076 end of the mapping, if the trailing size is not enough to
2077 use another superpage (i.e. sg_res < lvl_pages). */
2078 pte++;
2079 if (!nr_pages || first_pte_in_page(pte) ||
2080 (largepage_lvl > 1 && sg_res < lvl_pages)) {
2081 domain_flush_cache(domain, first_pte,
2082 (void *)pte - (void *)first_pte);
2083 pte = NULL;
2084 }
2085
2086 if (!sg_res && nr_pages)
2087 sg = sg_next(sg);
2088 }
2089 return 0;
2090 }
2091
2092 static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2093 struct scatterlist *sg, unsigned long nr_pages,
2094 int prot)
2095 {
2096 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2097 }
2098
2099 static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2100 unsigned long phys_pfn, unsigned long nr_pages,
2101 int prot)
2102 {
2103 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
2104 }
2105
2106 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
2107 {
2108 if (!iommu)
2109 return;
2110
2111 clear_context_table(iommu, bus, devfn);
2112 iommu->flush.flush_context(iommu, 0, 0, 0,
2113 DMA_CCMD_GLOBAL_INVL);
2114 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2115 }
2116
2117 static inline void unlink_domain_info(struct device_domain_info *info)
2118 {
2119 assert_spin_locked(&device_domain_lock);
2120 list_del(&info->link);
2121 list_del(&info->global);
2122 if (info->dev)
2123 info->dev->archdata.iommu = NULL;
2124 }
2125
2126 static void domain_remove_dev_info(struct dmar_domain *domain)
2127 {
2128 struct device_domain_info *info, *tmp;
2129 unsigned long flags;
2130
2131 spin_lock_irqsave(&device_domain_lock, flags);
2132 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
2133 unlink_domain_info(info);
2134 spin_unlock_irqrestore(&device_domain_lock, flags);
2135
2136 iommu_disable_dev_iotlb(info);
2137 iommu_detach_dev(info->iommu, info->bus, info->devfn);
2138
2139 if (domain_type_is_vm(domain)) {
2140 iommu_detach_dependent_devices(info->iommu, info->dev);
2141 domain_detach_iommu(domain, info->iommu);
2142 }
2143
2144 free_devinfo_mem(info);
2145 spin_lock_irqsave(&device_domain_lock, flags);
2146 }
2147 spin_unlock_irqrestore(&device_domain_lock, flags);
2148 }
2149
2150 /*
2151 * find_domain
2152 * Note: we use struct device->archdata.iommu to store the per-device info
2153 */
2154 static struct dmar_domain *find_domain(struct device *dev)
2155 {
2156 struct device_domain_info *info;
2157
2158 /* No lock here, assumes no domain exit in normal case */
2159 info = dev->archdata.iommu;
2160 if (info)
2161 return info->domain;
2162 return NULL;
2163 }
2164
2165 static inline struct device_domain_info *
2166 dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2167 {
2168 struct device_domain_info *info;
2169
2170 list_for_each_entry(info, &device_domain_list, global)
2171 if (info->iommu->segment == segment && info->bus == bus &&
2172 info->devfn == devfn)
2173 return info;
2174
2175 return NULL;
2176 }
2177
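/*
 * Allocate a device_domain_info for (@bus, @devfn) and link it to @domain.
 * If the device was attached to a domain in the meantime, the new info is
 * freed and the existing domain is returned; the caller must then free the
 * domain it passed in.
 */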
2178 static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
2179 int bus, int devfn,
2180 struct device *dev,
2181 struct dmar_domain *domain)
2182 {
2183 struct dmar_domain *found = NULL;
2184 struct device_domain_info *info;
2185 unsigned long flags;
2186
2187 info = alloc_devinfo_mem();
2188 if (!info)
2189 return NULL;
2190
2191 info->bus = bus;
2192 info->devfn = devfn;
2193 info->dev = dev;
2194 info->domain = domain;
2195 info->iommu = iommu;
2196
2197 spin_lock_irqsave(&device_domain_lock, flags);
2198 if (dev)
2199 found = find_domain(dev);
2200 else {
2201 struct device_domain_info *info2;
2202 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
2203 if (info2)
2204 found = info2->domain;
2205 }
2206 if (found) {
2207 spin_unlock_irqrestore(&device_domain_lock, flags);
2208 free_devinfo_mem(info);
2209 /* Caller must free the original domain */
2210 return found;
2211 }
2212
2213 list_add(&info->link, &domain->devices);
2214 list_add(&info->global, &device_domain_list);
2215 if (dev)
2216 dev->archdata.iommu = info;
2217 spin_unlock_irqrestore(&device_domain_lock, flags);
2218
2219 return domain;
2220 }
2221
2222 static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2223 {
2224 *(u16 *)opaque = alias;
2225 return 0;
2226 }
2227
2228 /* Find an existing domain for @dev, or allocate and initialize a new one */
2229 static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
2230 {
2231 struct dmar_domain *domain, *tmp;
2232 struct intel_iommu *iommu;
2233 struct device_domain_info *info;
2234 u16 dma_alias;
2235 unsigned long flags;
2236 u8 bus, devfn;
2237
2238 domain = find_domain(dev);
2239 if (domain)
2240 return domain;
2241
2242 iommu = device_to_iommu(dev, &bus, &devfn);
2243 if (!iommu)
2244 return NULL;
2245
2246 if (dev_is_pci(dev)) {
2247 struct pci_dev *pdev = to_pci_dev(dev);
2248
2249 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2250
2251 spin_lock_irqsave(&device_domain_lock, flags);
2252 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2253 PCI_BUS_NUM(dma_alias),
2254 dma_alias & 0xff);
2255 if (info) {
2256 iommu = info->iommu;
2257 domain = info->domain;
2258 }
2259 spin_unlock_irqrestore(&device_domain_lock, flags);
2260
2261 /* The DMA alias already has a domain, use it */
2262 if (info)
2263 goto found_domain;
2264 }
2265
2266 /* Allocate and initialize new domain for the device */
2267 domain = alloc_domain(0);
2268 if (!domain)
2269 return NULL;
2270 domain->id = iommu_attach_domain(domain, iommu);
2271 if (domain->id < 0) {
2272 free_domain_mem(domain);
2273 return NULL;
2274 }
2275 domain_attach_iommu(domain, iommu);
2276 if (domain_init(domain, gaw)) {
2277 domain_exit(domain);
2278 return NULL;
2279 }
2280
2281 /* register PCI DMA alias device */
2282 if (dev_is_pci(dev)) {
2283 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2284 dma_alias & 0xff, NULL, domain);
2285
2286 if (!tmp || tmp != domain) {
2287 domain_exit(domain);
2288 domain = tmp;
2289 }
2290
2291 if (!domain)
2292 return NULL;
2293 }
2294
2295 found_domain:
2296 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2297
2298 if (!tmp || tmp != domain) {
2299 domain_exit(domain);
2300 domain = tmp;
2301 }
2302
2303 return domain;
2304 }
2305
2306 static int iommu_identity_mapping;
2307 #define IDENTMAP_ALL 1
2308 #define IDENTMAP_GFX 2
2309 #define IDENTMAP_AZALIA 4
2310
2311 static int iommu_domain_identity_map(struct dmar_domain *domain,
2312 unsigned long long start,
2313 unsigned long long end)
2314 {
2315 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2316 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2317
2318 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2319 dma_to_mm_pfn(last_vpfn))) {
2320 printk(KERN_ERR "IOMMU: reserve iova failed\n");
2321 return -ENOMEM;
2322 }
2323
2324 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2325 start, end, domain->id);
2326 /*
2327 * The RMRR range might overlap with the physical memory range,
2328 * so clear it first.
2329 */
2330 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
2331
2332 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2333 last_vpfn - first_vpfn + 1,
2334 DMA_PTE_READ|DMA_PTE_WRITE);
2335 }
2336
2337 static int iommu_prepare_identity_map(struct device *dev,
2338 unsigned long long start,
2339 unsigned long long end)
2340 {
2341 struct dmar_domain *domain;
2342 int ret;
2343
2344 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2345 if (!domain)
2346 return -ENOMEM;
2347
2348 /* For _hardware_ passthrough, don't bother. But for software
2349 passthrough, we do it anyway -- it may indicate a memory
2350 range which is reserved in E820, and so didn't get set
2351 up to start with in si_domain */
2352 if (domain == si_domain && hw_pass_through) {
2353 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2354 dev_name(dev), start, end);
2355 return 0;
2356 }
2357
2358 printk(KERN_INFO
2359 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2360 dev_name(dev), start, end);
2361
2362 if (end < start) {
2363 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2364 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2365 dmi_get_system_info(DMI_BIOS_VENDOR),
2366 dmi_get_system_info(DMI_BIOS_VERSION),
2367 dmi_get_system_info(DMI_PRODUCT_VERSION));
2368 ret = -EIO;
2369 goto error;
2370 }
2371
2372 if (end >> agaw_to_width(domain->agaw)) {
2373 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2374 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2375 agaw_to_width(domain->agaw),
2376 dmi_get_system_info(DMI_BIOS_VENDOR),
2377 dmi_get_system_info(DMI_BIOS_VERSION),
2378 dmi_get_system_info(DMI_PRODUCT_VERSION));
2379 ret = -EIO;
2380 goto error;
2381 }
2382
2383 ret = iommu_domain_identity_map(domain, start, end);
2384 if (ret)
2385 goto error;
2386
2387 /* context entry init */
2388 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2389 if (ret)
2390 goto error;
2391
2392 return 0;
2393
2394 error:
2395 domain_exit(domain);
2396 return ret;
2397 }
2398
2399 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2400 struct device *dev)
2401 {
2402 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2403 return 0;
2404 return iommu_prepare_identity_map(dev, rmrr->base_address,
2405 rmrr->end_address);
2406 }
2407
2408 #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
2409 static inline void iommu_prepare_isa(void)
2410 {
2411 struct pci_dev *pdev;
2412 int ret;
2413
2414 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2415 if (!pdev)
2416 return;
2417
2418 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
2419 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
2420
2421 if (ret)
2422 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2423 "floppy might not work\n");
2424
2425 pci_dev_put(pdev);
2426 }
2427 #else
2428 static inline void iommu_prepare_isa(void)
2429 {
2430 return;
2431 }
2432 #endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
2433
2434 static int md_domain_init(struct dmar_domain *domain, int guest_width);
2435
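/*
 * Set up the static identity (si) domain: attach it to every active IOMMU
 * and, unless hardware pass-through is used, build a 1:1 mapping for all
 * usable physical memory.
 */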
2436 static int __init si_domain_init(int hw)
2437 {
2438 struct dmar_drhd_unit *drhd;
2439 struct intel_iommu *iommu;
2440 int nid, ret = 0;
2441 bool first = true;
2442
2443 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2444 if (!si_domain)
2445 return -EFAULT;
2446
2447 for_each_active_iommu(iommu, drhd) {
2448 ret = iommu_attach_domain(si_domain, iommu);
2449 if (ret < 0) {
2450 domain_exit(si_domain);
2451 return -EFAULT;
2452 } else if (first) {
2453 si_domain->id = ret;
2454 first = false;
2455 } else if (si_domain->id != ret) {
2456 domain_exit(si_domain);
2457 return -EFAULT;
2458 }
2459 domain_attach_iommu(si_domain, iommu);
2460 }
2461
2462 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2463 domain_exit(si_domain);
2464 return -EFAULT;
2465 }
2466
2467 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2468 si_domain->id);
2469
2470 if (hw)
2471 return 0;
2472
2473 for_each_online_node(nid) {
2474 unsigned long start_pfn, end_pfn;
2475 int i;
2476
2477 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2478 ret = iommu_domain_identity_map(si_domain,
2479 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2480 if (ret)
2481 return ret;
2482 }
2483 }
2484
2485 return 0;
2486 }
2487
2488 static int identity_mapping(struct device *dev)
2489 {
2490 struct device_domain_info *info;
2491
2492 if (likely(!iommu_identity_mapping))
2493 return 0;
2494
2495 info = dev->archdata.iommu;
2496 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2497 return (info->domain == si_domain);
2498
2499 return 0;
2500 }
2501
2502 static int domain_add_dev_info(struct dmar_domain *domain,
2503 struct device *dev, int translation)
2504 {
2505 struct dmar_domain *ndomain;
2506 struct intel_iommu *iommu;
2507 u8 bus, devfn;
2508 int ret;
2509
2510 iommu = device_to_iommu(dev, &bus, &devfn);
2511 if (!iommu)
2512 return -ENODEV;
2513
2514 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2515 if (ndomain != domain)
2516 return -EBUSY;
2517
2518 ret = domain_context_mapping(domain, dev, translation);
2519 if (ret) {
2520 domain_remove_one_dev_info(domain, dev);
2521 return ret;
2522 }
2523
2524 return 0;
2525 }
2526
2527 static bool device_has_rmrr(struct device *dev)
2528 {
2529 struct dmar_rmrr_unit *rmrr;
2530 struct device *tmp;
2531 int i;
2532
2533 rcu_read_lock();
2534 for_each_rmrr_units(rmrr) {
2535 /*
2536 * Return TRUE if this RMRR contains the device that
2537 * is passed in.
2538 */
2539 for_each_active_dev_scope(rmrr->devices,
2540 rmrr->devices_cnt, i, tmp)
2541 if (tmp == dev) {
2542 rcu_read_unlock();
2543 return true;
2544 }
2545 }
2546 rcu_read_unlock();
2547 return false;
2548 }
2549
2550 /*
2551 * There are a couple cases where we need to restrict the functionality of
2552 * devices associated with RMRRs. The first is when evaluating a device for
2553 * identity mapping because problems exist when devices are moved in and out
2554 * of domains and their respective RMRR information is lost. This means that
2555 * a device with associated RMRRs will never be in a "passthrough" domain.
2556 * The second is use of the device through the IOMMU API. This interface
2557 * expects to have full control of the IOVA space for the device. We cannot
2558 * satisfy both the requirement that RMRR access is maintained and have an
2559 * unencumbered IOVA space. We also have no ability to quiesce the device's
2560 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2561 * We therefore prevent devices associated with an RMRR from participating in
2562 * the IOMMU API, which eliminates them from device assignment.
2563 *
2564 * In both cases we assume that PCI USB devices with RMRRs have them largely
2565 * for historical reasons and that the RMRR space is not actively used post
2566 * boot. This exclusion may change if vendors begin to abuse it.
2567 */
2568 static bool device_is_rmrr_locked(struct device *dev)
2569 {
2570 if (!device_has_rmrr(dev))
2571 return false;
2572
2573 if (dev_is_pci(dev)) {
2574 struct pci_dev *pdev = to_pci_dev(dev);
2575
2576 if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
2577 return false;
2578 }
2579
2580 return true;
2581 }
2582
2583 static int iommu_should_identity_map(struct device *dev, int startup)
2584 {
2585
2586 if (dev_is_pci(dev)) {
2587 struct pci_dev *pdev = to_pci_dev(dev);
2588
2589 if (device_is_rmrr_locked(dev))
2590 return 0;
2591
2592 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2593 return 1;
2594
2595 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2596 return 1;
2597
2598 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2599 return 0;
2600
2601 /*
2602 * We want to start off with all devices in the 1:1 domain, and
2603 * take them out later if we find they can't access all of memory.
2604 *
2605 * However, we can't do this for PCI devices behind bridges,
2606 * because all PCI devices behind the same bridge will end up
2607 * with the same source-id on their transactions.
2608 *
2609 * Practically speaking, we can't change things around for these
2610 * devices at run-time, because we can't be sure there'll be no
2611 * DMA transactions in flight for any of their siblings.
2612 *
2613 * So PCI devices (unless they're on the root bus) as well as
2614 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2615 * the 1:1 domain, just in _case_ one of their siblings turns out
2616 * not to be able to map all of memory.
2617 */
2618 if (!pci_is_pcie(pdev)) {
2619 if (!pci_is_root_bus(pdev->bus))
2620 return 0;
2621 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2622 return 0;
2623 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
2624 return 0;
2625 } else {
2626 if (device_has_rmrr(dev))
2627 return 0;
2628 }
2629
2630 /*
2631 * At boot time, we don't yet know if devices will be 64-bit capable.
2632 * Assume that they will — if they turn out not to be, then we can
2633 * take them out of the 1:1 domain later.
2634 */
2635 if (!startup) {
2636 /*
2637 * If the device's dma_mask is less than the system's memory
2638 * size then this is not a candidate for identity mapping.
2639 */
2640 u64 dma_mask = *dev->dma_mask;
2641
2642 if (dev->coherent_dma_mask &&
2643 dev->coherent_dma_mask < dma_mask)
2644 dma_mask = dev->coherent_dma_mask;
2645
2646 return dma_mask >= dma_get_required_mask(dev);
2647 }
2648
2649 return 1;
2650 }
2651
2652 static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2653 {
2654 int ret;
2655
2656 if (!iommu_should_identity_map(dev, 1))
2657 return 0;
2658
2659 ret = domain_add_dev_info(si_domain, dev,
2660 hw ? CONTEXT_TT_PASS_THROUGH :
2661 CONTEXT_TT_MULTI_LEVEL);
2662 if (!ret)
2663 pr_info("IOMMU: %s identity mapping for device %s\n",
2664 hw ? "hardware" : "software", dev_name(dev));
2665 else if (ret == -ENODEV)
2666 /* device not associated with an iommu */
2667 ret = 0;
2668
2669 return ret;
2670 }
2671
2672
2673 static int __init iommu_prepare_static_identity_mapping(int hw)
2674 {
2675 struct pci_dev *pdev = NULL;
2676 struct dmar_drhd_unit *drhd;
2677 struct intel_iommu *iommu;
2678 struct device *dev;
2679 int i;
2680 int ret = 0;
2681
2682 ret = si_domain_init(hw);
2683 if (ret)
2684 return -EFAULT;
2685
2686 for_each_pci_dev(pdev) {
2687 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2688 if (ret)
2689 return ret;
2690 }
2691
2692 for_each_active_iommu(iommu, drhd)
2693 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2694 struct acpi_device_physical_node *pn;
2695 struct acpi_device *adev;
2696
2697 if (dev->bus != &acpi_bus_type)
2698 continue;
2699
2700 adev = to_acpi_device(dev);
2701 mutex_lock(&adev->physical_node_lock);
2702 list_for_each_entry(pn, &adev->physical_node_list, node) {
2703 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2704 if (ret)
2705 break;
2706 }
2707 mutex_unlock(&adev->physical_node_lock);
2708 if (ret)
2709 return ret;
2710 }
2711
2712 return 0;
2713 }
2714
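/*
 * Select the invalidation interface for @iommu: queued invalidation (QI)
 * if it can be enabled, otherwise fall back to register-based invalidation.
 */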
2715 static void intel_iommu_init_qi(struct intel_iommu *iommu)
2716 {
2717 /*
2718 * Start from a sane iommu hardware state.
2719 * If the queued invalidation was already initialized by us
2720 * (for example, while enabling interrupt-remapping) then
2721 * we already have things rolling from a sane state.
2722 */
2723 if (!iommu->qi) {
2724 /*
2725 * Clear any previous faults.
2726 */
2727 dmar_fault(-1, iommu);
2728 /*
2729 * Disable queued invalidation if supported and already enabled
2730 * before OS handover.
2731 */
2732 dmar_disable_qi(iommu);
2733 }
2734
2735 if (dmar_enable_qi(iommu)) {
2736 /*
2737 * Queued invalidation not enabled; use register-based invalidation
2738 */
2739 iommu->flush.flush_context = __iommu_flush_context;
2740 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2741 pr_info("IOMMU: %s using Register based invalidation\n",
2742 iommu->name);
2743 } else {
2744 iommu->flush.flush_context = qi_flush_context;
2745 iommu->flush.flush_iotlb = qi_flush_iotlb;
2746 pr_info("IOMMU: %s using Queued invalidation\n", iommu->name);
2747 }
2748 }
2749
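/*
 * Boot-time initialization of all DMAR units: allocate per-IOMMU state,
 * set up identity, RMRR and ISA mappings as configured, then enable
 * fault reporting and translation on each unit.
 */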
2750 static int __init init_dmars(void)
2751 {
2752 struct dmar_drhd_unit *drhd;
2753 struct dmar_rmrr_unit *rmrr;
2754 struct device *dev;
2755 struct intel_iommu *iommu;
2756 int i, ret;
2757
2758 /*
2759 * for each drhd
2760 * allocate root
2761 * initialize and program root entry to not present
2762 * endfor
2763 */
2764 for_each_drhd_unit(drhd) {
2765 /*
2766 * lock not needed as this is only incremented in the
2767 * single-threaded kernel __init code path; all other
2768 * accesses are read-only
2769 */
2770 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
2771 g_num_of_iommus++;
2772 continue;
2773 }
2774 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2775 DMAR_UNITS_SUPPORTED);
2776 }
2777
2778 /* Preallocate enough resources for IOMMU hot-addition */
2779 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
2780 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
2781
2782 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2783 GFP_KERNEL);
2784 if (!g_iommus) {
2785 printk(KERN_ERR "Allocating global iommu array failed\n");
2786 ret = -ENOMEM;
2787 goto error;
2788 }
2789
2790 deferred_flush = kzalloc(g_num_of_iommus *
2791 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2792 if (!deferred_flush) {
2793 ret = -ENOMEM;
2794 goto free_g_iommus;
2795 }
2796
2797 for_each_active_iommu(iommu, drhd) {
2798 g_iommus[iommu->seq_id] = iommu;
2799
2800 ret = iommu_init_domains(iommu);
2801 if (ret)
2802 goto free_iommu;
2803
2804 /*
2805 * TBD:
2806 * we could share the same root & context tables
2807 * among all IOMMUs. Need to split it later.
2808 */
2809 ret = iommu_alloc_root_entry(iommu);
2810 if (ret)
2811 goto free_iommu;
2812 if (!ecap_pass_through(iommu->ecap))
2813 hw_pass_through = 0;
2814 }
2815
2816 for_each_active_iommu(iommu, drhd)
2817 intel_iommu_init_qi(iommu);
2818
2819 if (iommu_pass_through)
2820 iommu_identity_mapping |= IDENTMAP_ALL;
2821
2822 #ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
2823 iommu_identity_mapping |= IDENTMAP_GFX;
2824 #endif
2825
2826 check_tylersburg_isoch();
2827
2828 /*
2829 * If any form of identity mapping was requested (pass-through, gfx,
2830 * azalia or all devices), set up the static identity (si) domain now
2831 * and add the eligible devices to it. RMRR and ISA mappings follow below.
2832 */
2833 if (iommu_identity_mapping) {
2834 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
2835 if (ret) {
2836 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2837 goto free_iommu;
2838 }
2839 }
2840 /*
2841 * For each rmrr
2842 * for each dev attached to rmrr
2843 * do
2844 * locate drhd for dev, alloc domain for dev
2845 * allocate free domain
2846 * allocate page table entries for rmrr
2847 * if context not allocated for bus
2848 * allocate and init context
2849 * set present in root table for this bus
2850 * init context with domain, translation etc
2851 * endfor
2852 * endfor
2853 */
2854 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2855 for_each_rmrr_units(rmrr) {
2856 /* some BIOSes list non-existent devices in the DMAR table. */
2857 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
2858 i, dev) {
2859 ret = iommu_prepare_rmrr_dev(rmrr, dev);
2860 if (ret)
2861 printk(KERN_ERR
2862 "IOMMU: mapping reserved region failed\n");
2863 }
2864 }
2865
2866 iommu_prepare_isa();
2867
2868 /*
2869 * for each drhd
2870 * enable fault log
2871 * global invalidate context cache
2872 * global invalidate iotlb
2873 * enable translation
2874 */
2875 for_each_iommu(iommu, drhd) {
2876 if (drhd->ignored) {
2877 /*
2878 * we always have to disable PMRs or DMA may fail on
2879 * this device
2880 */
2881 if (force_on)
2882 iommu_disable_protect_mem_regions(iommu);
2883 continue;
2884 }
2885
2886 iommu_flush_write_buffer(iommu);
2887
2888 ret = dmar_set_interrupt(iommu);
2889 if (ret)
2890 goto free_iommu;
2891
2892 iommu_set_root_entry(iommu);
2893
2894 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
2895 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
2896 iommu_enable_translation(iommu);
2897 iommu_disable_protect_mem_regions(iommu);
2898 }
2899
2900 return 0;
2901
2902 free_iommu:
2903 for_each_active_iommu(iommu, drhd) {
2904 disable_dmar_iommu(iommu);
2905 free_dmar_iommu(iommu);
2906 }
2907 kfree(deferred_flush);
2908 free_g_iommus:
2909 kfree(g_iommus);
2910 error:
2911 return ret;
2912 }
2913
2914 /* This takes a number of _MM_ pages, not VTD pages */
2915 static struct iova *intel_alloc_iova(struct device *dev,
2916 struct dmar_domain *domain,
2917 unsigned long nrpages, uint64_t dma_mask)
2918 {
2919 struct iova *iova = NULL;
2920
2921 /* Restrict dma_mask to the width that the iommu can handle */
2922 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2923
2924 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
2925 /*
2926 * First try to allocate an io virtual address in
2927 * DMA_BIT_MASK(32) and if that fails then try allocating
2928 * from higher range
2929 */
2930 iova = alloc_iova(&domain->iovad, nrpages,
2931 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2932 if (iova)
2933 return iova;
2934 }
2935 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2936 if (unlikely(!iova)) {
2937 printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2938 nrpages, dev_name(dev));
2939 return NULL;
2940 }
2941
2942 return iova;
2943 }
2944
2945 static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
2946 {
2947 struct dmar_domain *domain;
2948 int ret;
2949
2950 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
2951 if (!domain) {
2952 printk(KERN_ERR "Allocating domain for %s failed\n",
2953 dev_name(dev));
2954 return NULL;
2955 }
2956
2957 /* make sure context mapping is ok */
2958 if (unlikely(!domain_context_mapped(dev))) {
2959 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
2960 if (ret) {
2961 printk(KERN_ERR "Domain context map for %s failed\n",
2962 dev_name(dev));
2963 return NULL;
2964 }
2965 }
2966
2967 return domain;
2968 }
2969
2970 static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
2971 {
2972 struct device_domain_info *info;
2973
2974 /* No lock here, assumes no domain exit in normal case */
2975 info = dev->archdata.iommu;
2976 if (likely(info))
2977 return info->domain;
2978
2979 return __get_valid_domain_for_dev(dev);
2980 }
2981
2982 static int iommu_dummy(struct device *dev)
2983 {
2984 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2985 }
2986
2987 /* Check if the dev needs to go through the non-identity map and unmap process. */
2988 static int iommu_no_mapping(struct device *dev)
2989 {
2990 int found;
2991
2992 if (iommu_dummy(dev))
2993 return 1;
2994
2995 if (!iommu_identity_mapping)
2996 return 0;
2997
2998 found = identity_mapping(dev);
2999 if (found) {
3000 if (iommu_should_identity_map(dev, 0))
3001 return 1;
3002 else {
3003 /*
3004 * The 32-bit DMA device is removed from si_domain and falls
3005 * back to non-identity mapping.
3006 */
3007 domain_remove_one_dev_info(si_domain, dev);
3008 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
3009 dev_name(dev));
3010 return 0;
3011 }
3012 } else {
3013 /*
3014 * If a 64-bit DMA device was detached from a VM, the device
3015 * is put back into si_domain for identity mapping.
3016 */
3017 if (iommu_should_identity_map(dev, 0)) {
3018 int ret;
3019 ret = domain_add_dev_info(si_domain, dev,
3020 hw_pass_through ?
3021 CONTEXT_TT_PASS_THROUGH :
3022 CONTEXT_TT_MULTI_LEVEL);
3023 if (!ret) {
3024 printk(KERN_INFO "64bit %s uses identity mapping\n",
3025 dev_name(dev));
3026 return 1;
3027 }
3028 }
3029 }
3030
3031 return 0;
3032 }
3033
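/*
 * Common streaming DMA mapping path: allocate an IOVA range for
 * @paddr/@size within @dma_mask, map it in the device's domain and flush
 * as required.  Identity-mapped devices simply return @paddr.  Returns
 * the bus (DMA) address, or 0 on failure.
 */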
3034 static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
3035 size_t size, int dir, u64 dma_mask)
3036 {
3037 struct dmar_domain *domain;
3038 phys_addr_t start_paddr;
3039 struct iova *iova;
3040 int prot = 0;
3041 int ret;
3042 struct intel_iommu *iommu;
3043 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
3044
3045 BUG_ON(dir == DMA_NONE);
3046
3047 if (iommu_no_mapping(dev))
3048 return paddr;
3049
3050 domain = get_valid_domain_for_dev(dev);
3051 if (!domain)
3052 return 0;
3053
3054 iommu = domain_get_iommu(domain);
3055 size = aligned_nrpages(paddr, size);
3056
3057 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
3058 if (!iova)
3059 goto error;
3060
3061 /*
3062 * Check if DMAR supports zero-length reads on write only
3063 * mappings..
3064 */
3065 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3066 !cap_zlr(iommu->cap))
3067 prot |= DMA_PTE_READ;
3068 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3069 prot |= DMA_PTE_WRITE;
3070 /*
3071 * paddr .. paddr + size might span a partial page, so we should map the
3072 * whole page. Note: if two parts of one page are separately mapped, we
3073 * might have two guest_addr mappings to the same host paddr, but this
3074 * is not a big problem
3075 */
3076 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
3077 mm_to_dma_pfn(paddr_pfn), size, prot);
3078 if (ret)
3079 goto error;
3080
3081 /* it's a non-present to present mapping. Only flush if caching mode */
3082 if (cap_caching_mode(iommu->cap))
3083 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
3084 else
3085 iommu_flush_write_buffer(iommu);
3086
3087 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3088 start_paddr += paddr & ~PAGE_MASK;
3089 return start_paddr;
3090
3091 error:
3092 if (iova)
3093 __free_iova(&domain->iovad, iova);
3094 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
3095 dev_name(dev), size, (unsigned long long)paddr, dir);
3096 return 0;
3097 }
3098
3099 static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3100 unsigned long offset, size_t size,
3101 enum dma_data_direction dir,
3102 struct dma_attrs *attrs)
3103 {
3104 return __intel_map_single(dev, page_to_phys(page) + offset, size,
3105 dir, *dev->dma_mask);
3106 }
3107
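/*
 * Flush all deferred unmaps: for every IOMMU with pending entries,
 * invalidate the IOTLB (or the device IOTLBs) and release the queued
 * IOVAs and page-table freelists.  Called with async_umap_flush_lock held.
 */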
3108 static void flush_unmaps(void)
3109 {
3110 int i, j;
3111
3112 timer_on = 0;
3113
3114 /* just flush them all */
3115 for (i = 0; i < g_num_of_iommus; i++) {
3116 struct intel_iommu *iommu = g_iommus[i];
3117 if (!iommu)
3118 continue;
3119
3120 if (!deferred_flush[i].next)
3121 continue;
3122
3123 /* In caching mode, global flushes make emulation expensive */
3124 if (!cap_caching_mode(iommu->cap))
3125 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3126 DMA_TLB_GLOBAL_FLUSH);
3127 for (j = 0; j < deferred_flush[i].next; j++) {
3128 unsigned long mask;
3129 struct iova *iova = deferred_flush[i].iova[j];
3130 struct dmar_domain *domain = deferred_flush[i].domain[j];
3131
3132 /* On real hardware multiple invalidations are expensive */
3133 if (cap_caching_mode(iommu->cap))
3134 iommu_flush_iotlb_psi(iommu, domain->id,
3135 iova->pfn_lo, iova_size(iova),
3136 !deferred_flush[i].freelist[j], 0);
3137 else {
3138 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
3139 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3140 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3141 }
3142 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
3143 if (deferred_flush[i].freelist[j])
3144 dma_free_pagelist(deferred_flush[i].freelist[j]);
3145 }
3146 deferred_flush[i].next = 0;
3147 }
3148
3149 list_size = 0;
3150 }
3151
3152 static void flush_unmaps_timeout(unsigned long data)
3153 {
3154 unsigned long flags;
3155
3156 spin_lock_irqsave(&async_umap_flush_lock, flags);
3157 flush_unmaps();
3158 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3159 }
3160
3161 static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
3162 {
3163 unsigned long flags;
3164 int next, iommu_id;
3165 struct intel_iommu *iommu;
3166
3167 spin_lock_irqsave(&async_umap_flush_lock, flags);
3168 if (list_size == HIGH_WATER_MARK)
3169 flush_unmaps();
3170
3171 iommu = domain_get_iommu(dom);
3172 iommu_id = iommu->seq_id;
3173
3174 next = deferred_flush[iommu_id].next;
3175 deferred_flush[iommu_id].domain[next] = dom;
3176 deferred_flush[iommu_id].iova[next] = iova;
3177 deferred_flush[iommu_id].freelist[next] = freelist;
3178 deferred_flush[iommu_id].next++;
3179
3180 if (!timer_on) {
3181 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3182 timer_on = 1;
3183 }
3184 list_size++;
3185 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3186 }
3187
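/*
 * Tear down the mapping at @dev_addr.  In strict mode the IOTLB is
 * flushed and the IOVA freed immediately; otherwise the IOVA is queued
 * for batched (deferred) flushing.
 */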
3188 static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
3189 {
3190 struct dmar_domain *domain;
3191 unsigned long start_pfn, last_pfn;
3192 struct iova *iova;
3193 struct intel_iommu *iommu;
3194 struct page *freelist;
3195
3196 if (iommu_no_mapping(dev))
3197 return;
3198
3199 domain = find_domain(dev);
3200 BUG_ON(!domain);
3201
3202 iommu = domain_get_iommu(domain);
3203
3204 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
3205 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3206 (unsigned long long)dev_addr))
3207 return;
3208
3209 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3210 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
3211
3212 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
3213 dev_name(dev), start_pfn, last_pfn);
3214
3215 freelist = domain_unmap(domain, start_pfn, last_pfn);
3216
3217 if (intel_iommu_strict) {
3218 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
3219 last_pfn - start_pfn + 1, !freelist, 0);
3220 /* free iova */
3221 __free_iova(&domain->iovad, iova);
3222 dma_free_pagelist(freelist);
3223 } else {
3224 add_unmap(domain, iova, freelist);
3225 /*
3226 * queue up the release of the unmap to save the 1/6th of the
3227 * CPU time used up by the iotlb flush operation...
3228 */
3229 }
3230 }
3231
3232 static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3233 size_t size, enum dma_data_direction dir,
3234 struct dma_attrs *attrs)
3235 {
3236 intel_unmap(dev, dev_addr);
3237 }
3238
3239 static void *intel_alloc_coherent(struct device *dev, size_t size,
3240 dma_addr_t *dma_handle, gfp_t flags,
3241 struct dma_attrs *attrs)
3242 {
3243 struct page *page = NULL;
3244 int order;
3245
3246 size = PAGE_ALIGN(size);
3247 order = get_order(size);
3248
3249 if (!iommu_no_mapping(dev))
3250 flags &= ~(GFP_DMA | GFP_DMA32);
3251 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3252 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
3253 flags |= GFP_DMA;
3254 else
3255 flags |= GFP_DMA32;
3256 }
3257
3258 if (flags & __GFP_WAIT) {
3259 unsigned int count = size >> PAGE_SHIFT;
3260
3261 page = dma_alloc_from_contiguous(dev, count, order);
3262 if (page && iommu_no_mapping(dev) &&
3263 page_to_phys(page) + size > dev->coherent_dma_mask) {
3264 dma_release_from_contiguous(dev, page, count);
3265 page = NULL;
3266 }
3267 }
3268
3269 if (!page)
3270 page = alloc_pages(flags, order);
3271 if (!page)
3272 return NULL;
3273 memset(page_address(page), 0, size);
3274
3275 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
3276 DMA_BIDIRECTIONAL,
3277 dev->coherent_dma_mask);
3278 if (*dma_handle)
3279 return page_address(page);
3280 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3281 __free_pages(page, order);
3282
3283 return NULL;
3284 }
3285
3286 static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
3287 dma_addr_t dma_handle, struct dma_attrs *attrs)
3288 {
3289 int order;
3290 struct page *page = virt_to_page(vaddr);
3291
3292 size = PAGE_ALIGN(size);
3293 order = get_order(size);
3294
3295 intel_unmap(dev, dma_handle);
3296 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3297 __free_pages(page, order);
3298 }
3299
3300 static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
3301 int nelems, enum dma_data_direction dir,
3302 struct dma_attrs *attrs)
3303 {
3304 intel_unmap(dev, sglist[0].dma_address);
3305 }
3306
3307 static int intel_nontranslate_map_sg(struct device *hddev,
3308 struct scatterlist *sglist, int nelems, int dir)
3309 {
3310 int i;
3311 struct scatterlist *sg;
3312
3313 for_each_sg(sglist, sg, nelems, i) {
3314 BUG_ON(!sg_page(sg));
3315 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
3316 sg->dma_length = sg->length;
3317 }
3318 return nelems;
3319 }
3320
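/*
 * Map a scatterlist: allocate a single IOVA range large enough for the
 * whole list and map each element into it.  Identity-mapped devices
 * bypass translation via intel_nontranslate_map_sg().
 */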
3321 static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
3322 enum dma_data_direction dir, struct dma_attrs *attrs)
3323 {
3324 int i;
3325 struct dmar_domain *domain;
3326 size_t size = 0;
3327 int prot = 0;
3328 struct iova *iova = NULL;
3329 int ret;
3330 struct scatterlist *sg;
3331 unsigned long start_vpfn;
3332 struct intel_iommu *iommu;
3333
3334 BUG_ON(dir == DMA_NONE);
3335 if (iommu_no_mapping(dev))
3336 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
3337
3338 domain = get_valid_domain_for_dev(dev);
3339 if (!domain)
3340 return 0;
3341
3342 iommu = domain_get_iommu(domain);
3343
3344 for_each_sg(sglist, sg, nelems, i)
3345 size += aligned_nrpages(sg->offset, sg->length);
3346
3347 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3348 *dev->dma_mask);
3349 if (!iova) {
3350 sglist->dma_length = 0;
3351 return 0;
3352 }
3353
3354 /*
3355 * Check if DMAR supports zero-length reads on write only
3356 * mappings..
3357 */
3358 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
3359 !cap_zlr(iommu->cap))
3360 prot |= DMA_PTE_READ;
3361 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3362 prot |= DMA_PTE_WRITE;
3363
3364 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
3365
3366 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
3367 if (unlikely(ret)) {
3368 dma_pte_free_pagetable(domain, start_vpfn,
3369 start_vpfn + size - 1);
3370 __free_iova(&domain->iovad, iova);
3371 return 0;
3372 }
3373
3374 /* it's a non-present to present mapping. Only flush if caching mode */
3375 if (cap_caching_mode(iommu->cap))
3376 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
3377 else
3378 iommu_flush_write_buffer(iommu);
3379
3380 return nelems;
3381 }
3382
3383 static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3384 {
3385 return !dma_addr;
3386 }
3387
3388 struct dma_map_ops intel_dma_ops = {
3389 .alloc = intel_alloc_coherent,
3390 .free = intel_free_coherent,
3391 .map_sg = intel_map_sg,
3392 .unmap_sg = intel_unmap_sg,
3393 .map_page = intel_map_page,
3394 .unmap_page = intel_unmap_page,
3395 .mapping_error = intel_mapping_error,
3396 };
3397
3398 static inline int iommu_domain_cache_init(void)
3399 {
3400 int ret = 0;
3401
3402 iommu_domain_cache = kmem_cache_create("iommu_domain",
3403 sizeof(struct dmar_domain),
3404 0,
3405 SLAB_HWCACHE_ALIGN,
3407 NULL);
3408 if (!iommu_domain_cache) {
3409 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3410 ret = -ENOMEM;
3411 }
3412
3413 return ret;
3414 }
3415
3416 static inline int iommu_devinfo_cache_init(void)
3417 {
3418 int ret = 0;
3419
3420 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3421 sizeof(struct device_domain_info),
3422 0,
3423 SLAB_HWCACHE_ALIGN,
3424 NULL);
3425 if (!iommu_devinfo_cache) {
3426 printk(KERN_ERR "Couldn't create devinfo cache\n");
3427 ret = -ENOMEM;
3428 }
3429
3430 return ret;
3431 }
3432
3433 static int __init iommu_init_mempool(void)
3434 {
3435 int ret;
3436 ret = iommu_iova_cache_init();
3437 if (ret)
3438 return ret;
3439
3440 ret = iommu_domain_cache_init();
3441 if (ret)
3442 goto domain_error;
3443
3444 ret = iommu_devinfo_cache_init();
3445 if (!ret)
3446 return ret;
3447
3448 kmem_cache_destroy(iommu_domain_cache);
3449 domain_error:
3450 iommu_iova_cache_destroy();
3451
3452 return -ENOMEM;
3453 }
3454
3455 static void __init iommu_exit_mempool(void)
3456 {
3457 kmem_cache_destroy(iommu_devinfo_cache);
3458 kmem_cache_destroy(iommu_domain_cache);
3459 iommu_iova_cache_destroy();
3460 }
3461
3462 static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3463 {
3464 struct dmar_drhd_unit *drhd;
3465 u32 vtbar;
3466 int rc;
3467
3468 /* We know that this device on this chipset has its own IOMMU.
3469 * If we find it under a different IOMMU, then the BIOS is lying
3470 * to us. Hope that the IOMMU for this device is actually
3471 * disabled, and it needs no translation...
3472 */
3473 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3474 if (rc) {
3475 /* "can't" happen */
3476 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3477 return;
3478 }
3479 vtbar &= 0xffff0000;
3480
3481 /* we know that this iommu should be at offset 0xa000 from vtbar */
3482 drhd = dmar_find_matched_drhd_unit(pdev);
3483 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3484 TAINT_FIRMWARE_WORKAROUND,
3485 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3486 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3487 }
3488 DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3489
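/*
 * Mark DMAR units that can be bypassed as ignored: units with an empty
 * device scope, and units covering only graphics devices when
 * dmar_map_gfx is disabled.
 */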
3490 static void __init init_no_remapping_devices(void)
3491 {
3492 struct dmar_drhd_unit *drhd;
3493 struct device *dev;
3494 int i;
3495
3496 for_each_drhd_unit(drhd) {
3497 if (!drhd->include_all) {
3498 for_each_active_dev_scope(drhd->devices,
3499 drhd->devices_cnt, i, dev)
3500 break;
3501 /* ignore DMAR unit if no devices exist */
3502 if (i == drhd->devices_cnt)
3503 drhd->ignored = 1;
3504 }
3505 }
3506
3507 for_each_active_drhd_unit(drhd) {
3508 if (drhd->include_all)
3509 continue;
3510
3511 for_each_active_dev_scope(drhd->devices,
3512 drhd->devices_cnt, i, dev)
3513 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
3514 break;
3515 if (i < drhd->devices_cnt)
3516 continue;
3517
3518 /* This IOMMU has *only* gfx devices. Either bypass it or
3519 set the gfx_mapped flag, as appropriate */
3520 if (dmar_map_gfx) {
3521 intel_iommu_gfx_mapped = 1;
3522 } else {
3523 drhd->ignored = 1;
3524 for_each_active_dev_scope(drhd->devices,
3525 drhd->devices_cnt, i, dev)
3526 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3527 }
3528 }
3529 }
3530
3531 #ifdef CONFIG_SUSPEND
3532 static int init_iommu_hw(void)
3533 {
3534 struct dmar_drhd_unit *drhd;
3535 struct intel_iommu *iommu = NULL;
3536
3537 for_each_active_iommu(iommu, drhd)
3538 if (iommu->qi)
3539 dmar_reenable_qi(iommu);
3540
3541 for_each_iommu(iommu, drhd) {
3542 if (drhd->ignored) {
3543 /*
3544 * we always have to disable PMRs or DMA may fail on
3545 * this device
3546 */
3547 if (force_on)
3548 iommu_disable_protect_mem_regions(iommu);
3549 continue;
3550 }
3551
3552 iommu_flush_write_buffer(iommu);
3553
3554 iommu_set_root_entry(iommu);
3555
3556 iommu->flush.flush_context(iommu, 0, 0, 0,
3557 DMA_CCMD_GLOBAL_INVL);
3558 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3559 iommu_enable_translation(iommu);
3560 iommu_disable_protect_mem_regions(iommu);
3561 }
3562
3563 return 0;
3564 }
3565
3566 static void iommu_flush_all(void)
3567 {
3568 struct dmar_drhd_unit *drhd;
3569 struct intel_iommu *iommu;
3570
3571 for_each_active_iommu(iommu, drhd) {
3572 iommu->flush.flush_context(iommu, 0, 0, 0,
3573 DMA_CCMD_GLOBAL_INVL);
3574 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
3575 DMA_TLB_GLOBAL_FLUSH);
3576 }
3577 }
3578
3579 static int iommu_suspend(void)
3580 {
3581 struct dmar_drhd_unit *drhd;
3582 struct intel_iommu *iommu = NULL;
3583 unsigned long flag;
3584
3585 for_each_active_iommu(iommu, drhd) {
3586 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3587 GFP_ATOMIC);
3588 if (!iommu->iommu_state)
3589 goto nomem;
3590 }
3591
3592 iommu_flush_all();
3593
3594 for_each_active_iommu(iommu, drhd) {
3595 iommu_disable_translation(iommu);
3596
3597 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3598
3599 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3600 readl(iommu->reg + DMAR_FECTL_REG);
3601 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3602 readl(iommu->reg + DMAR_FEDATA_REG);
3603 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3604 readl(iommu->reg + DMAR_FEADDR_REG);
3605 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3606 readl(iommu->reg + DMAR_FEUADDR_REG);
3607
3608 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3609 }
3610 return 0;
3611
3612 nomem:
3613 for_each_active_iommu(iommu, drhd)
3614 kfree(iommu->iommu_state);
3615
3616 return -ENOMEM;
3617 }
3618
3619 static void iommu_resume(void)
3620 {
3621 struct dmar_drhd_unit *drhd;
3622 struct intel_iommu *iommu = NULL;
3623 unsigned long flag;
3624
3625 if (init_iommu_hw()) {
3626 if (force_on)
3627 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3628 else
3629 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3630 return;
3631 }
3632
3633 for_each_active_iommu(iommu, drhd) {
3634
3635 raw_spin_lock_irqsave(&iommu->register_lock, flag);
3636
3637 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3638 iommu->reg + DMAR_FECTL_REG);
3639 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3640 iommu->reg + DMAR_FEDATA_REG);
3641 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3642 iommu->reg + DMAR_FEADDR_REG);
3643 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3644 iommu->reg + DMAR_FEUADDR_REG);
3645
3646 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
3647 }
3648
3649 for_each_active_iommu(iommu, drhd)
3650 kfree(iommu->iommu_state);
3651 }
3652
3653 static struct syscore_ops iommu_syscore_ops = {
3654 .resume = iommu_resume,
3655 .suspend = iommu_suspend,
3656 };
3657
3658 static void __init init_iommu_pm_ops(void)
3659 {
3660 register_syscore_ops(&iommu_syscore_ops);
3661 }
3662
3663 #else
3664 static inline void init_iommu_pm_ops(void) {}
3665 #endif /* CONFIG_SUSPEND */
3666
3667
3668 int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
3669 {
3670 struct acpi_dmar_reserved_memory *rmrr;
3671 struct dmar_rmrr_unit *rmrru;
3672
3673 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3674 if (!rmrru)
3675 return -ENOMEM;
3676
3677 rmrru->hdr = header;
3678 rmrr = (struct acpi_dmar_reserved_memory *)header;
3679 rmrru->base_address = rmrr->base_address;
3680 rmrru->end_address = rmrr->end_address;
3681 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3682 ((void *)rmrr) + rmrr->header.length,
3683 &rmrru->devices_cnt);
3684 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3685 kfree(rmrru);
3686 return -ENOMEM;
3687 }
3688
3689 list_add(&rmrru->list, &dmar_rmrr_units);
3690
3691 return 0;
3692 }
3693
3694 static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3695 {
3696 struct dmar_atsr_unit *atsru;
3697 struct acpi_dmar_atsr *tmp;
3698
3699 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3700 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3701 if (atsr->segment != tmp->segment)
3702 continue;
3703 if (atsr->header.length != tmp->header.length)
3704 continue;
3705 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3706 return atsru;
3707 }
3708
3709 return NULL;
3710 }
3711
3712 int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3713 {
3714 struct acpi_dmar_atsr *atsr;
3715 struct dmar_atsr_unit *atsru;
3716
3717 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3718 return 0;
3719
3720 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3721 atsru = dmar_find_atsr(atsr);
3722 if (atsru)
3723 return 0;
3724
3725 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
3726 if (!atsru)
3727 return -ENOMEM;
3728
3729 /*
3730 * If memory is allocated from slab by ACPI _DSM method, we need to
3731 * copy the memory content because the memory buffer will be freed
3732 * on return.
3733 */
3734 atsru->hdr = (void *)(atsru + 1);
3735 memcpy(atsru->hdr, hdr, hdr->length);
3736 atsru->include_all = atsr->flags & 0x1;
3737 if (!atsru->include_all) {
3738 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3739 (void *)atsr + atsr->header.length,
3740 &atsru->devices_cnt);
3741 if (atsru->devices_cnt && atsru->devices == NULL) {
3742 kfree(atsru);
3743 return -ENOMEM;
3744 }
3745 }
3746
3747 list_add_rcu(&atsru->list, &dmar_atsr_units);
3748
3749 return 0;
3750 }
3751
3752 static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3753 {
3754 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3755 kfree(atsru);
3756 }
3757
3758 int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3759 {
3760 struct acpi_dmar_atsr *atsr;
3761 struct dmar_atsr_unit *atsru;
3762
3763 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3764 atsru = dmar_find_atsr(atsr);
3765 if (atsru) {
3766 list_del_rcu(&atsru->list);
3767 synchronize_rcu();
3768 intel_iommu_free_atsr(atsru);
3769 }
3770
3771 return 0;
3772 }
3773
3774 int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
3775 {
3776 int i;
3777 struct device *dev;
3778 struct acpi_dmar_atsr *atsr;
3779 struct dmar_atsr_unit *atsru;
3780
3781 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3782 atsru = dmar_find_atsr(atsr);
3783 if (!atsru)
3784 return 0;
3785
3786 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
3787 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
3788 i, dev)
3789 return -EBUSY;
3790
3791 return 0;
3792 }
3793
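/*
 * Bring up a hot-added DMAR unit: check that it is compatible with the
 * current configuration (pass-through, snooping, superpages), allocate
 * its domain and root-entry state, and enable translation.  The si_domain
 * is attached as well when identity mapping is in use.
 */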
3794 static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
3795 {
3796 int sp, ret = 0;
3797 struct intel_iommu *iommu = dmaru->iommu;
3798
3799 if (g_iommus[iommu->seq_id])
3800 return 0;
3801
3802 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
3803 pr_warn("IOMMU: %s doesn't support hardware pass through.\n",
3804 iommu->name);
3805 return -ENXIO;
3806 }
3807 if (!ecap_sc_support(iommu->ecap) &&
3808 domain_update_iommu_snooping(iommu)) {
3809 pr_warn("IOMMU: %s doesn't support snooping.\n",
3810 iommu->name);
3811 return -ENXIO;
3812 }
3813 sp = domain_update_iommu_superpage(iommu) - 1;
3814 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
3815 pr_warn("IOMMU: %s doesn't support large page.\n",
3816 iommu->name);
3817 return -ENXIO;
3818 }
3819
3820 /*
3821 * Disable translation if already enabled prior to OS handover.
3822 */
3823 if (iommu->gcmd & DMA_GCMD_TE)
3824 iommu_disable_translation(iommu);
3825
3826 g_iommus[iommu->seq_id] = iommu;
3827 ret = iommu_init_domains(iommu);
3828 if (ret == 0)
3829 ret = iommu_alloc_root_entry(iommu);
3830 if (ret)
3831 goto out;
3832
3833 if (dmaru->ignored) {
3834 /*
3835 * we always have to disable PMRs or DMA may fail on this device
3836 */
3837 if (force_on)
3838 iommu_disable_protect_mem_regions(iommu);
3839 return 0;
3840 }
3841
3842 intel_iommu_init_qi(iommu);
3843 iommu_flush_write_buffer(iommu);
3844 ret = dmar_set_interrupt(iommu);
3845 if (ret)
3846 goto disable_iommu;
3847
3848 iommu_set_root_entry(iommu);
3849 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3850 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3851 iommu_enable_translation(iommu);
3852
3853 if (si_domain) {
3854 ret = iommu_attach_domain(si_domain, iommu);
3855 if (ret < 0 || si_domain->id != ret)
3856 goto disable_iommu;
3857 domain_attach_iommu(si_domain, iommu);
3858 }
3859
3860 iommu_disable_protect_mem_regions(iommu);
3861 return 0;
3862
3863 disable_iommu:
3864 disable_dmar_iommu(iommu);
3865 out:
3866 free_dmar_iommu(iommu);
3867 return ret;
3868 }
3869
3870 int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
3871 {
3872 int ret = 0;
3873 struct intel_iommu *iommu = dmaru->iommu;
3874
3875 if (!intel_iommu_enabled)
3876 return 0;
3877 if (iommu == NULL)
3878 return -EINVAL;
3879
3880 if (insert) {
3881 ret = intel_iommu_add(dmaru);
3882 } else {
3883 disable_dmar_iommu(iommu);
3884 free_dmar_iommu(iommu);
3885 }
3886
3887 return ret;
3888 }
3889
3890 static void intel_iommu_free_dmars(void)
3891 {
3892 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3893 struct dmar_atsr_unit *atsru, *atsr_n;
3894
3895 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3896 list_del(&rmrru->list);
3897 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3898 kfree(rmrru);
3899 }
3900
3901 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3902 list_del(&atsru->list);
3903 intel_iommu_free_atsr(atsru);
3904 }
3905 }
3906
3907 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3908 {
3909 int i, ret = 1;
3910 struct pci_bus *bus;
3911 struct pci_dev *bridge = NULL;
3912 struct device *tmp;
3913 struct acpi_dmar_atsr *atsr;
3914 struct dmar_atsr_unit *atsru;
3915
3916 dev = pci_physfn(dev);
3917 for (bus = dev->bus; bus; bus = bus->parent) {
3918 bridge = bus->self;
3919 if (!bridge || !pci_is_pcie(bridge) ||
3920 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
3921 return 0;
3922 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
3923 break;
3924 }
3925 if (!bridge)
3926 return 0;
3927
3928 rcu_read_lock();
3929 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3930 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3931 if (atsr->segment != pci_domain_nr(dev->bus))
3932 continue;
3933
3934 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
3935 if (tmp == &bridge->dev)
3936 goto out;
3937
3938 if (atsru->include_all)
3939 goto out;
3940 }
3941 ret = 0;
3942 out:
3943 rcu_read_unlock();
3944
3945 return ret;
3946 }
3947
3948 int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3949 {
3950 int ret = 0;
3951 struct dmar_rmrr_unit *rmrru;
3952 struct dmar_atsr_unit *atsru;
3953 struct acpi_dmar_atsr *atsr;
3954 struct acpi_dmar_reserved_memory *rmrr;
3955
3956 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3957 return 0;
3958
3959 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3960 rmrr = container_of(rmrru->hdr,
3961 struct acpi_dmar_reserved_memory, header);
3962 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3963 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3964 ((void *)rmrr) + rmrr->header.length,
3965 rmrr->segment, rmrru->devices,
3966 rmrru->devices_cnt);
3967 if (ret < 0)
3968 return ret;
3969 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3970 dmar_remove_dev_scope(info, rmrr->segment,
3971 rmrru->devices, rmrru->devices_cnt);
3972 }
3973 }
3974
3975 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3976 if (atsru->include_all)
3977 continue;
3978
3979 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3980 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3981 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3982 (void *)atsr + atsr->header.length,
3983 atsr->segment, atsru->devices,
3984 atsru->devices_cnt);
3985 if (ret > 0)
3986 break;
3987 else if (ret < 0)
3988 return ret;
3989 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3990 if (dmar_remove_dev_scope(info, atsr->segment,
3991 atsru->devices, atsru->devices_cnt))
3992 break;
3993 }
3994 }
3995
3996 return 0;
3997 }
3998
3999 /*
4000 * Here we only respond to the action of a device being unbound from its driver.
4001 *
4002 * A newly added device is not attached to its DMAR domain here yet; that
4003 * happens later, when the device is first mapped to an iova.
4004 */
4005 static int device_notifier(struct notifier_block *nb,
4006 unsigned long action, void *data)
4007 {
4008 struct device *dev = data;
4009 struct dmar_domain *domain;
4010
4011 if (iommu_dummy(dev))
4012 return 0;
4013
4014 if (action != BUS_NOTIFY_REMOVED_DEVICE)
4015 return 0;
4016
4017 domain = find_domain(dev);
4018 if (!domain)
4019 return 0;
4020
4021 down_read(&dmar_global_lock);
4022 domain_remove_one_dev_info(domain, dev);
4023 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
4024 domain_exit(domain);
4025 up_read(&dmar_global_lock);
4026
4027 return 0;
4028 }
4029
4030 static struct notifier_block device_nb = {
4031 .notifier_call = device_notifier,
4032 };
4033
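/*
 * Memory hotplug support for the static identity (si) domain: extend the
 * 1:1 map when a memory block is about to come online, and tear the
 * corresponding mappings down again (including an IOTLB flush on every
 * active IOMMU) when the block goes offline or the online is cancelled.
 */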
4034 static int intel_iommu_memory_notifier(struct notifier_block *nb,
4035 unsigned long val, void *v)
4036 {
4037 struct memory_notify *mhp = v;
4038 unsigned long long start, end;
4039 unsigned long start_vpfn, last_vpfn;
4040
4041 switch (val) {
4042 case MEM_GOING_ONLINE:
4043 start = mhp->start_pfn << PAGE_SHIFT;
4044 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4045 if (iommu_domain_identity_map(si_domain, start, end)) {
4046 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
4047 start, end);
4048 return NOTIFY_BAD;
4049 }
4050 break;
4051
4052 case MEM_OFFLINE:
4053 case MEM_CANCEL_ONLINE:
4054 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4055 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4056 while (start_vpfn <= last_vpfn) {
4057 struct iova *iova;
4058 struct dmar_drhd_unit *drhd;
4059 struct intel_iommu *iommu;
4060 struct page *freelist;
4061
4062 iova = find_iova(&si_domain->iovad, start_vpfn);
4063 if (iova == NULL) {
4064 				pr_debug("dmar: failed to get IOVA for PFN %lx\n",
4065 start_vpfn);
4066 break;
4067 }
4068
4069 iova = split_and_remove_iova(&si_domain->iovad, iova,
4070 start_vpfn, last_vpfn);
4071 if (iova == NULL) {
4072 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
4073 start_vpfn, last_vpfn);
4074 return NOTIFY_BAD;
4075 }
4076
4077 freelist = domain_unmap(si_domain, iova->pfn_lo,
4078 iova->pfn_hi);
4079
4080 rcu_read_lock();
4081 for_each_active_iommu(iommu, drhd)
4082 iommu_flush_iotlb_psi(iommu, si_domain->id,
4083 iova->pfn_lo, iova_size(iova),
4084 !freelist, 0);
4085 rcu_read_unlock();
4086 dma_free_pagelist(freelist);
4087
4088 start_vpfn = iova->pfn_hi + 1;
4089 free_iova_mem(iova);
4090 }
4091 break;
4092 }
4093
4094 return NOTIFY_OK;
4095 }
4096
4097 static struct notifier_block intel_iommu_memory_nb = {
4098 .notifier_call = intel_iommu_memory_notifier,
4099 .priority = 0
4100 };
4101
4102
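/*
 * sysfs attributes exposing basic per-IOMMU state: the version register,
 * the physical register base address and the (extended) capability
 * registers.  They are grouped under the "intel-iommu" attribute group
 * registered for each IOMMU device further below.
 */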
4103 static ssize_t intel_iommu_show_version(struct device *dev,
4104 struct device_attribute *attr,
4105 char *buf)
4106 {
4107 struct intel_iommu *iommu = dev_get_drvdata(dev);
4108 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4109 return sprintf(buf, "%d:%d\n",
4110 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4111 }
4112 static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4113
4114 static ssize_t intel_iommu_show_address(struct device *dev,
4115 struct device_attribute *attr,
4116 char *buf)
4117 {
4118 struct intel_iommu *iommu = dev_get_drvdata(dev);
4119 return sprintf(buf, "%llx\n", iommu->reg_phys);
4120 }
4121 static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4122
4123 static ssize_t intel_iommu_show_cap(struct device *dev,
4124 struct device_attribute *attr,
4125 char *buf)
4126 {
4127 struct intel_iommu *iommu = dev_get_drvdata(dev);
4128 return sprintf(buf, "%llx\n", iommu->cap);
4129 }
4130 static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4131
4132 static ssize_t intel_iommu_show_ecap(struct device *dev,
4133 struct device_attribute *attr,
4134 char *buf)
4135 {
4136 struct intel_iommu *iommu = dev_get_drvdata(dev);
4137 return sprintf(buf, "%llx\n", iommu->ecap);
4138 }
4139 static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4140
4141 static struct attribute *intel_iommu_attrs[] = {
4142 &dev_attr_version.attr,
4143 &dev_attr_address.attr,
4144 &dev_attr_cap.attr,
4145 &dev_attr_ecap.attr,
4146 NULL,
4147 };
4148
4149 static struct attribute_group intel_iommu_group = {
4150 .name = "intel-iommu",
4151 .attrs = intel_iommu_attrs,
4152 };
4153
4154 const struct attribute_group *intel_iommu_groups[] = {
4155 &intel_iommu_group,
4156 NULL,
4157 };
4158
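/*
 * Main entry point: parse the DMAR table, disable any translation left
 * enabled by the firmware, set up DMA remapping (init_dmars), install
 * intel_dma_ops as the DMA API backend, and register the IOMMU ops,
 * bus/memory notifiers and per-IOMMU sysfs devices.
 */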
4159 int __init intel_iommu_init(void)
4160 {
4161 int ret = -ENODEV;
4162 struct dmar_drhd_unit *drhd;
4163 struct intel_iommu *iommu;
4164
4165 /* VT-d is required for a TXT/tboot launch, so enforce that */
4166 force_on = tboot_force_iommu();
4167
4168 if (iommu_init_mempool()) {
4169 if (force_on)
4170 panic("tboot: Failed to initialize iommu memory\n");
4171 return -ENOMEM;
4172 }
4173
4174 down_write(&dmar_global_lock);
4175 if (dmar_table_init()) {
4176 if (force_on)
4177 panic("tboot: Failed to initialize DMAR table\n");
4178 goto out_free_dmar;
4179 }
4180
4181 /*
4182 * Disable translation if already enabled prior to OS handover.
4183 */
4184 for_each_active_iommu(iommu, drhd)
4185 if (iommu->gcmd & DMA_GCMD_TE)
4186 iommu_disable_translation(iommu);
4187
4188 if (dmar_dev_scope_init() < 0) {
4189 if (force_on)
4190 panic("tboot: Failed to initialize DMAR device scope\n");
4191 goto out_free_dmar;
4192 }
4193
4194 if (no_iommu || dmar_disabled)
4195 goto out_free_dmar;
4196
4197 if (list_empty(&dmar_rmrr_units))
4198 printk(KERN_INFO "DMAR: No RMRR found\n");
4199
4200 if (list_empty(&dmar_atsr_units))
4201 printk(KERN_INFO "DMAR: No ATSR found\n");
4202
4203 if (dmar_init_reserved_ranges()) {
4204 if (force_on)
4205 panic("tboot: Failed to reserve iommu ranges\n");
4206 goto out_free_reserved_range;
4207 }
4208
4209 init_no_remapping_devices();
4210
4211 ret = init_dmars();
4212 if (ret) {
4213 if (force_on)
4214 panic("tboot: Failed to initialize DMARs\n");
4215 printk(KERN_ERR "IOMMU: dmar init failed\n");
4216 goto out_free_reserved_range;
4217 }
4218 up_write(&dmar_global_lock);
4219 printk(KERN_INFO
4220 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4221
4222 init_timer(&unmap_timer);
4223 #ifdef CONFIG_SWIOTLB
4224 swiotlb = 0;
4225 #endif
4226 dma_ops = &intel_dma_ops;
4227
4228 init_iommu_pm_ops();
4229
4230 for_each_active_iommu(iommu, drhd)
4231 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4232 intel_iommu_groups,
4233 iommu->name);
4234
4235 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
4236 bus_register_notifier(&pci_bus_type, &device_nb);
4237 if (si_domain && !hw_pass_through)
4238 register_memory_notifier(&intel_iommu_memory_nb);
4239
4240 intel_iommu_enabled = 1;
4241
4242 return 0;
4243
4244 out_free_reserved_range:
4245 put_iova_domain(&reserved_iova_list);
4246 out_free_dmar:
4247 intel_iommu_free_dmars();
4248 up_write(&dmar_global_lock);
4249 iommu_exit_mempool();
4250 return ret;
4251 }
4252
4253 static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4254 {
4255 struct intel_iommu *iommu = opaque;
4256
4257 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4258 return 0;
4259 }
4260
4261 /*
4262 * NB - intel-iommu lacks any sort of reference counting for the users of
4263 * dependent devices. If multiple endpoints have intersecting dependent
4264 * devices, unbinding the driver from any one of them will possibly leave
4265 * the others unable to operate.
4266 */
4267 static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
4268 struct device *dev)
4269 {
4270 if (!iommu || !dev || !dev_is_pci(dev))
4271 return;
4272
4273 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
4274 }
4275
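/*
 * Detach one device from @domain: unlink its device_domain_info, disable
 * its device-IOTLB, clear the context entries for the device and its DMA
 * alias dependents, and, if no other device behind the same IOMMU is left
 * in the domain, detach the domain from that IOMMU as well.
 */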
4276 static void domain_remove_one_dev_info(struct dmar_domain *domain,
4277 struct device *dev)
4278 {
4279 struct device_domain_info *info, *tmp;
4280 struct intel_iommu *iommu;
4281 unsigned long flags;
4282 bool found = false;
4283 u8 bus, devfn;
4284
4285 iommu = device_to_iommu(dev, &bus, &devfn);
4286 if (!iommu)
4287 return;
4288
4289 spin_lock_irqsave(&device_domain_lock, flags);
4290 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
4291 if (info->iommu == iommu && info->bus == bus &&
4292 info->devfn == devfn) {
4293 unlink_domain_info(info);
4294 spin_unlock_irqrestore(&device_domain_lock, flags);
4295
4296 iommu_disable_dev_iotlb(info);
4297 iommu_detach_dev(iommu, info->bus, info->devfn);
4298 iommu_detach_dependent_devices(iommu, dev);
4299 free_devinfo_mem(info);
4300
4301 spin_lock_irqsave(&device_domain_lock, flags);
4302
4303 if (found)
4304 break;
4305 else
4306 continue;
4307 }
4308
4309 		/* If there are no other devices under the same iommu
4310 		 * owned by this domain, clear this iommu in iommu_bmp and
4311 		 * update the iommu count and coherency.
4312 		 */
4313 if (info->iommu == iommu)
4314 found = true;
4315 }
4316
4317 spin_unlock_irqrestore(&device_domain_lock, flags);
4318
4319 	if (!found) {
4320 domain_detach_iommu(domain, iommu);
4321 if (!domain_type_is_vm_or_si(domain))
4322 iommu_detach_domain(domain, iommu);
4323 }
4324 }
4325
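/*
 * Initialise a domain created through the generic IOMMU API: set up its
 * IOVA allocator, reserve the special ranges, derive the adjusted guest
 * address width and allocate the top-level page directory.
 */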
4326 static int md_domain_init(struct dmar_domain *domain, int guest_width)
4327 {
4328 int adjust_width;
4329
4330 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4331 DMA_32BIT_PFN);
4332 domain_reserve_special_ranges(domain);
4333
4334 /* calculate AGAW */
4335 domain->gaw = guest_width;
4336 adjust_width = guestwidth_to_adjustwidth(guest_width);
4337 domain->agaw = width_to_agaw(adjust_width);
4338
4339 domain->iommu_coherency = 0;
4340 domain->iommu_snooping = 0;
4341 domain->iommu_superpage = 0;
4342 domain->max_addr = 0;
4343
4344 /* always allocate the top pgd */
4345 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
4346 if (!domain->pgd)
4347 return -ENOMEM;
4348 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4349 return 0;
4350 }
4351
4352 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
4353 {
4354 struct dmar_domain *dmar_domain;
4355 struct iommu_domain *domain;
4356
4357 if (type != IOMMU_DOMAIN_UNMANAGED)
4358 return NULL;
4359
4360 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
4361 if (!dmar_domain) {
4362 printk(KERN_ERR
4363 			"intel_iommu_domain_alloc: dmar_domain == NULL\n");
4364 return NULL;
4365 }
4366 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
4367 printk(KERN_ERR
4368 			"intel_iommu_domain_alloc: md_domain_init() failed\n");
4369 domain_exit(dmar_domain);
4370 return NULL;
4371 }
4372 domain_update_iommu_cap(dmar_domain);
4373
4374 domain = &dmar_domain->domain;
4375 domain->geometry.aperture_start = 0;
4376 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4377 domain->geometry.force_aperture = true;
4378
4379 return domain;
4380 }
4381
4382 static void intel_iommu_domain_free(struct iommu_domain *domain)
4383 {
4384 domain_exit(to_dmar_domain(domain));
4385 }
4386
4387 static int intel_iommu_attach_device(struct iommu_domain *domain,
4388 struct device *dev)
4389 {
4390 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4391 struct intel_iommu *iommu;
4392 int addr_width;
4393 u8 bus, devfn;
4394
4395 if (device_is_rmrr_locked(dev)) {
4396 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4397 return -EPERM;
4398 }
4399
4400 /* normally dev is not mapped */
4401 if (unlikely(domain_context_mapped(dev))) {
4402 struct dmar_domain *old_domain;
4403
4404 old_domain = find_domain(dev);
4405 if (old_domain) {
4406 if (domain_type_is_vm_or_si(dmar_domain))
4407 domain_remove_one_dev_info(old_domain, dev);
4408 else
4409 domain_remove_dev_info(old_domain);
4410
4411 if (!domain_type_is_vm_or_si(old_domain) &&
4412 list_empty(&old_domain->devices))
4413 domain_exit(old_domain);
4414 }
4415 }
4416
4417 iommu = device_to_iommu(dev, &bus, &devfn);
4418 if (!iommu)
4419 return -ENODEV;
4420
4421 /* check if this iommu agaw is sufficient for max mapped address */
4422 addr_width = agaw_to_width(iommu->agaw);
4423 if (addr_width > cap_mgaw(iommu->cap))
4424 addr_width = cap_mgaw(iommu->cap);
4425
4426 if (dmar_domain->max_addr > (1LL << addr_width)) {
4427 printk(KERN_ERR "%s: iommu width (%d) is not "
4428 "sufficient for the mapped address (%llx)\n",
4429 __func__, addr_width, dmar_domain->max_addr);
4430 return -EFAULT;
4431 }
4432 dmar_domain->gaw = addr_width;
4433
4434 /*
4435 * Knock out extra levels of page tables if necessary
4436 */
4437 while (iommu->agaw < dmar_domain->agaw) {
4438 struct dma_pte *pte;
4439
4440 pte = dmar_domain->pgd;
4441 if (dma_pte_present(pte)) {
4442 dmar_domain->pgd = (struct dma_pte *)
4443 phys_to_virt(dma_pte_addr(pte));
4444 free_pgtable_page(pte);
4445 }
4446 dmar_domain->agaw--;
4447 }
4448
4449 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
4450 }
4451
4452 static void intel_iommu_detach_device(struct iommu_domain *domain,
4453 struct device *dev)
4454 {
4455 domain_remove_one_dev_info(to_dmar_domain(domain), dev);
4456 }
4457
4458 static int intel_iommu_map(struct iommu_domain *domain,
4459 unsigned long iova, phys_addr_t hpa,
4460 size_t size, int iommu_prot)
4461 {
4462 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4463 u64 max_addr;
4464 int prot = 0;
4465 int ret;
4466
4467 if (iommu_prot & IOMMU_READ)
4468 prot |= DMA_PTE_READ;
4469 if (iommu_prot & IOMMU_WRITE)
4470 prot |= DMA_PTE_WRITE;
4471 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4472 prot |= DMA_PTE_SNP;
4473
4474 max_addr = iova + size;
4475 if (dmar_domain->max_addr < max_addr) {
4476 u64 end;
4477
4478 /* check if minimum agaw is sufficient for mapped address */
4479 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
4480 if (end < max_addr) {
4481 printk(KERN_ERR "%s: iommu width (%d) is not "
4482 "sufficient for the mapped address (%llx)\n",
4483 __func__, dmar_domain->gaw, max_addr);
4484 return -EFAULT;
4485 }
4486 dmar_domain->max_addr = max_addr;
4487 }
4488 /* Round up size to next multiple of PAGE_SIZE, if it and
4489 the low bits of hpa would take us onto the next page */
4490 size = aligned_nrpages(hpa, size);
4491 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4492 hpa >> VTD_PAGE_SHIFT, size, prot);
4493 return ret;
4494 }
4495
4496 static size_t intel_iommu_unmap(struct iommu_domain *domain,
4497 unsigned long iova, size_t size)
4498 {
4499 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4500 struct page *freelist = NULL;
4501 struct intel_iommu *iommu;
4502 unsigned long start_pfn, last_pfn;
4503 unsigned int npages;
4504 int iommu_id, num, ndomains, level = 0;
4505
4506 /* Cope with horrid API which requires us to unmap more than the
4507 size argument if it happens to be a large-page mapping. */
4508 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4509 BUG();
4510
4511 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4512 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4513
4514 start_pfn = iova >> VTD_PAGE_SHIFT;
4515 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4516
4517 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4518
4519 npages = last_pfn - start_pfn + 1;
4520
4521 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4522 iommu = g_iommus[iommu_id];
4523
4524 /*
4525 * find bit position of dmar_domain
4526 */
4527 ndomains = cap_ndoms(iommu->cap);
4528 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4529 if (iommu->domains[num] == dmar_domain)
4530 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4531 npages, !freelist, 0);
4532 }
4533
4534 }
4535
4536 dma_free_pagelist(freelist);
4537
4538 if (dmar_domain->max_addr == iova + size)
4539 dmar_domain->max_addr = iova;
4540
4541 return size;
4542 }
4543
4544 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
4545 dma_addr_t iova)
4546 {
4547 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
4548 struct dma_pte *pte;
4549 int level = 0;
4550 u64 phys = 0;
4551
4552 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
4553 if (pte)
4554 phys = dma_pte_addr(pte);
4555
4556 return phys;
4557 }
4558
4559 static bool intel_iommu_capable(enum iommu_cap cap)
4560 {
4561 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4562 return domain_update_iommu_snooping(NULL) == 1;
4563 if (cap == IOMMU_CAP_INTR_REMAP)
4564 return irq_remapping_enabled == 1;
4565
4566 return false;
4567 }
4568
4569 static int intel_iommu_add_device(struct device *dev)
4570 {
4571 struct intel_iommu *iommu;
4572 struct iommu_group *group;
4573 u8 bus, devfn;
4574
4575 iommu = device_to_iommu(dev, &bus, &devfn);
4576 if (!iommu)
4577 return -ENODEV;
4578
4579 iommu_device_link(iommu->iommu_dev, dev);
4580
4581 group = iommu_group_get_for_dev(dev);
4582
4583 if (IS_ERR(group))
4584 return PTR_ERR(group);
4585
4586 iommu_group_put(group);
4587 return 0;
4588 }
4589
4590 static void intel_iommu_remove_device(struct device *dev)
4591 {
4592 struct intel_iommu *iommu;
4593 u8 bus, devfn;
4594
4595 iommu = device_to_iommu(dev, &bus, &devfn);
4596 if (!iommu)
4597 return;
4598
4599 iommu_group_remove_device(dev);
4600
4601 iommu_device_unlink(iommu->iommu_dev, dev);
4602 }
4603
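/*
 * intel_iommu_ops is registered for the PCI bus in intel_iommu_init();
 * generic IOMMU API users (e.g. VFIO device assignment) reach the
 * hardware through these callbacks.
 */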
4604 static const struct iommu_ops intel_iommu_ops = {
4605 .capable = intel_iommu_capable,
4606 .domain_alloc = intel_iommu_domain_alloc,
4607 .domain_free = intel_iommu_domain_free,
4608 .attach_dev = intel_iommu_attach_device,
4609 .detach_dev = intel_iommu_detach_device,
4610 .map = intel_iommu_map,
4611 .unmap = intel_iommu_unmap,
4612 .map_sg = default_iommu_map_sg,
4613 .iova_to_phys = intel_iommu_iova_to_phys,
4614 .add_device = intel_iommu_add_device,
4615 .remove_device = intel_iommu_remove_device,
4616 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
4617 };
4618
4619 static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4620 {
4621 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4622 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4623 dmar_map_gfx = 0;
4624 }
4625
4626 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4627 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4628 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4629 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4630 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4631 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4632 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4633
4634 static void quirk_iommu_rwbf(struct pci_dev *dev)
4635 {
4636 /*
4637 * Mobile 4 Series Chipset neglects to set RWBF capability,
4638 * but needs it. Same seems to hold for the desktop versions.
4639 */
4640 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4641 rwbf_quirk = 1;
4642 }
4643
4644 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
4645 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4646 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4647 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4648 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4649 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4650 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
4651
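/*
 * GGC is the graphics control register at PCI config offset 0x52 on the
 * devices the Calpella quirk below binds to; bits 11:8 encode how much
 * memory the BIOS reserved for the GTT and whether any of it was set
 * aside for VT-d (the shadow GTT).
 */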
4652 #define GGC 0x52
4653 #define GGC_MEMORY_SIZE_MASK (0xf << 8)
4654 #define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4655 #define GGC_MEMORY_SIZE_1M (0x1 << 8)
4656 #define GGC_MEMORY_SIZE_2M (0x3 << 8)
4657 #define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4658 #define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4659 #define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4660 #define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4661
4662 static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
4663 {
4664 unsigned short ggc;
4665
4666 if (pci_read_config_word(dev, GGC, &ggc))
4667 return;
4668
4669 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
4670 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4671 dmar_map_gfx = 0;
4672 } else if (dmar_map_gfx) {
4673 /* we have to ensure the gfx device is idle before we flush */
4674 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4675 intel_iommu_strict = 1;
4676 }
4677 }
4678 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4679 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4680 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4681 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4682
4683 /* On Tylersburg chipsets, some BIOSes have been known to enable the
4684 ISOCH DMAR unit for the Azalia sound device, but not give it any
4685 TLB entries, which causes it to deadlock. Check for that. We do
4686 this in a function called from init_dmars(), instead of in a PCI
4687 quirk, because we don't want to print the obnoxious "BIOS broken"
4688 message if VT-d is actually disabled.
4689 */
4690 static void __init check_tylersburg_isoch(void)
4691 {
4692 struct pci_dev *pdev;
4693 uint32_t vtisochctrl;
4694
4695 /* If there's no Azalia in the system anyway, forget it. */
4696 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4697 if (!pdev)
4698 return;
4699 pci_dev_put(pdev);
4700
4701 /* System Management Registers. Might be hidden, in which case
4702 we can't do the sanity check. But that's OK, because the
4703 known-broken BIOSes _don't_ actually hide it, so far. */
4704 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4705 if (!pdev)
4706 return;
4707
4708 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4709 pci_dev_put(pdev);
4710 return;
4711 }
4712
4713 pci_dev_put(pdev);
4714
4715 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4716 if (vtisochctrl & 1)
4717 return;
4718
4719 /* Drop all bits other than the number of TLB entries */
4720 vtisochctrl &= 0x1c;
4721
4722 /* If we have the recommended number of TLB entries (16), fine. */
4723 if (vtisochctrl == 0x10)
4724 return;
4725
4726 /* Zero TLB entries? You get to ride the short bus to school. */
4727 if (!vtisochctrl) {
4728 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4729 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4730 dmi_get_system_info(DMI_BIOS_VENDOR),
4731 dmi_get_system_info(DMI_BIOS_VERSION),
4732 dmi_get_system_info(DMI_PRODUCT_VERSION));
4733 iommu_identity_mapping |= IDENTMAP_AZALIA;
4734 return;
4735 }
4736
4737 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4738 vtisochctrl);
4739 }