drivers/pci/intel-iommu.c
1 /*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24 #include <linux/init.h>
25 #include <linux/bitmap.h>
26 #include <linux/debugfs.h>
27 #include <linux/slab.h>
28 #include <linux/irq.h>
29 #include <linux/interrupt.h>
30 #include <linux/spinlock.h>
31 #include <linux/pci.h>
32 #include <linux/dmar.h>
33 #include <linux/dma-mapping.h>
34 #include <linux/mempool.h>
35 #include <linux/timer.h>
36 #include <linux/iova.h>
37 #include <linux/iommu.h>
38 #include <linux/intel-iommu.h>
39 #include <asm/cacheflush.h>
40 #include <asm/iommu.h>
41 #include "pci.h"
42
43 #define ROOT_SIZE VTD_PAGE_SIZE
44 #define CONTEXT_SIZE VTD_PAGE_SIZE
45
46 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48
49 #define IOAPIC_RANGE_START (0xfee00000)
50 #define IOAPIC_RANGE_END (0xfeefffff)
51 #define IOVA_START_ADDR (0x1000)
52
53 #define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54
55 #define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56
57 #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58 #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59 #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
60
61 /* global iommu list, set NULL for ignored DMAR units */
62 static struct intel_iommu **g_iommus;
63
64 /*
65 * 0: Present
66 * 1-11: Reserved
67 * 12-63: Context Ptr (12 - (haw-1))
68 * 64-127: Reserved
69 */
70 struct root_entry {
71 u64 val;
72 u64 rsvd1;
73 };
74 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
75 static inline bool root_present(struct root_entry *root)
76 {
77 return (root->val & 1);
78 }
79 static inline void set_root_present(struct root_entry *root)
80 {
81 root->val |= 1;
82 }
83 static inline void set_root_value(struct root_entry *root, unsigned long value)
84 {
85 root->val |= value & VTD_PAGE_MASK;
86 }
87
88 static inline struct context_entry *
89 get_context_addr_from_root(struct root_entry *root)
90 {
91 return (struct context_entry *)
92 (root_present(root)?phys_to_virt(
93 root->val & VTD_PAGE_MASK) :
94 NULL);
95 }
96
97 /*
98 * low 64 bits:
99 * 0: present
100 * 1: fault processing disable
101 * 2-3: translation type
102 * 12-63: address space root
103 * high 64 bits:
104 * 0-2: address width
105 * 3-6: avail
106 * 8-23: domain id
107 */
108 struct context_entry {
109 u64 lo;
110 u64 hi;
111 };
112
113 static inline bool context_present(struct context_entry *context)
114 {
115 return (context->lo & 1);
116 }
117 static inline void context_set_present(struct context_entry *context)
118 {
119 context->lo |= 1;
120 }
121
122 static inline void context_set_fault_enable(struct context_entry *context)
123 {
124 context->lo &= (((u64)-1) << 2) | 1;
125 }
126
127 #define CONTEXT_TT_MULTI_LEVEL 0
128
129 static inline void context_set_translation_type(struct context_entry *context,
130 unsigned long value)
131 {
132 context->lo &= (((u64)-1) << 4) | 3;
133 context->lo |= (value & 3) << 2;
134 }
135
136 static inline void context_set_address_root(struct context_entry *context,
137 unsigned long value)
138 {
139 context->lo |= value & VTD_PAGE_MASK;
140 }
141
142 static inline void context_set_address_width(struct context_entry *context,
143 unsigned long value)
144 {
145 context->hi |= value & 7;
146 }
147
148 static inline void context_set_domain_id(struct context_entry *context,
149 unsigned long value)
150 {
151 context->hi |= (value & ((1 << 16) - 1)) << 8;
152 }
153
154 static inline void context_clear_entry(struct context_entry *context)
155 {
156 context->lo = 0;
157 context->hi = 0;
158 }
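
/*
 * Worked example (illustrative only, not taken from the original code):
 * for domain id 5, an address-width field of 2 (a 4-level table) and a
 * page-table root at physical address pt_phys (a placeholder name), the
 * setters above compose:
 *	hi = (5 << 8) | 2 = 0x502;
 *	lo = (pt_phys & VTD_PAGE_MASK) | (CONTEXT_TT_MULTI_LEVEL << 2) | 1;
 * with bit 1 (fault processing disable) left clear by
 * context_set_fault_enable(), so fault reporting stays enabled.
 */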
159
160 /*
161 * 0: readable
162 * 1: writable
163 * 2-6: reserved
164 * 7: super page
165 * 8-11: available
166 * 12-63: Host physical address
167 */
168 struct dma_pte {
169 u64 val;
170 };
171
172 static inline void dma_clear_pte(struct dma_pte *pte)
173 {
174 pte->val = 0;
175 }
176
177 static inline void dma_set_pte_readable(struct dma_pte *pte)
178 {
179 pte->val |= DMA_PTE_READ;
180 }
181
182 static inline void dma_set_pte_writable(struct dma_pte *pte)
183 {
184 pte->val |= DMA_PTE_WRITE;
185 }
186
187 static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188 {
189 pte->val = (pte->val & ~3) | (prot & 3);
190 }
191
192 static inline u64 dma_pte_addr(struct dma_pte *pte)
193 {
194 return (pte->val & VTD_PAGE_MASK);
195 }
196
197 static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198 {
199 pte->val |= (addr & VTD_PAGE_MASK);
200 }
201
202 static inline bool dma_pte_present(struct dma_pte *pte)
203 {
204 return (pte->val & 3) != 0;
205 }
206
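/*
 * Worked example (illustrative only, not taken from the original code):
 * a last-level pte that maps host physical page hpa (a placeholder name)
 * read/write ends up as:
 *	pte->val = (hpa & VTD_PAGE_MASK) | DMA_PTE_READ | DMA_PTE_WRITE;
 * and dma_pte_present() is true because bits 0-1 are non-zero.
 */
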
207 /* devices under the same p2p bridge are owned in one domain */
208 #define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
209
210 /* a domain can represent a virtual machine; more than one device
211 * across iommus may be owned by one domain, e.g. a kvm guest.
212 */
213 #define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
214
215 struct dmar_domain {
216 int id; /* domain id */
217 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
218
219 struct list_head devices; /* all devices' list */
220 struct iova_domain iovad; /* iova's that belong to this domain */
221
222 struct dma_pte *pgd; /* virtual address */
223 spinlock_t mapping_lock; /* page table lock */
224 int gaw; /* max guest address width */
225
226 /* adjusted guest address width, 0 is level 2 30-bit */
227 int agaw;
228
229 int flags; /* flags to find out type of domain */
230
231 int iommu_coherency;/* indicate coherency of iommu access */
232 int iommu_count; /* reference count of iommu */
233 spinlock_t iommu_lock; /* protect iommu set in domain */
234 u64 max_addr; /* maximum mapped address */
235 };
236
237 /* PCI domain-device relationship */
238 struct device_domain_info {
239 struct list_head link; /* link to domain siblings */
240 struct list_head global; /* link to global list */
241 u8 bus; /* PCI bus number */
242 u8 devfn; /* PCI devfn number */
243 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
244 struct dmar_domain *domain; /* pointer to domain */
245 };
246
247 static void flush_unmaps_timeout(unsigned long data);
248
249 DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
250
251 #define HIGH_WATER_MARK 250
252 struct deferred_flush_tables {
253 int next;
254 struct iova *iova[HIGH_WATER_MARK];
255 struct dmar_domain *domain[HIGH_WATER_MARK];
256 };
257
258 static struct deferred_flush_tables *deferred_flush;
259
260 /* bitmap for indexing intel_iommus */
261 static int g_num_of_iommus;
262
263 static DEFINE_SPINLOCK(async_umap_flush_lock);
264 static LIST_HEAD(unmaps_to_do);
265
266 static int timer_on;
267 static long list_size;
268
269 static void domain_remove_dev_info(struct dmar_domain *domain);
270
271 #ifdef CONFIG_DMAR_DEFAULT_ON
272 int dmar_disabled = 0;
273 #else
274 int dmar_disabled = 1;
275 #endif /*CONFIG_DMAR_DEFAULT_ON*/
276
277 static int __initdata dmar_map_gfx = 1;
278 static int dmar_forcedac;
279 static int intel_iommu_strict;
280
281 #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
282 static DEFINE_SPINLOCK(device_domain_lock);
283 static LIST_HEAD(device_domain_list);
284
285 static struct iommu_ops intel_iommu_ops;
286
287 static int __init intel_iommu_setup(char *str)
288 {
289 if (!str)
290 return -EINVAL;
291 while (*str) {
292 if (!strncmp(str, "on", 2)) {
293 dmar_disabled = 0;
294 printk(KERN_INFO "Intel-IOMMU: enabled\n");
295 } else if (!strncmp(str, "off", 3)) {
296 dmar_disabled = 1;
297 printk(KERN_INFO "Intel-IOMMU: disabled\n");
298 } else if (!strncmp(str, "igfx_off", 8)) {
299 dmar_map_gfx = 0;
300 printk(KERN_INFO
301 "Intel-IOMMU: disable GFX device mapping\n");
302 } else if (!strncmp(str, "forcedac", 8)) {
303 printk(KERN_INFO
304 "Intel-IOMMU: Forcing DAC for PCI devices\n");
305 dmar_forcedac = 1;
306 } else if (!strncmp(str, "strict", 6)) {
307 printk(KERN_INFO
308 "Intel-IOMMU: disable batched IOTLB flush\n");
309 intel_iommu_strict = 1;
310 }
311
312 str += strcspn(str, ",");
313 while (*str == ',')
314 str++;
315 }
316 return 0;
317 }
318 __setup("intel_iommu=", intel_iommu_setup);
319
320 static struct kmem_cache *iommu_domain_cache;
321 static struct kmem_cache *iommu_devinfo_cache;
322 static struct kmem_cache *iommu_iova_cache;
323
324 static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
325 {
326 unsigned int flags;
327 void *vaddr;
328
329 /* trying to avoid low memory issues */
330 flags = current->flags & PF_MEMALLOC;
331 current->flags |= PF_MEMALLOC;
332 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
333 current->flags &= (~PF_MEMALLOC | flags);
334 return vaddr;
335 }
336
337
338 static inline void *alloc_pgtable_page(void)
339 {
340 unsigned int flags;
341 void *vaddr;
342
343 /* trying to avoid low memory issues */
344 flags = current->flags & PF_MEMALLOC;
345 current->flags |= PF_MEMALLOC;
346 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
347 current->flags &= (~PF_MEMALLOC | flags);
348 return vaddr;
349 }
350
351 static inline void free_pgtable_page(void *vaddr)
352 {
353 free_page((unsigned long)vaddr);
354 }
355
356 static inline void *alloc_domain_mem(void)
357 {
358 return iommu_kmem_cache_alloc(iommu_domain_cache);
359 }
360
361 static void free_domain_mem(void *vaddr)
362 {
363 kmem_cache_free(iommu_domain_cache, vaddr);
364 }
365
366 static inline void * alloc_devinfo_mem(void)
367 {
368 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
369 }
370
371 static inline void free_devinfo_mem(void *vaddr)
372 {
373 kmem_cache_free(iommu_devinfo_cache, vaddr);
374 }
375
376 struct iova *alloc_iova_mem(void)
377 {
378 return iommu_kmem_cache_alloc(iommu_iova_cache);
379 }
380
381 void free_iova_mem(struct iova *iova)
382 {
383 kmem_cache_free(iommu_iova_cache, iova);
384 }
385
386
387 static inline int width_to_agaw(int width);
388
389 /* calculate agaw for each iommu.
390 * "SAGAW" may be different across iommus; use a default agaw, and fall
391 * back to a smaller supported agaw for iommus that don't support it.
392 */
393 int iommu_calculate_agaw(struct intel_iommu *iommu)
394 {
395 unsigned long sagaw;
396 int agaw = -1;
397
398 sagaw = cap_sagaw(iommu->cap);
399 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
400 agaw >= 0; agaw--) {
401 if (test_bit(agaw, &sagaw))
402 break;
403 }
404
405 return agaw;
406 }
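
/*
 * Worked example (derived from the code above, illustrative only):
 * DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so the loop starts at
 * width_to_agaw(48) = (48 - 30) / 9 = 2 (a 4-level table). If SAGAW has
 * bit 2 set, agaw 2 is returned; otherwise the loop falls back to agaw 1
 * (39-bit, 3-level) and then agaw 0 (30-bit, 2-level).
 */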
407
408 /* in native case, each domain is related to only one iommu */
409 static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
410 {
411 int iommu_id;
412
413 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
414
415 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
416 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
417 return NULL;
418
419 return g_iommus[iommu_id];
420 }
421
422 /* "Coherency" capability may be different across iommus */
423 static void domain_update_iommu_coherency(struct dmar_domain *domain)
424 {
425 int i;
426
427 domain->iommu_coherency = 1;
428
429 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
430 for (; i < g_num_of_iommus; ) {
431 if (!ecap_coherent(g_iommus[i]->ecap)) {
432 domain->iommu_coherency = 0;
433 break;
434 }
435 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
436 }
437 }
438
439 static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
440 {
441 struct dmar_drhd_unit *drhd = NULL;
442 int i;
443
444 for_each_drhd_unit(drhd) {
445 if (drhd->ignored)
446 continue;
447
448 for (i = 0; i < drhd->devices_cnt; i++)
449 if (drhd->devices[i] &&
450 drhd->devices[i]->bus->number == bus &&
451 drhd->devices[i]->devfn == devfn)
452 return drhd->iommu;
453
454 if (drhd->include_all)
455 return drhd->iommu;
456 }
457
458 return NULL;
459 }
460
461 static void domain_flush_cache(struct dmar_domain *domain,
462 void *addr, int size)
463 {
464 if (!domain->iommu_coherency)
465 clflush_cache_range(addr, size);
466 }
467
468 /* Gets context entry for a given bus and devfn */
469 static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
470 u8 bus, u8 devfn)
471 {
472 struct root_entry *root;
473 struct context_entry *context;
474 unsigned long phy_addr;
475 unsigned long flags;
476
477 spin_lock_irqsave(&iommu->lock, flags);
478 root = &iommu->root_entry[bus];
479 context = get_context_addr_from_root(root);
480 if (!context) {
481 context = (struct context_entry *)alloc_pgtable_page();
482 if (!context) {
483 spin_unlock_irqrestore(&iommu->lock, flags);
484 return NULL;
485 }
486 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
487 phy_addr = virt_to_phys((void *)context);
488 set_root_value(root, phy_addr);
489 set_root_present(root);
490 __iommu_flush_cache(iommu, root, sizeof(*root));
491 }
492 spin_unlock_irqrestore(&iommu->lock, flags);
493 return &context[devfn];
494 }
495
496 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
497 {
498 struct root_entry *root;
499 struct context_entry *context;
500 int ret;
501 unsigned long flags;
502
503 spin_lock_irqsave(&iommu->lock, flags);
504 root = &iommu->root_entry[bus];
505 context = get_context_addr_from_root(root);
506 if (!context) {
507 ret = 0;
508 goto out;
509 }
510 ret = context_present(&context[devfn]);
511 out:
512 spin_unlock_irqrestore(&iommu->lock, flags);
513 return ret;
514 }
515
516 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
517 {
518 struct root_entry *root;
519 struct context_entry *context;
520 unsigned long flags;
521
522 spin_lock_irqsave(&iommu->lock, flags);
523 root = &iommu->root_entry[bus];
524 context = get_context_addr_from_root(root);
525 if (context) {
526 context_clear_entry(&context[devfn]);
527 __iommu_flush_cache(iommu, &context[devfn],
528 sizeof(*context));
529 }
530 spin_unlock_irqrestore(&iommu->lock, flags);
531 }
532
533 static void free_context_table(struct intel_iommu *iommu)
534 {
535 struct root_entry *root;
536 int i;
537 unsigned long flags;
538 struct context_entry *context;
539
540 spin_lock_irqsave(&iommu->lock, flags);
541 if (!iommu->root_entry) {
542 goto out;
543 }
544 for (i = 0; i < ROOT_ENTRY_NR; i++) {
545 root = &iommu->root_entry[i];
546 context = get_context_addr_from_root(root);
547 if (context)
548 free_pgtable_page(context);
549 }
550 free_pgtable_page(iommu->root_entry);
551 iommu->root_entry = NULL;
552 out:
553 spin_unlock_irqrestore(&iommu->lock, flags);
554 }
555
556 /* page table handling */
557 #define LEVEL_STRIDE (9)
558 #define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
559
560 static inline int agaw_to_level(int agaw)
561 {
562 return agaw + 2;
563 }
564
565 static inline int agaw_to_width(int agaw)
566 {
567 return 30 + agaw * LEVEL_STRIDE;
568
569 }
570
571 static inline int width_to_agaw(int width)
572 {
573 return (width - 30) / LEVEL_STRIDE;
574 }
575
576 static inline unsigned int level_to_offset_bits(int level)
577 {
578 return (12 + (level - 1) * LEVEL_STRIDE);
579 }
580
581 static inline int address_level_offset(u64 addr, int level)
582 {
583 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
584 }
585
586 static inline u64 level_mask(int level)
587 {
588 return ((u64)-1 << level_to_offset_bits(level));
589 }
590
591 static inline u64 level_size(int level)
592 {
593 return ((u64)1 << level_to_offset_bits(level));
594 }
595
596 static inline u64 align_to_level(u64 addr, int level)
597 {
598 return ((addr + level_size(level) - 1) & level_mask(level));
599 }
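
/*
 * Worked example of the level arithmetic above (illustrative only):
 * with agaw 2, agaw_to_level() is 4 and agaw_to_width() is 48. A leaf
 * entry is indexed by address bits 12-20 (level_to_offset_bits(1) == 12),
 * the top level by bits 39-47. level_size(2) is 1 << 21 = 2MB, so
 * align_to_level(addr, 2) rounds addr up to the next 2MB boundary.
 */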
600
601 static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
602 {
603 int addr_width = agaw_to_width(domain->agaw);
604 struct dma_pte *parent, *pte = NULL;
605 int level = agaw_to_level(domain->agaw);
606 int offset;
607 unsigned long flags;
608
609 BUG_ON(!domain->pgd);
610
611 addr &= (((u64)1) << addr_width) - 1;
612 parent = domain->pgd;
613
614 spin_lock_irqsave(&domain->mapping_lock, flags);
615 while (level > 0) {
616 void *tmp_page;
617
618 offset = address_level_offset(addr, level);
619 pte = &parent[offset];
620 if (level == 1)
621 break;
622
623 if (!dma_pte_present(pte)) {
624 tmp_page = alloc_pgtable_page();
625
626 if (!tmp_page) {
627 spin_unlock_irqrestore(&domain->mapping_lock,
628 flags);
629 return NULL;
630 }
631 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
632 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
633 /*
634 * higher-level tables always set r/w; the last-level page
635 * table controls read/write
636 */
637 dma_set_pte_readable(pte);
638 dma_set_pte_writable(pte);
639 domain_flush_cache(domain, pte, sizeof(*pte));
640 }
641 parent = phys_to_virt(dma_pte_addr(pte));
642 level--;
643 }
644
645 spin_unlock_irqrestore(&domain->mapping_lock, flags);
646 return pte;
647 }
648
649 /* return address's pte at specific level */
650 static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
651 int level)
652 {
653 struct dma_pte *parent, *pte = NULL;
654 int total = agaw_to_level(domain->agaw);
655 int offset;
656
657 parent = domain->pgd;
658 while (level <= total) {
659 offset = address_level_offset(addr, total);
660 pte = &parent[offset];
661 if (level == total)
662 return pte;
663
664 if (!dma_pte_present(pte))
665 break;
666 parent = phys_to_virt(dma_pte_addr(pte));
667 total--;
668 }
669 return NULL;
670 }
671
672 /* clear the last-level pte for one page */
673 static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
674 {
675 struct dma_pte *pte = NULL;
676
677 /* get last level pte */
678 pte = dma_addr_level_pte(domain, addr, 1);
679
680 if (pte) {
681 dma_clear_pte(pte);
682 domain_flush_cache(domain, pte, sizeof(*pte));
683 }
684 }
685
686 /* clear last level ptes; a tlb flush should follow */
687 static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
688 {
689 int addr_width = agaw_to_width(domain->agaw);
690
691 start &= (((u64)1) << addr_width) - 1;
692 end &= (((u64)1) << addr_width) - 1;
693 /* in case it's partial page */
694 start = PAGE_ALIGN(start);
695 end &= PAGE_MASK;
696
697 /* we don't need lock here, nobody else touches the iova range */
698 while (start < end) {
699 dma_pte_clear_one(domain, start);
700 start += VTD_PAGE_SIZE;
701 }
702 }
703
704 /* free page table pages. last level pte should already be cleared */
705 static void dma_pte_free_pagetable(struct dmar_domain *domain,
706 u64 start, u64 end)
707 {
708 int addr_width = agaw_to_width(domain->agaw);
709 struct dma_pte *pte;
710 int total = agaw_to_level(domain->agaw);
711 int level;
712 u64 tmp;
713
714 start &= (((u64)1) << addr_width) - 1;
715 end &= (((u64)1) << addr_width) - 1;
716
717 /* we don't need lock here, nobody else touches the iova range */
718 level = 2;
719 while (level <= total) {
720 tmp = align_to_level(start, level);
721 if (tmp >= end || (tmp + level_size(level) > end))
722 return;
723
724 while (tmp < end) {
725 pte = dma_addr_level_pte(domain, tmp, level);
726 if (pte) {
727 free_pgtable_page(
728 phys_to_virt(dma_pte_addr(pte)));
729 dma_clear_pte(pte);
730 domain_flush_cache(domain, pte, sizeof(*pte));
731 }
732 tmp += level_size(level);
733 }
734 level++;
735 }
736 /* free pgd */
737 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
738 free_pgtable_page(domain->pgd);
739 domain->pgd = NULL;
740 }
741 }
742
743 /* iommu handling */
744 static int iommu_alloc_root_entry(struct intel_iommu *iommu)
745 {
746 struct root_entry *root;
747 unsigned long flags;
748
749 root = (struct root_entry *)alloc_pgtable_page();
750 if (!root)
751 return -ENOMEM;
752
753 __iommu_flush_cache(iommu, root, ROOT_SIZE);
754
755 spin_lock_irqsave(&iommu->lock, flags);
756 iommu->root_entry = root;
757 spin_unlock_irqrestore(&iommu->lock, flags);
758
759 return 0;
760 }
761
762 static void iommu_set_root_entry(struct intel_iommu *iommu)
763 {
764 void *addr;
765 u32 cmd, sts;
766 unsigned long flag;
767
768 addr = iommu->root_entry;
769
770 spin_lock_irqsave(&iommu->register_lock, flag);
771 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
772
773 cmd = iommu->gcmd | DMA_GCMD_SRTP;
774 writel(cmd, iommu->reg + DMAR_GCMD_REG);
775
776 /* Make sure hardware completes it */
777 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
778 readl, (sts & DMA_GSTS_RTPS), sts);
779
780 spin_unlock_irqrestore(&iommu->register_lock, flag);
781 }
782
783 static void iommu_flush_write_buffer(struct intel_iommu *iommu)
784 {
785 u32 val;
786 unsigned long flag;
787
788 if (!cap_rwbf(iommu->cap))
789 return;
790 val = iommu->gcmd | DMA_GCMD_WBF;
791
792 spin_lock_irqsave(&iommu->register_lock, flag);
793 writel(val, iommu->reg + DMAR_GCMD_REG);
794
795 /* Make sure hardware completes it */
796 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
797 readl, (!(val & DMA_GSTS_WBFS)), val);
798
799 spin_unlock_irqrestore(&iommu->register_lock, flag);
800 }
801
802 /* return value determines whether we need a write buffer flush */
803 static int __iommu_flush_context(struct intel_iommu *iommu,
804 u16 did, u16 source_id, u8 function_mask, u64 type,
805 int non_present_entry_flush)
806 {
807 u64 val = 0;
808 unsigned long flag;
809
810 /*
811 * In the non-present entry flush case, if the hardware doesn't cache
812 * non-present entries we do nothing; if the hardware does cache
813 * non-present entries, we flush entries of domain 0 (that domain id is
814 * used to cache any non-present entries)
815 */
816 if (non_present_entry_flush) {
817 if (!cap_caching_mode(iommu->cap))
818 return 1;
819 else
820 did = 0;
821 }
822
823 switch (type) {
824 case DMA_CCMD_GLOBAL_INVL:
825 val = DMA_CCMD_GLOBAL_INVL;
826 break;
827 case DMA_CCMD_DOMAIN_INVL:
828 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
829 break;
830 case DMA_CCMD_DEVICE_INVL:
831 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
832 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
833 break;
834 default:
835 BUG();
836 }
837 val |= DMA_CCMD_ICC;
838
839 spin_lock_irqsave(&iommu->register_lock, flag);
840 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
841
842 /* Make sure hardware completes it */
843 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
844 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
845
846 spin_unlock_irqrestore(&iommu->register_lock, flag);
847
848 /* flushing a context entry will implicitly flush the write buffer */
849 return 0;
850 }
851
852 /* return value determines whether we need a write buffer flush */
853 static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
854 u64 addr, unsigned int size_order, u64 type,
855 int non_present_entry_flush)
856 {
857 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
858 u64 val = 0, val_iva = 0;
859 unsigned long flag;
860
861 /*
862 * In the non-present entry flush case, if the hardware doesn't cache
863 * non-present entries we do nothing; if the hardware does cache
864 * non-present entries, we flush entries of domain 0 (that domain id is
865 * used to cache any non-present entries)
866 */
867 if (non_present_entry_flush) {
868 if (!cap_caching_mode(iommu->cap))
869 return 1;
870 else
871 did = 0;
872 }
873
874 switch (type) {
875 case DMA_TLB_GLOBAL_FLUSH:
876 /* a global flush doesn't need to set IVA_REG */
877 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
878 break;
879 case DMA_TLB_DSI_FLUSH:
880 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
881 break;
882 case DMA_TLB_PSI_FLUSH:
883 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
884 /* Note: always flush non-leaf currently */
885 val_iva = size_order | addr;
886 break;
887 default:
888 BUG();
889 }
890 /* Note: set drain read/write */
891 #if 0
892 /*
893 * This is probably just being extra safe; it looks like we can
894 * ignore it without any impact.
895 */
896 if (cap_read_drain(iommu->cap))
897 val |= DMA_TLB_READ_DRAIN;
898 #endif
899 if (cap_write_drain(iommu->cap))
900 val |= DMA_TLB_WRITE_DRAIN;
901
902 spin_lock_irqsave(&iommu->register_lock, flag);
903 /* Note: Only uses first TLB reg currently */
904 if (val_iva)
905 dmar_writeq(iommu->reg + tlb_offset, val_iva);
906 dmar_writeq(iommu->reg + tlb_offset + 8, val);
907
908 /* Make sure hardware completes it */
909 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
910 dmar_readq, (!(val & DMA_TLB_IVT)), val);
911
912 spin_unlock_irqrestore(&iommu->register_lock, flag);
913
914 /* check IOTLB invalidation granularity */
915 if (DMA_TLB_IAIG(val) == 0)
916 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
917 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
918 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
919 (unsigned long long)DMA_TLB_IIRG(type),
920 (unsigned long long)DMA_TLB_IAIG(val));
921 /* flushing an iotlb entry will implicitly flush the write buffer */
922 return 0;
923 }
924
925 static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
926 u64 addr, unsigned int pages, int non_present_entry_flush)
927 {
928 unsigned int mask;
929
930 BUG_ON(addr & (~VTD_PAGE_MASK));
931 BUG_ON(pages == 0);
932
933 /* Fallback to domain selective flush if no PSI support */
934 if (!cap_pgsel_inv(iommu->cap))
935 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
936 DMA_TLB_DSI_FLUSH,
937 non_present_entry_flush);
938
939 /*
940 * PSI requires the number of pages to be 2 ^ x, and the base address
941 * to be naturally aligned to that size
942 */
943 mask = ilog2(__roundup_pow_of_two(pages));
944 /* Fallback to domain selective flush if size is too big */
945 if (mask > cap_max_amask_val(iommu->cap))
946 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
947 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
948
949 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
950 DMA_TLB_PSI_FLUSH,
951 non_present_entry_flush);
952 }
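
/*
 * Worked example (illustrative only): flushing 5 pages gives
 * mask = ilog2(__roundup_pow_of_two(5)) = ilog2(8) = 3, i.e. the PSI
 * covers 2^3 = 8 pages starting at addr, which the caller keeps
 * naturally aligned. If 3 exceeded cap_max_amask_val(), the code above
 * would fall back to a domain-selective flush instead.
 */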
953
954 static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
955 {
956 u32 pmen;
957 unsigned long flags;
958
959 spin_lock_irqsave(&iommu->register_lock, flags);
960 pmen = readl(iommu->reg + DMAR_PMEN_REG);
961 pmen &= ~DMA_PMEN_EPM;
962 writel(pmen, iommu->reg + DMAR_PMEN_REG);
963
964 /* wait for the protected region status bit to clear */
965 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
966 readl, !(pmen & DMA_PMEN_PRS), pmen);
967
968 spin_unlock_irqrestore(&iommu->register_lock, flags);
969 }
970
971 static int iommu_enable_translation(struct intel_iommu *iommu)
972 {
973 u32 sts;
974 unsigned long flags;
975
976 spin_lock_irqsave(&iommu->register_lock, flags);
977 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
978
979 /* Make sure hardware completes it */
980 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
981 readl, (sts & DMA_GSTS_TES), sts);
982
983 iommu->gcmd |= DMA_GCMD_TE;
984 spin_unlock_irqrestore(&iommu->register_lock, flags);
985 return 0;
986 }
987
988 static int iommu_disable_translation(struct intel_iommu *iommu)
989 {
990 u32 sts;
991 unsigned long flag;
992
993 spin_lock_irqsave(&iommu->register_lock, flag);
994 iommu->gcmd &= ~DMA_GCMD_TE;
995 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
996
997 /* Make sure hardware completes it */
998 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
999 readl, (!(sts & DMA_GSTS_TES)), sts);
1000
1001 spin_unlock_irqrestore(&iommu->register_lock, flag);
1002 return 0;
1003 }
1004
1005 /* iommu interrupt handling. Most of it is MSI-like. */
1006
1007 static const char *fault_reason_strings[] =
1008 {
1009 "Software",
1010 "Present bit in root entry is clear",
1011 "Present bit in context entry is clear",
1012 "Invalid context entry",
1013 "Access beyond MGAW",
1014 "PTE Write access is not set",
1015 "PTE Read access is not set",
1016 "Next page table ptr is invalid",
1017 "Root table address invalid",
1018 "Context table ptr is invalid",
1019 "non-zero reserved fields in RTP",
1020 "non-zero reserved fields in CTP",
1021 "non-zero reserved fields in PTE",
1022 };
1023 #define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
1024
1025 const char *dmar_get_fault_reason(u8 fault_reason)
1026 {
1027 if (fault_reason > MAX_FAULT_REASON_IDX)
1028 return "Unknown";
1029 else
1030 return fault_reason_strings[fault_reason];
1031 }
1032
1033 void dmar_msi_unmask(unsigned int irq)
1034 {
1035 struct intel_iommu *iommu = get_irq_data(irq);
1036 unsigned long flag;
1037
1038 /* unmask it */
1039 spin_lock_irqsave(&iommu->register_lock, flag);
1040 writel(0, iommu->reg + DMAR_FECTL_REG);
1041 /* Read a reg to force flush the post write */
1042 readl(iommu->reg + DMAR_FECTL_REG);
1043 spin_unlock_irqrestore(&iommu->register_lock, flag);
1044 }
1045
1046 void dmar_msi_mask(unsigned int irq)
1047 {
1048 unsigned long flag;
1049 struct intel_iommu *iommu = get_irq_data(irq);
1050
1051 /* mask it */
1052 spin_lock_irqsave(&iommu->register_lock, flag);
1053 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1054 /* Read a reg to force flush the post write */
1055 readl(iommu->reg + DMAR_FECTL_REG);
1056 spin_unlock_irqrestore(&iommu->register_lock, flag);
1057 }
1058
1059 void dmar_msi_write(int irq, struct msi_msg *msg)
1060 {
1061 struct intel_iommu *iommu = get_irq_data(irq);
1062 unsigned long flag;
1063
1064 spin_lock_irqsave(&iommu->register_lock, flag);
1065 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1066 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1067 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1068 spin_unlock_irqrestore(&iommu->register_lock, flag);
1069 }
1070
1071 void dmar_msi_read(int irq, struct msi_msg *msg)
1072 {
1073 struct intel_iommu *iommu = get_irq_data(irq);
1074 unsigned long flag;
1075
1076 spin_lock_irqsave(&iommu->register_lock, flag);
1077 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1078 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1079 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1080 spin_unlock_irqrestore(&iommu->register_lock, flag);
1081 }
1082
1083 static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
1084 u8 fault_reason, u16 source_id, unsigned long long addr)
1085 {
1086 const char *reason;
1087
1088 reason = dmar_get_fault_reason(fault_reason);
1089
1090 printk(KERN_ERR
1091 "DMAR:[%s] Request device [%02x:%02x.%d] "
1092 "fault addr %llx \n"
1093 "DMAR:[fault reason %02d] %s\n",
1094 (type ? "DMA Read" : "DMA Write"),
1095 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1096 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1097 return 0;
1098 }
1099
1100 #define PRIMARY_FAULT_REG_LEN (16)
1101 static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1102 {
1103 struct intel_iommu *iommu = dev_id;
1104 int reg, fault_index;
1105 u32 fault_status;
1106 unsigned long flag;
1107
1108 spin_lock_irqsave(&iommu->register_lock, flag);
1109 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1110
1111 /* TBD: ignore advanced fault log currently */
1112 if (!(fault_status & DMA_FSTS_PPF))
1113 goto clear_overflow;
1114
1115 fault_index = dma_fsts_fault_record_index(fault_status);
1116 reg = cap_fault_reg_offset(iommu->cap);
1117 while (1) {
1118 u8 fault_reason;
1119 u16 source_id;
1120 u64 guest_addr;
1121 int type;
1122 u32 data;
1123
1124 /* highest 32 bits */
1125 data = readl(iommu->reg + reg +
1126 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1127 if (!(data & DMA_FRCD_F))
1128 break;
1129
1130 fault_reason = dma_frcd_fault_reason(data);
1131 type = dma_frcd_type(data);
1132
1133 data = readl(iommu->reg + reg +
1134 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1135 source_id = dma_frcd_source_id(data);
1136
1137 guest_addr = dmar_readq(iommu->reg + reg +
1138 fault_index * PRIMARY_FAULT_REG_LEN);
1139 guest_addr = dma_frcd_page_addr(guest_addr);
1140 /* clear the fault */
1141 writel(DMA_FRCD_F, iommu->reg + reg +
1142 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1143
1144 spin_unlock_irqrestore(&iommu->register_lock, flag);
1145
1146 iommu_page_fault_do_one(iommu, type, fault_reason,
1147 source_id, guest_addr);
1148
1149 fault_index++;
1150 if (fault_index > cap_num_fault_regs(iommu->cap))
1151 fault_index = 0;
1152 spin_lock_irqsave(&iommu->register_lock, flag);
1153 }
1154 clear_overflow:
1155 /* clear primary fault overflow */
1156 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1157 if (fault_status & DMA_FSTS_PFO)
1158 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1159
1160 spin_unlock_irqrestore(&iommu->register_lock, flag);
1161 return IRQ_HANDLED;
1162 }
1163
1164 int dmar_set_interrupt(struct intel_iommu *iommu)
1165 {
1166 int irq, ret;
1167
1168 irq = create_irq();
1169 if (!irq) {
1170 printk(KERN_ERR "IOMMU: no free vectors\n");
1171 return -EINVAL;
1172 }
1173
1174 set_irq_data(irq, iommu);
1175 iommu->irq = irq;
1176
1177 ret = arch_setup_dmar_msi(irq);
1178 if (ret) {
1179 set_irq_data(irq, NULL);
1180 iommu->irq = 0;
1181 destroy_irq(irq);
1182 return 0;
1183 }
1184
1185 /* Make sure the fault register is cleared */
1186 iommu_page_fault(irq, iommu);
1187
1188 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1189 if (ret)
1190 printk(KERN_ERR "IOMMU: can't request irq\n");
1191 return ret;
1192 }
1193
1194 static int iommu_init_domains(struct intel_iommu *iommu)
1195 {
1196 unsigned long ndomains;
1197 unsigned long nlongs;
1198
1199 ndomains = cap_ndoms(iommu->cap);
1200 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
1201 nlongs = BITS_TO_LONGS(ndomains);
1202
1203 /* TBD: there might be 64K domains,
1204 * consider a different allocation scheme for future chips
1205 */
1206 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1207 if (!iommu->domain_ids) {
1208 printk(KERN_ERR "Allocating domain id array failed\n");
1209 return -ENOMEM;
1210 }
1211 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1212 GFP_KERNEL);
1213 if (!iommu->domains) {
1214 printk(KERN_ERR "Allocating domain array failed\n");
1215 kfree(iommu->domain_ids);
1216 return -ENOMEM;
1217 }
1218
1219 spin_lock_init(&iommu->lock);
1220
1221 /*
1222 * if Caching mode is set, then invalid translations are tagged
1223 * with domain id 0. Hence we need to pre-allocate it.
1224 */
1225 if (cap_caching_mode(iommu->cap))
1226 set_bit(0, iommu->domain_ids);
1227 return 0;
1228 }
1229
1230
1231 static void domain_exit(struct dmar_domain *domain);
1232 static void vm_domain_exit(struct dmar_domain *domain);
1233
1234 void free_dmar_iommu(struct intel_iommu *iommu)
1235 {
1236 struct dmar_domain *domain;
1237 int i;
1238 unsigned long flags;
1239
1240 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1241 for (; i < cap_ndoms(iommu->cap); ) {
1242 domain = iommu->domains[i];
1243 clear_bit(i, iommu->domain_ids);
1244
1245 spin_lock_irqsave(&domain->iommu_lock, flags);
1246 if (--domain->iommu_count == 0) {
1247 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1248 vm_domain_exit(domain);
1249 else
1250 domain_exit(domain);
1251 }
1252 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1253
1254 i = find_next_bit(iommu->domain_ids,
1255 cap_ndoms(iommu->cap), i+1);
1256 }
1257
1258 if (iommu->gcmd & DMA_GCMD_TE)
1259 iommu_disable_translation(iommu);
1260
1261 if (iommu->irq) {
1262 set_irq_data(iommu->irq, NULL);
1263 /* This will mask the irq */
1264 free_irq(iommu->irq, iommu);
1265 destroy_irq(iommu->irq);
1266 }
1267
1268 kfree(iommu->domains);
1269 kfree(iommu->domain_ids);
1270
1271 g_iommus[iommu->seq_id] = NULL;
1272
1273 /* if all iommus are freed, free g_iommus */
1274 for (i = 0; i < g_num_of_iommus; i++) {
1275 if (g_iommus[i])
1276 break;
1277 }
1278
1279 if (i == g_num_of_iommus)
1280 kfree(g_iommus);
1281
1282 /* free context mapping */
1283 free_context_table(iommu);
1284 }
1285
1286 static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1287 {
1288 unsigned long num;
1289 unsigned long ndomains;
1290 struct dmar_domain *domain;
1291 unsigned long flags;
1292
1293 domain = alloc_domain_mem();
1294 if (!domain)
1295 return NULL;
1296
1297 ndomains = cap_ndoms(iommu->cap);
1298
1299 spin_lock_irqsave(&iommu->lock, flags);
1300 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1301 if (num >= ndomains) {
1302 spin_unlock_irqrestore(&iommu->lock, flags);
1303 free_domain_mem(domain);
1304 printk(KERN_ERR "IOMMU: no free domain ids\n");
1305 return NULL;
1306 }
1307
1308 set_bit(num, iommu->domain_ids);
1309 domain->id = num;
1310 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1311 set_bit(iommu->seq_id, &domain->iommu_bmp);
1312 domain->flags = 0;
1313 iommu->domains[num] = domain;
1314 spin_unlock_irqrestore(&iommu->lock, flags);
1315
1316 return domain;
1317 }
1318
1319 static void iommu_free_domain(struct dmar_domain *domain)
1320 {
1321 unsigned long flags;
1322 struct intel_iommu *iommu;
1323
1324 iommu = domain_get_iommu(domain);
1325
1326 spin_lock_irqsave(&iommu->lock, flags);
1327 clear_bit(domain->id, iommu->domain_ids);
1328 spin_unlock_irqrestore(&iommu->lock, flags);
1329 }
1330
1331 static struct iova_domain reserved_iova_list;
1332 static struct lock_class_key reserved_alloc_key;
1333 static struct lock_class_key reserved_rbtree_key;
1334
1335 static void dmar_init_reserved_ranges(void)
1336 {
1337 struct pci_dev *pdev = NULL;
1338 struct iova *iova;
1339 int i;
1340 u64 addr, size;
1341
1342 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1343
1344 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1345 &reserved_alloc_key);
1346 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1347 &reserved_rbtree_key);
1348
1349 /* IOAPIC ranges shouldn't be accessed by DMA */
1350 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1351 IOVA_PFN(IOAPIC_RANGE_END));
1352 if (!iova)
1353 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1354
1355 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1356 for_each_pci_dev(pdev) {
1357 struct resource *r;
1358
1359 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1360 r = &pdev->resource[i];
1361 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1362 continue;
1363 addr = r->start;
1364 addr &= PAGE_MASK;
1365 size = r->end - addr;
1366 size = PAGE_ALIGN(size);
1367 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1368 IOVA_PFN(size + addr) - 1);
1369 if (!iova)
1370 printk(KERN_ERR "Reserve iova failed\n");
1371 }
1372 }
1373
1374 }
1375
1376 static void domain_reserve_special_ranges(struct dmar_domain *domain)
1377 {
1378 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1379 }
1380
1381 static inline int guestwidth_to_adjustwidth(int gaw)
1382 {
1383 int agaw;
1384 int r = (gaw - 12) % 9;
1385
1386 if (r == 0)
1387 agaw = gaw;
1388 else
1389 agaw = gaw + 9 - r;
1390 if (agaw > 64)
1391 agaw = 64;
1392 return agaw;
1393 }
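
/*
 * Worked example (illustrative only): gaw 48 gives r = (48 - 12) % 9 = 0,
 * so agaw stays 48; gaw 40 gives r = 1, so agaw = 40 + 9 - 1 = 48. The
 * guest width is rounded up so that the page table uses whole 9-bit
 * levels above the 12-bit page offset.
 */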
1394
1395 static int domain_init(struct dmar_domain *domain, int guest_width)
1396 {
1397 struct intel_iommu *iommu;
1398 int adjust_width, agaw;
1399 unsigned long sagaw;
1400
1401 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1402 spin_lock_init(&domain->mapping_lock);
1403 spin_lock_init(&domain->iommu_lock);
1404
1405 domain_reserve_special_ranges(domain);
1406
1407 /* calculate AGAW */
1408 iommu = domain_get_iommu(domain);
1409 if (guest_width > cap_mgaw(iommu->cap))
1410 guest_width = cap_mgaw(iommu->cap);
1411 domain->gaw = guest_width;
1412 adjust_width = guestwidth_to_adjustwidth(guest_width);
1413 agaw = width_to_agaw(adjust_width);
1414 sagaw = cap_sagaw(iommu->cap);
1415 if (!test_bit(agaw, &sagaw)) {
1416 /* hardware doesn't support it, choose a bigger one */
1417 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1418 agaw = find_next_bit(&sagaw, 5, agaw);
1419 if (agaw >= 5)
1420 return -ENODEV;
1421 }
1422 domain->agaw = agaw;
1423 INIT_LIST_HEAD(&domain->devices);
1424
1425 if (ecap_coherent(iommu->ecap))
1426 domain->iommu_coherency = 1;
1427 else
1428 domain->iommu_coherency = 0;
1429
1430 domain->iommu_count = 1;
1431
1432 /* always allocate the top pgd */
1433 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1434 if (!domain->pgd)
1435 return -ENOMEM;
1436 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1437 return 0;
1438 }
1439
1440 static void domain_exit(struct dmar_domain *domain)
1441 {
1442 u64 end;
1443
1444 /* Domain 0 is reserved, so don't process it */
1445 if (!domain)
1446 return;
1447
1448 domain_remove_dev_info(domain);
1449 /* destroy iovas */
1450 put_iova_domain(&domain->iovad);
1451 end = DOMAIN_MAX_ADDR(domain->gaw);
1452 end = end & (~PAGE_MASK);
1453
1454 /* clear ptes */
1455 dma_pte_clear_range(domain, 0, end);
1456
1457 /* free page tables */
1458 dma_pte_free_pagetable(domain, 0, end);
1459
1460 iommu_free_domain(domain);
1461 free_domain_mem(domain);
1462 }
1463
1464 static int domain_context_mapping_one(struct dmar_domain *domain,
1465 u8 bus, u8 devfn)
1466 {
1467 struct context_entry *context;
1468 unsigned long flags;
1469 struct intel_iommu *iommu;
1470 struct dma_pte *pgd;
1471 unsigned long num;
1472 unsigned long ndomains;
1473 int id;
1474 int agaw;
1475
1476 pr_debug("Set context mapping for %02x:%02x.%d\n",
1477 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1478 BUG_ON(!domain->pgd);
1479
1480 iommu = device_to_iommu(bus, devfn);
1481 if (!iommu)
1482 return -ENODEV;
1483
1484 context = device_to_context_entry(iommu, bus, devfn);
1485 if (!context)
1486 return -ENOMEM;
1487 spin_lock_irqsave(&iommu->lock, flags);
1488 if (context_present(context)) {
1489 spin_unlock_irqrestore(&iommu->lock, flags);
1490 return 0;
1491 }
1492
1493 id = domain->id;
1494 pgd = domain->pgd;
1495
1496 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1497 int found = 0;
1498
1499 /* find an available domain id for this device in iommu */
1500 ndomains = cap_ndoms(iommu->cap);
1501 num = find_first_bit(iommu->domain_ids, ndomains);
1502 for (; num < ndomains; ) {
1503 if (iommu->domains[num] == domain) {
1504 id = num;
1505 found = 1;
1506 break;
1507 }
1508 num = find_next_bit(iommu->domain_ids,
1509 cap_ndoms(iommu->cap), num+1);
1510 }
1511
1512 if (found == 0) {
1513 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1514 if (num >= ndomains) {
1515 spin_unlock_irqrestore(&iommu->lock, flags);
1516 printk(KERN_ERR "IOMMU: no free domain ids\n");
1517 return -EFAULT;
1518 }
1519
1520 set_bit(num, iommu->domain_ids);
1521 iommu->domains[num] = domain;
1522 id = num;
1523 }
1524
1525 /* Skip top levels of page tables for an
1526 * iommu whose agaw is smaller than the default.
1527 */
1528 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1529 pgd = phys_to_virt(dma_pte_addr(pgd));
1530 if (!dma_pte_present(pgd)) {
1531 spin_unlock_irqrestore(&iommu->lock, flags);
1532 return -ENOMEM;
1533 }
1534 }
1535 }
1536
1537 context_set_domain_id(context, id);
1538 context_set_address_width(context, iommu->agaw);
1539 context_set_address_root(context, virt_to_phys(pgd));
1540 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1541 context_set_fault_enable(context);
1542 context_set_present(context);
1543 domain_flush_cache(domain, context, sizeof(*context));
1544
1545 /* it's a non-present to present mapping */
1546 if (iommu->flush.flush_context(iommu, domain->id,
1547 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1548 DMA_CCMD_DEVICE_INVL, 1))
1549 iommu_flush_write_buffer(iommu);
1550 else
1551 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1552
1553 spin_unlock_irqrestore(&iommu->lock, flags);
1554
1555 spin_lock_irqsave(&domain->iommu_lock, flags);
1556 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1557 domain->iommu_count++;
1558 domain_update_iommu_coherency(domain);
1559 }
1560 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1561 return 0;
1562 }
1563
1564 static int
1565 domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1566 {
1567 int ret;
1568 struct pci_dev *tmp, *parent;
1569
1570 ret = domain_context_mapping_one(domain, pdev->bus->number,
1571 pdev->devfn);
1572 if (ret)
1573 return ret;
1574
1575 /* dependent device mapping */
1576 tmp = pci_find_upstream_pcie_bridge(pdev);
1577 if (!tmp)
1578 return 0;
1579 /* Secondary interface's bus number and devfn 0 */
1580 parent = pdev->bus->self;
1581 while (parent != tmp) {
1582 ret = domain_context_mapping_one(domain, parent->bus->number,
1583 parent->devfn);
1584 if (ret)
1585 return ret;
1586 parent = parent->bus->self;
1587 }
1588 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1589 return domain_context_mapping_one(domain,
1590 tmp->subordinate->number, 0);
1591 else /* this is a legacy PCI bridge */
1592 return domain_context_mapping_one(domain,
1593 tmp->bus->number, tmp->devfn);
1594 }
1595
1596 static int domain_context_mapped(struct pci_dev *pdev)
1597 {
1598 int ret;
1599 struct pci_dev *tmp, *parent;
1600 struct intel_iommu *iommu;
1601
1602 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1603 if (!iommu)
1604 return -ENODEV;
1605
1606 ret = device_context_mapped(iommu,
1607 pdev->bus->number, pdev->devfn);
1608 if (!ret)
1609 return ret;
1610 /* dependent device mapping */
1611 tmp = pci_find_upstream_pcie_bridge(pdev);
1612 if (!tmp)
1613 return ret;
1614 /* Secondary interface's bus number and devfn 0 */
1615 parent = pdev->bus->self;
1616 while (parent != tmp) {
1617 ret = device_context_mapped(iommu, parent->bus->number,
1618 parent->devfn);
1619 if (!ret)
1620 return ret;
1621 parent = parent->bus->self;
1622 }
1623 if (tmp->is_pcie)
1624 return device_context_mapped(iommu,
1625 tmp->subordinate->number, 0);
1626 else
1627 return device_context_mapped(iommu,
1628 tmp->bus->number, tmp->devfn);
1629 }
1630
1631 static int
1632 domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1633 u64 hpa, size_t size, int prot)
1634 {
1635 u64 start_pfn, end_pfn;
1636 struct dma_pte *pte;
1637 int index;
1638 int addr_width = agaw_to_width(domain->agaw);
1639
1640 hpa &= (((u64)1) << addr_width) - 1;
1641
1642 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1643 return -EINVAL;
1644 iova &= PAGE_MASK;
1645 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1646 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1647 index = 0;
1648 while (start_pfn < end_pfn) {
1649 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1650 if (!pte)
1651 return -ENOMEM;
1652 /* We don't need lock here, nobody else
1653 * touches the iova range
1654 */
1655 BUG_ON(dma_pte_addr(pte));
1656 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1657 dma_set_pte_prot(pte, prot);
1658 domain_flush_cache(domain, pte, sizeof(*pte));
1659 start_pfn++;
1660 index++;
1661 }
1662 return 0;
1663 }
1664
1665 static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
1666 {
1667 if (!iommu)
1668 return;
1669
1670 clear_context_table(iommu, bus, devfn);
1671 iommu->flush.flush_context(iommu, 0, 0, 0,
1672 DMA_CCMD_GLOBAL_INVL, 0);
1673 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1674 DMA_TLB_GLOBAL_FLUSH, 0);
1675 }
1676
1677 static void domain_remove_dev_info(struct dmar_domain *domain)
1678 {
1679 struct device_domain_info *info;
1680 unsigned long flags;
1681 struct intel_iommu *iommu;
1682
1683 spin_lock_irqsave(&device_domain_lock, flags);
1684 while (!list_empty(&domain->devices)) {
1685 info = list_entry(domain->devices.next,
1686 struct device_domain_info, link);
1687 list_del(&info->link);
1688 list_del(&info->global);
1689 if (info->dev)
1690 info->dev->dev.archdata.iommu = NULL;
1691 spin_unlock_irqrestore(&device_domain_lock, flags);
1692
1693 iommu = device_to_iommu(info->bus, info->devfn);
1694 iommu_detach_dev(iommu, info->bus, info->devfn);
1695 free_devinfo_mem(info);
1696
1697 spin_lock_irqsave(&device_domain_lock, flags);
1698 }
1699 spin_unlock_irqrestore(&device_domain_lock, flags);
1700 }
1701
1702 /*
1703 * find_domain
1704 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1705 */
1706 static struct dmar_domain *
1707 find_domain(struct pci_dev *pdev)
1708 {
1709 struct device_domain_info *info;
1710
1711 /* No lock here, assumes no domain exit in normal case */
1712 info = pdev->dev.archdata.iommu;
1713 if (info)
1714 return info->domain;
1715 return NULL;
1716 }
1717
1718 /* domain is initialized */
1719 static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1720 {
1721 struct dmar_domain *domain, *found = NULL;
1722 struct intel_iommu *iommu;
1723 struct dmar_drhd_unit *drhd;
1724 struct device_domain_info *info, *tmp;
1725 struct pci_dev *dev_tmp;
1726 unsigned long flags;
1727 int bus = 0, devfn = 0;
1728
1729 domain = find_domain(pdev);
1730 if (domain)
1731 return domain;
1732
1733 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1734 if (dev_tmp) {
1735 if (dev_tmp->is_pcie) {
1736 bus = dev_tmp->subordinate->number;
1737 devfn = 0;
1738 } else {
1739 bus = dev_tmp->bus->number;
1740 devfn = dev_tmp->devfn;
1741 }
1742 spin_lock_irqsave(&device_domain_lock, flags);
1743 list_for_each_entry(info, &device_domain_list, global) {
1744 if (info->bus == bus && info->devfn == devfn) {
1745 found = info->domain;
1746 break;
1747 }
1748 }
1749 spin_unlock_irqrestore(&device_domain_lock, flags);
1750 /* the pcie-pci bridge already has a domain, use it */
1751 if (found) {
1752 domain = found;
1753 goto found_domain;
1754 }
1755 }
1756
1757 /* Allocate new domain for the device */
1758 drhd = dmar_find_matched_drhd_unit(pdev);
1759 if (!drhd) {
1760 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1761 pci_name(pdev));
1762 return NULL;
1763 }
1764 iommu = drhd->iommu;
1765
1766 domain = iommu_alloc_domain(iommu);
1767 if (!domain)
1768 goto error;
1769
1770 if (domain_init(domain, gaw)) {
1771 domain_exit(domain);
1772 goto error;
1773 }
1774
1775 /* register pcie-to-pci device */
1776 if (dev_tmp) {
1777 info = alloc_devinfo_mem();
1778 if (!info) {
1779 domain_exit(domain);
1780 goto error;
1781 }
1782 info->bus = bus;
1783 info->devfn = devfn;
1784 info->dev = NULL;
1785 info->domain = domain;
1786 /* This domain is shared by devices under p2p bridge */
1787 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
1788
1789 /* the pcie-to-pci bridge already has a domain, use it */
1790 found = NULL;
1791 spin_lock_irqsave(&device_domain_lock, flags);
1792 list_for_each_entry(tmp, &device_domain_list, global) {
1793 if (tmp->bus == bus && tmp->devfn == devfn) {
1794 found = tmp->domain;
1795 break;
1796 }
1797 }
1798 if (found) {
1799 free_devinfo_mem(info);
1800 domain_exit(domain);
1801 domain = found;
1802 } else {
1803 list_add(&info->link, &domain->devices);
1804 list_add(&info->global, &device_domain_list);
1805 }
1806 spin_unlock_irqrestore(&device_domain_lock, flags);
1807 }
1808
1809 found_domain:
1810 info = alloc_devinfo_mem();
1811 if (!info)
1812 goto error;
1813 info->bus = pdev->bus->number;
1814 info->devfn = pdev->devfn;
1815 info->dev = pdev;
1816 info->domain = domain;
1817 spin_lock_irqsave(&device_domain_lock, flags);
1818 /* somebody is fast */
1819 found = find_domain(pdev);
1820 if (found != NULL) {
1821 spin_unlock_irqrestore(&device_domain_lock, flags);
1822 if (found != domain) {
1823 domain_exit(domain);
1824 domain = found;
1825 }
1826 free_devinfo_mem(info);
1827 return domain;
1828 }
1829 list_add(&info->link, &domain->devices);
1830 list_add(&info->global, &device_domain_list);
1831 pdev->dev.archdata.iommu = info;
1832 spin_unlock_irqrestore(&device_domain_lock, flags);
1833 return domain;
1834 error:
1835 /* recheck it here, maybe others set it */
1836 return find_domain(pdev);
1837 }
1838
1839 static int iommu_prepare_identity_map(struct pci_dev *pdev,
1840 unsigned long long start,
1841 unsigned long long end)
1842 {
1843 struct dmar_domain *domain;
1844 unsigned long size;
1845 unsigned long long base;
1846 int ret;
1847
1848 printk(KERN_INFO
1849 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1850 pci_name(pdev), start, end);
1851 /* page table init */
1852 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1853 if (!domain)
1854 return -ENOMEM;
1855
1856 /* The address might not be aligned */
1857 base = start & PAGE_MASK;
1858 size = end - base;
1859 size = PAGE_ALIGN(size);
1860 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1861 IOVA_PFN(base + size) - 1)) {
1862 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1863 ret = -ENOMEM;
1864 goto error;
1865 }
1866
1867 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1868 size, base, pci_name(pdev));
1869 /*
1870 * The RMRR range might overlap with the physical memory range;
1871 * clear it first
1872 */
1873 dma_pte_clear_range(domain, base, base + size);
1874
1875 ret = domain_page_mapping(domain, base, base, size,
1876 DMA_PTE_READ|DMA_PTE_WRITE);
1877 if (ret)
1878 goto error;
1879
1880 /* context entry init */
1881 ret = domain_context_mapping(domain, pdev);
1882 if (!ret)
1883 return 0;
1884 error:
1885 domain_exit(domain);
1886 return ret;
1887
1888 }
1889
1890 static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1891 struct pci_dev *pdev)
1892 {
1893 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1894 return 0;
1895 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1896 rmrr->end_address + 1);
1897 }
1898
1899 #ifdef CONFIG_DMAR_GFX_WA
1900 struct iommu_prepare_data {
1901 struct pci_dev *pdev;
1902 int ret;
1903 };
1904
1905 static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1906 unsigned long end_pfn, void *datax)
1907 {
1908 struct iommu_prepare_data *data;
1909
1910 data = (struct iommu_prepare_data *)datax;
1911
1912 data->ret = iommu_prepare_identity_map(data->pdev,
1913 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1914 return data->ret;
1915
1916 }
1917
1918 static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1919 {
1920 int nid;
1921 struct iommu_prepare_data data;
1922
1923 data.pdev = pdev;
1924 data.ret = 0;
1925
1926 for_each_online_node(nid) {
1927 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1928 if (data.ret)
1929 return data.ret;
1930 }
1931 return data.ret;
1932 }
1933
1934 static void __init iommu_prepare_gfx_mapping(void)
1935 {
1936 struct pci_dev *pdev = NULL;
1937 int ret;
1938
1939 for_each_pci_dev(pdev) {
1940 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1941 !IS_GFX_DEVICE(pdev))
1942 continue;
1943 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1944 pci_name(pdev));
1945 ret = iommu_prepare_with_active_regions(pdev);
1946 if (ret)
1947 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1948 }
1949 }
1950 #else /* !CONFIG_DMAR_GFX_WA */
1951 static inline void iommu_prepare_gfx_mapping(void)
1952 {
1953 return;
1954 }
1955 #endif
1956
1957 #ifdef CONFIG_DMAR_FLOPPY_WA
1958 static inline void iommu_prepare_isa(void)
1959 {
1960 struct pci_dev *pdev;
1961 int ret;
1962
1963 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1964 if (!pdev)
1965 return;
1966
1967 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1968 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1969
1970 if (ret)
1971 printk("IOMMU: Failed to create 0-64M identity map, "
1972 "floppy might not work\n");
1973
1974 }
1975 #else
1976 static inline void iommu_prepare_isa(void)
1977 {
1978 return;
1979 }
1980 #endif /* !CONFIG_DMAR_FLOPPY_WA */
1981
1982 static int __init init_dmars(void)
1983 {
1984 struct dmar_drhd_unit *drhd;
1985 struct dmar_rmrr_unit *rmrr;
1986 struct pci_dev *pdev;
1987 struct intel_iommu *iommu;
1988 int i, ret, unit = 0;
1989
1990 /*
1991 * for each drhd
1992 * allocate root
1993 * initialize and program root entry to not present
1994 * endfor
1995 */
1996 for_each_drhd_unit(drhd) {
1997 g_num_of_iommus++;
1998 /*
1999 * lock not needed as this is only incremented in the single-
2000 * threaded kernel __init code path; all other accesses are
2001 * read only
2002 */
2003 }
2004
2005 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2006 GFP_KERNEL);
2007 if (!g_iommus) {
2008 printk(KERN_ERR "Allocating global iommu array failed\n");
2009 ret = -ENOMEM;
2010 goto error;
2011 }
2012
2013 deferred_flush = kzalloc(g_num_of_iommus *
2014 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2015 if (!deferred_flush) {
2016 kfree(g_iommus);
2017 ret = -ENOMEM;
2018 goto error;
2019 }
2020
2021 for_each_drhd_unit(drhd) {
2022 if (drhd->ignored)
2023 continue;
2024
2025 iommu = drhd->iommu;
2026 g_iommus[iommu->seq_id] = iommu;
2027
2028 ret = iommu_init_domains(iommu);
2029 if (ret)
2030 goto error;
2031
2032 /*
2033 * TBD:
2034 * we could share the same root & context tables
2035 * among all IOMMUs.  Needs to be split out later.
2036 */
2037 ret = iommu_alloc_root_entry(iommu);
2038 if (ret) {
2039 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2040 goto error;
2041 }
2042 }
2043
2044 for_each_drhd_unit(drhd) {
2045 if (drhd->ignored)
2046 continue;
2047
2048 iommu = drhd->iommu;
2049 if (dmar_enable_qi(iommu)) {
2050 /*
2051 * Queued Invalidate not enabled, use Register Based
2052 * Invalidate
2053 */
2054 iommu->flush.flush_context = __iommu_flush_context;
2055 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2056 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
2057 "invalidation\n",
2058 (unsigned long long)drhd->reg_base_addr);
2059 } else {
2060 iommu->flush.flush_context = qi_flush_context;
2061 iommu->flush.flush_iotlb = qi_flush_iotlb;
2062 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
2063 "invalidation\n",
2064 (unsigned long long)drhd->reg_base_addr);
2065 }
2066 }
2067
2068 /*
2069 * For each rmrr
2070 * for each dev attached to rmrr
2071 * do
2072 * locate drhd for dev, alloc domain for dev
2073 * allocate free domain
2074 * allocate page table entries for rmrr
2075 * if context not allocated for bus
2076 * allocate and init context
2077 * set present in root table for this bus
2078 * init context with domain, translation etc
2079 * endfor
2080 * endfor
2081 */
2082 for_each_rmrr_units(rmrr) {
2083 for (i = 0; i < rmrr->devices_cnt; i++) {
2084 pdev = rmrr->devices[i];
2085 /* some BIOSes list non-existent devices in the DMAR table */
2086 if (!pdev)
2087 continue;
2088 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2089 if (ret)
2090 printk(KERN_ERR
2091 "IOMMU: mapping reserved region failed\n");
2092 }
2093 }
2094
2095 iommu_prepare_gfx_mapping();
2096
2097 iommu_prepare_isa();
2098
2099 /*
2100 * for each drhd
2101 * enable fault log
2102 * global invalidate context cache
2103 * global invalidate iotlb
2104 * enable translation
2105 */
2106 for_each_drhd_unit(drhd) {
2107 if (drhd->ignored)
2108 continue;
2109 iommu = drhd->iommu;
2110 sprintf(iommu->name, "dmar%d", unit++);
2111
2112 iommu_flush_write_buffer(iommu);
2113
2114 ret = dmar_set_interrupt(iommu);
2115 if (ret)
2116 goto error;
2117
2118 iommu_set_root_entry(iommu);
2119
2120 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2121 0);
2122 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2123 0);
2124 iommu_disable_protect_mem_regions(iommu);
2125
2126 ret = iommu_enable_translation(iommu);
2127 if (ret)
2128 goto error;
2129 }
2130
2131 return 0;
2132 error:
2133 for_each_drhd_unit(drhd) {
2134 if (drhd->ignored)
2135 continue;
2136 iommu = drhd->iommu;
2137 free_iommu(iommu);
2138 }
2139 kfree(g_iommus);
2140 return ret;
2141 }
2142
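/*
 * Number of bytes needed to cover host_addr .. host_addr + size with
 * whole pages: the in-page offset of host_addr plus size, rounded up
 * to a page boundary.
 */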
2143 static inline u64 aligned_size(u64 host_addr, size_t size)
2144 {
2145 u64 addr;
2146 addr = (host_addr & (~PAGE_MASK)) + size;
2147 return PAGE_ALIGN(addr);
2148 }
2149
2150 struct iova *
2151 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
2152 {
2153 struct iova *piova;
2154
2155 /* Make sure it's in range */
2156 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
2157 if (!size || (IOVA_START_ADDR + size > end))
2158 return NULL;
2159
2160 piova = alloc_iova(&domain->iovad,
2161 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
2162 return piova;
2163 }
2164
2165 static struct iova *
2166 __intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
2167 size_t size, u64 dma_mask)
2168 {
2169 struct pci_dev *pdev = to_pci_dev(dev);
2170 struct iova *iova = NULL;
2171
2172 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2173 iova = iommu_alloc_iova(domain, size, dma_mask);
2174 else {
2175 /*
2176 * First try to allocate an io virtual address in
2177 * DMA_32BIT_MASK and if that fails then try allocating
2178 * from higher range
2179 */
2180 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2181 if (!iova)
2182 iova = iommu_alloc_iova(domain, size, dma_mask);
2183 }
2184
2185 if (!iova) {
2186 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2187 return NULL;
2188 }
2189
2190 return iova;
2191 }
2192
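/*
 * Look up (or create) the remapping domain for pdev and make sure a
 * context entry pointing at it has been programmed.  Returns NULL on
 * failure.
 */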
2193 static struct dmar_domain *
2194 get_valid_domain_for_dev(struct pci_dev *pdev)
2195 {
2196 struct dmar_domain *domain;
2197 int ret;
2198
2199 domain = get_domain_for_dev(pdev,
2200 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2201 if (!domain) {
2202 printk(KERN_ERR
2203 "Allocating domain for %s failed", pci_name(pdev));
2204 return NULL;
2205 }
2206
2207 /* make sure context mapping is ok */
2208 if (unlikely(!domain_context_mapped(pdev))) {
2209 ret = domain_context_mapping(domain, pdev);
2210 if (ret) {
2211 printk(KERN_ERR
2212 "Domain context map for %s failed",
2213 pci_name(pdev));
2214 return NULL;
2215 }
2216 }
2217
2218 return domain;
2219 }
2220
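/*
 * Core streaming-DMA map: allocate an IOVA range that fits the
 * device's DMA mask, map it onto the page-aligned physical range with
 * permissions derived from the DMA direction, then flush the IOTLB
 * (or the write buffer) for the non-present to present transition.
 * Returns the bus address, or 0 on failure.
 */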
2221 static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2222 size_t size, int dir, u64 dma_mask)
2223 {
2224 struct pci_dev *pdev = to_pci_dev(hwdev);
2225 struct dmar_domain *domain;
2226 phys_addr_t start_paddr;
2227 struct iova *iova;
2228 int prot = 0;
2229 int ret;
2230 struct intel_iommu *iommu;
2231
2232 BUG_ON(dir == DMA_NONE);
2233 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2234 return paddr;
2235
2236 domain = get_valid_domain_for_dev(pdev);
2237 if (!domain)
2238 return 0;
2239
2240 iommu = domain_get_iommu(domain);
2241 size = aligned_size((u64)paddr, size);
2242
2243 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2244 if (!iova)
2245 goto error;
2246
2247 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2248
2249 /*
2250 * Check if DMAR supports zero-length reads on write only
2251 * mappings..
2252 */
2253 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2254 !cap_zlr(iommu->cap))
2255 prot |= DMA_PTE_READ;
2256 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2257 prot |= DMA_PTE_WRITE;
2258 /*
2259 * paddr .. paddr + size may cover only part of a page, but we must
2260 * map whole pages.  Note: if two parts of one page are mapped
2261 * separately, two guest addresses may map to the same host paddr;
2262 * this is not a big problem.
2263 */
2264 ret = domain_page_mapping(domain, start_paddr,
2265 ((u64)paddr) & PAGE_MASK, size, prot);
2266 if (ret)
2267 goto error;
2268
2269 /* it's a non-present to present mapping */
2270 ret = iommu_flush_iotlb_psi(iommu, domain->id,
2271 start_paddr, size >> VTD_PAGE_SHIFT, 1);
2272 if (ret)
2273 iommu_flush_write_buffer(iommu);
2274
2275 return start_paddr + ((u64)paddr & (~PAGE_MASK));
2276
2277 error:
2278 if (iova)
2279 __free_iova(&domain->iovad, iova);
2280 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
2281 pci_name(pdev), size, (unsigned long long)paddr, dir);
2282 return 0;
2283 }
2284
2285 dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2286 size_t size, int dir)
2287 {
2288 return __intel_map_single(hwdev, paddr, size, dir,
2289 to_pci_dev(hwdev)->dma_mask);
2290 }
2291
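/*
 * Flush the deferred unmaps: for every IOMMU with queued entries do
 * one global IOTLB invalidation and free the queued IOVAs.  Callers
 * hold async_umap_flush_lock.
 */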
2292 static void flush_unmaps(void)
2293 {
2294 int i, j;
2295
2296 timer_on = 0;
2297
2298 /* just flush them all */
2299 for (i = 0; i < g_num_of_iommus; i++) {
2300 struct intel_iommu *iommu = g_iommus[i];
2301 if (!iommu)
2302 continue;
2303
2304 if (deferred_flush[i].next) {
2305 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2306 DMA_TLB_GLOBAL_FLUSH, 0);
2307 for (j = 0; j < deferred_flush[i].next; j++) {
2308 __free_iova(&deferred_flush[i].domain[j]->iovad,
2309 deferred_flush[i].iova[j]);
2310 }
2311 deferred_flush[i].next = 0;
2312 }
2313 }
2314
2315 list_size = 0;
2316 }
2317
2318 static void flush_unmaps_timeout(unsigned long data)
2319 {
2320 unsigned long flags;
2321
2322 spin_lock_irqsave(&async_umap_flush_lock, flags);
2323 flush_unmaps();
2324 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2325 }
2326
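/*
 * Defer freeing of an IOVA: queue it on the per-IOMMU deferred_flush
 * list and arm unmap_timer (10ms) so the IOTLB invalidations are
 * batched; flush immediately once HIGH_WATER_MARK entries pile up.
 */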
2327 static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2328 {
2329 unsigned long flags;
2330 int next, iommu_id;
2331 struct intel_iommu *iommu;
2332
2333 spin_lock_irqsave(&async_umap_flush_lock, flags);
2334 if (list_size == HIGH_WATER_MARK)
2335 flush_unmaps();
2336
2337 iommu = domain_get_iommu(dom);
2338 iommu_id = iommu->seq_id;
2339
2340 next = deferred_flush[iommu_id].next;
2341 deferred_flush[iommu_id].domain[next] = dom;
2342 deferred_flush[iommu_id].iova[next] = iova;
2343 deferred_flush[iommu_id].next++;
2344
2345 if (!timer_on) {
2346 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2347 timer_on = 1;
2348 }
2349 list_size++;
2350 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2351 }
2352
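/*
 * Tear down a single mapping: clear the PTEs and free the page tables
 * covering the IOVA range, then either flush the IOTLB and free the
 * IOVA right away (intel_iommu_strict) or hand the IOVA to the
 * deferred-flush machinery.
 */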
2353 void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2354 int dir)
2355 {
2356 struct pci_dev *pdev = to_pci_dev(dev);
2357 struct dmar_domain *domain;
2358 unsigned long start_addr;
2359 struct iova *iova;
2360 struct intel_iommu *iommu;
2361
2362 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2363 return;
2364 domain = find_domain(pdev);
2365 BUG_ON(!domain);
2366
2367 iommu = domain_get_iommu(domain);
2368
2369 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2370 if (!iova)
2371 return;
2372
2373 start_addr = iova->pfn_lo << PAGE_SHIFT;
2374 size = aligned_size((u64)dev_addr, size);
2375
2376 pr_debug("Device %s unmapping: %zx@%llx\n",
2377 pci_name(pdev), size, (unsigned long long)start_addr);
2378
2379 /* clear the whole page */
2380 dma_pte_clear_range(domain, start_addr, start_addr + size);
2381 /* free page tables */
2382 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2383 if (intel_iommu_strict) {
2384 if (iommu_flush_iotlb_psi(iommu,
2385 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
2386 iommu_flush_write_buffer(iommu);
2387 /* free iova */
2388 __free_iova(&domain->iovad, iova);
2389 } else {
2390 add_unmap(domain, iova);
2391 /*
2392 * queue up the release of the unmap to save the ~1/6th of a
2393 * CPU otherwise used up by the IOTLB flush operation...
2394 */
2395 }
2396 }
2397
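/*
 * Coherent allocations are plain zeroed pages mapped bidirectionally
 * through __intel_map_single() against the device's coherent DMA mask.
 */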
2398 void *intel_alloc_coherent(struct device *hwdev, size_t size,
2399 dma_addr_t *dma_handle, gfp_t flags)
2400 {
2401 void *vaddr;
2402 int order;
2403
2404 size = PAGE_ALIGN(size);
2405 order = get_order(size);
2406 flags &= ~(GFP_DMA | GFP_DMA32);
2407
2408 vaddr = (void *)__get_free_pages(flags, order);
2409 if (!vaddr)
2410 return NULL;
2411 memset(vaddr, 0, size);
2412
2413 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2414 DMA_BIDIRECTIONAL,
2415 hwdev->coherent_dma_mask);
2416 if (*dma_handle)
2417 return vaddr;
2418 free_pages((unsigned long)vaddr, order);
2419 return NULL;
2420 }
2421
2422 void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2423 dma_addr_t dma_handle)
2424 {
2425 int order;
2426
2427 size = PAGE_ALIGN(size);
2428 order = get_order(size);
2429
2430 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2431 free_pages((unsigned long)vaddr, order);
2432 }
2433
2434 #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2435
2436 void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2437 int nelems, int dir)
2438 {
2439 int i;
2440 struct pci_dev *pdev = to_pci_dev(hwdev);
2441 struct dmar_domain *domain;
2442 unsigned long start_addr;
2443 struct iova *iova;
2444 size_t size = 0;
2445 void *addr;
2446 struct scatterlist *sg;
2447 struct intel_iommu *iommu;
2448
2449 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2450 return;
2451
2452 domain = find_domain(pdev);
2453 BUG_ON(!domain);
2454
2455 iommu = domain_get_iommu(domain);
2456
2457 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2458 if (!iova)
2459 return;
2460 for_each_sg(sglist, sg, nelems, i) {
2461 addr = SG_ENT_VIRT_ADDRESS(sg);
2462 size += aligned_size((u64)addr, sg->length);
2463 }
2464
2465 start_addr = iova->pfn_lo << PAGE_SHIFT;
2466
2467 /* clear the whole page */
2468 dma_pte_clear_range(domain, start_addr, start_addr + size);
2469 /* free page tables */
2470 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2471
2472 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
2473 size >> VTD_PAGE_SHIFT, 0))
2474 iommu_flush_write_buffer(iommu);
2475
2476 /* free iova */
2477 __free_iova(&domain->iovad, iova);
2478 }
2479
2480 static int intel_nontranslate_map_sg(struct device *hddev,
2481 struct scatterlist *sglist, int nelems, int dir)
2482 {
2483 int i;
2484 struct scatterlist *sg;
2485
2486 for_each_sg(sglist, sg, nelems, i) {
2487 BUG_ON(!sg_page(sg));
2488 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2489 sg->dma_length = sg->length;
2490 }
2491 return nelems;
2492 }
2493
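/*
 * Map a scatterlist: compute the total page-aligned length, allocate
 * one contiguous IOVA block for it, then map each segment at its
 * running offset.  Any mapping failure tears down what was already
 * mapped and returns 0.
 */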
2494 int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2495 int dir)
2496 {
2497 void *addr;
2498 int i;
2499 struct pci_dev *pdev = to_pci_dev(hwdev);
2500 struct dmar_domain *domain;
2501 size_t size = 0;
2502 int prot = 0;
2503 size_t offset = 0;
2504 struct iova *iova = NULL;
2505 int ret;
2506 struct scatterlist *sg;
2507 unsigned long start_addr;
2508 struct intel_iommu *iommu;
2509
2510 BUG_ON(dir == DMA_NONE);
2511 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2512 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
2513
2514 domain = get_valid_domain_for_dev(pdev);
2515 if (!domain)
2516 return 0;
2517
2518 iommu = domain_get_iommu(domain);
2519
2520 for_each_sg(sglist, sg, nelems, i) {
2521 addr = SG_ENT_VIRT_ADDRESS(sg);
2522 addr = (void *)virt_to_phys(addr);
2523 size += aligned_size((u64)addr, sg->length);
2524 }
2525
2526 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
2527 if (!iova) {
2528 sglist->dma_length = 0;
2529 return 0;
2530 }
2531
2532 /*
2533 * Check if DMAR supports zero-length reads on write only
2534 * mappings..
2535 */
2536 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2537 !cap_zlr(iommu->cap))
2538 prot |= DMA_PTE_READ;
2539 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2540 prot |= DMA_PTE_WRITE;
2541
2542 start_addr = iova->pfn_lo << PAGE_SHIFT;
2543 offset = 0;
2544 for_each_sg(sglist, sg, nelems, i) {
2545 addr = SG_ENT_VIRT_ADDRESS(sg);
2546 addr = (void *)virt_to_phys(addr);
2547 size = aligned_size((u64)addr, sg->length);
2548 ret = domain_page_mapping(domain, start_addr + offset,
2549 ((u64)addr) & PAGE_MASK,
2550 size, prot);
2551 if (ret) {
2552 /* clear the page */
2553 dma_pte_clear_range(domain, start_addr,
2554 start_addr + offset);
2555 /* free page tables */
2556 dma_pte_free_pagetable(domain, start_addr,
2557 start_addr + offset);
2558 /* free iova */
2559 __free_iova(&domain->iovad, iova);
2560 return 0;
2561 }
2562 sg->dma_address = start_addr + offset +
2563 ((u64)addr & (~PAGE_MASK));
2564 sg->dma_length = sg->length;
2565 offset += size;
2566 }
2567
2568 /* it's a non-present to present mapping */
2569 if (iommu_flush_iotlb_psi(iommu, domain->id,
2570 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2571 iommu_flush_write_buffer(iommu);
2572 return nelems;
2573 }
2574
2575 static struct dma_mapping_ops intel_dma_ops = {
2576 .alloc_coherent = intel_alloc_coherent,
2577 .free_coherent = intel_free_coherent,
2578 .map_single = intel_map_single,
2579 .unmap_single = intel_unmap_single,
2580 .map_sg = intel_map_sg,
2581 .unmap_sg = intel_unmap_sg,
2582 };
2583
2584 static inline int iommu_domain_cache_init(void)
2585 {
2586 int ret = 0;
2587
2588 iommu_domain_cache = kmem_cache_create("iommu_domain",
2589 sizeof(struct dmar_domain),
2590 0,
2591 SLAB_HWCACHE_ALIGN,
2592 NULL);
2594 if (!iommu_domain_cache) {
2595 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2596 ret = -ENOMEM;
2597 }
2598
2599 return ret;
2600 }
2601
2602 static inline int iommu_devinfo_cache_init(void)
2603 {
2604 int ret = 0;
2605
2606 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2607 sizeof(struct device_domain_info),
2608 0,
2609 SLAB_HWCACHE_ALIGN,
2610 NULL);
2611 if (!iommu_devinfo_cache) {
2612 printk(KERN_ERR "Couldn't create devinfo cache\n");
2613 ret = -ENOMEM;
2614 }
2615
2616 return ret;
2617 }
2618
2619 static inline int iommu_iova_cache_init(void)
2620 {
2621 int ret = 0;
2622
2623 iommu_iova_cache = kmem_cache_create("iommu_iova",
2624 sizeof(struct iova),
2625 0,
2626 SLAB_HWCACHE_ALIGN,
2627 NULL);
2628 if (!iommu_iova_cache) {
2629 printk(KERN_ERR "Couldn't create iova cache\n");
2630 ret = -ENOMEM;
2631 }
2632
2633 return ret;
2634 }
2635
2636 static int __init iommu_init_mempool(void)
2637 {
2638 int ret;
2639 ret = iommu_iova_cache_init();
2640 if (ret)
2641 return ret;
2642
2643 ret = iommu_domain_cache_init();
2644 if (ret)
2645 goto domain_error;
2646
2647 ret = iommu_devinfo_cache_init();
2648 if (!ret)
2649 return ret;
2650
2651 kmem_cache_destroy(iommu_domain_cache);
2652 domain_error:
2653 kmem_cache_destroy(iommu_iova_cache);
2654
2655 return -ENOMEM;
2656 }
2657
2658 static void __init iommu_exit_mempool(void)
2659 {
2660 kmem_cache_destroy(iommu_devinfo_cache);
2661 kmem_cache_destroy(iommu_domain_cache);
2662 kmem_cache_destroy(iommu_iova_cache);
2663
2664 }
2665
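/*
 * Mark DRHD units that cover no PCI devices as ignored.  If gfx
 * mapping is disabled, also bypass units that only cover graphics
 * devices and tag those devices with DUMMY_DEVICE_DOMAIN_INFO.
 */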
2666 static void __init init_no_remapping_devices(void)
2667 {
2668 struct dmar_drhd_unit *drhd;
2669
2670 for_each_drhd_unit(drhd) {
2671 if (!drhd->include_all) {
2672 int i;
2673 for (i = 0; i < drhd->devices_cnt; i++)
2674 if (drhd->devices[i] != NULL)
2675 break;
2676 /* ignore DMAR unit if no pci devices exist */
2677 if (i == drhd->devices_cnt)
2678 drhd->ignored = 1;
2679 }
2680 }
2681
2682 if (dmar_map_gfx)
2683 return;
2684
2685 for_each_drhd_unit(drhd) {
2686 int i;
2687 if (drhd->ignored || drhd->include_all)
2688 continue;
2689
2690 for (i = 0; i < drhd->devices_cnt; i++)
2691 if (drhd->devices[i] &&
2692 !IS_GFX_DEVICE(drhd->devices[i]))
2693 break;
2694
2695 if (i < drhd->devices_cnt)
2696 continue;
2697
2698 /* bypass IOMMU if it is just for gfx devices */
2699 drhd->ignored = 1;
2700 for (i = 0; i < drhd->devices_cnt; i++) {
2701 if (!drhd->devices[i])
2702 continue;
2703 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2704 }
2705 }
2706 }
2707
2708 int __init intel_iommu_init(void)
2709 {
2710 int ret = 0;
2711
2712 if (dmar_table_init())
2713 return -ENODEV;
2714
2715 if (dmar_dev_scope_init())
2716 return -ENODEV;
2717
2718 /*
2719 * Check the need for DMA-remapping initialization now.
2720 * Above initialization will also be used by Interrupt-remapping.
2721 */
2722 if (no_iommu || swiotlb || dmar_disabled)
2723 return -ENODEV;
2724
2725 iommu_init_mempool();
2726 dmar_init_reserved_ranges();
2727
2728 init_no_remapping_devices();
2729
2730 ret = init_dmars();
2731 if (ret) {
2732 printk(KERN_ERR "IOMMU: dmar init failed\n");
2733 put_iova_domain(&reserved_iova_list);
2734 iommu_exit_mempool();
2735 return ret;
2736 }
2737 printk(KERN_INFO
2738 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2739
2740 init_timer(&unmap_timer);
2741 force_iommu = 1;
2742 dma_ops = &intel_dma_ops;
2743
2744 register_iommu(&intel_iommu_ops);
2745
2746 return 0;
2747 }
2748
2749 static int vm_domain_add_dev_info(struct dmar_domain *domain,
2750 struct pci_dev *pdev)
2751 {
2752 struct device_domain_info *info;
2753 unsigned long flags;
2754
2755 info = alloc_devinfo_mem();
2756 if (!info)
2757 return -ENOMEM;
2758
2759 info->bus = pdev->bus->number;
2760 info->devfn = pdev->devfn;
2761 info->dev = pdev;
2762 info->domain = domain;
2763
2764 spin_lock_irqsave(&device_domain_lock, flags);
2765 list_add(&info->link, &domain->devices);
2766 list_add(&info->global, &device_domain_list);
2767 pdev->dev.archdata.iommu = info;
2768 spin_unlock_irqrestore(&device_domain_lock, flags);
2769
2770 return 0;
2771 }
2772
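/*
 * Detach one device from a VM domain: drop its device_domain_info,
 * clear its context entry, and if no other device behind the same
 * IOMMU remains in the domain, clear that IOMMU from iommu_bmp and
 * update the iommu count and coherency.
 */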
2773 static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2774 struct pci_dev *pdev)
2775 {
2776 struct device_domain_info *info;
2777 struct intel_iommu *iommu;
2778 unsigned long flags;
2779 int found = 0;
2780 struct list_head *entry, *tmp;
2781
2782 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2783 if (!iommu)
2784 return;
2785
2786 spin_lock_irqsave(&device_domain_lock, flags);
2787 list_for_each_safe(entry, tmp, &domain->devices) {
2788 info = list_entry(entry, struct device_domain_info, link);
2789 if (info->bus == pdev->bus->number &&
2790 info->devfn == pdev->devfn) {
2791 list_del(&info->link);
2792 list_del(&info->global);
2793 if (info->dev)
2794 info->dev->dev.archdata.iommu = NULL;
2795 spin_unlock_irqrestore(&device_domain_lock, flags);
2796
2797 iommu_detach_dev(iommu, info->bus, info->devfn);
2798 free_devinfo_mem(info);
2799
2800 spin_lock_irqsave(&device_domain_lock, flags);
2801
2802 if (found)
2803 break;
2804 else
2805 continue;
2806 }
2807
2808 /* if there are no other devices under the same iommu
2809 * owned by this domain, clear this iommu in iommu_bmp,
2810 * update the iommu count and coherency
2811 */
2812 if (device_to_iommu(info->bus, info->devfn) == iommu)
2813 found = 1;
2814 }
2815
2816 if (found == 0) {
2817 unsigned long tmp_flags;
2818 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2819 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2820 domain->iommu_count--;
2821 domain_update_iommu_coherency(domain);
2822 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2823 }
2824
2825 spin_unlock_irqrestore(&device_domain_lock, flags);
2826 }
2827
2828 static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2829 {
2830 struct device_domain_info *info;
2831 struct intel_iommu *iommu;
2832 unsigned long flags1, flags2;
2833
2834 spin_lock_irqsave(&device_domain_lock, flags1);
2835 while (!list_empty(&domain->devices)) {
2836 info = list_entry(domain->devices.next,
2837 struct device_domain_info, link);
2838 list_del(&info->link);
2839 list_del(&info->global);
2840 if (info->dev)
2841 info->dev->dev.archdata.iommu = NULL;
2842
2843 spin_unlock_irqrestore(&device_domain_lock, flags1);
2844
2845 iommu = device_to_iommu(info->bus, info->devfn);
2846 iommu_detach_dev(iommu, info->bus, info->devfn);
2847
2848 /* clear this iommu in iommu_bmp, update iommu count
2849 * and coherency
2850 */
2851 spin_lock_irqsave(&domain->iommu_lock, flags2);
2852 if (test_and_clear_bit(iommu->seq_id,
2853 &domain->iommu_bmp)) {
2854 domain->iommu_count--;
2855 domain_update_iommu_coherency(domain);
2856 }
2857 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2858
2859 free_devinfo_mem(info);
2860 spin_lock_irqsave(&device_domain_lock, flags1);
2861 }
2862 spin_unlock_irqrestore(&device_domain_lock, flags1);
2863 }
2864
2865 /* domain id for virtual machine, it won't be set in context */
2866 static unsigned long vm_domid;
2867
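/* Return the smallest AGAW among all IOMMUs spanned by this domain. */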
2868 static int vm_domain_min_agaw(struct dmar_domain *domain)
2869 {
2870 int i;
2871 int min_agaw = domain->agaw;
2872
2873 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2874 for (; i < g_num_of_iommus; ) {
2875 if (min_agaw > g_iommus[i]->agaw)
2876 min_agaw = g_iommus[i]->agaw;
2877
2878 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2879 }
2880
2881 return min_agaw;
2882 }
2883
2884 static struct dmar_domain *iommu_alloc_vm_domain(void)
2885 {
2886 struct dmar_domain *domain;
2887
2888 domain = alloc_domain_mem();
2889 if (!domain)
2890 return NULL;
2891
2892 domain->id = vm_domid++;
2893 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2894 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2895
2896 return domain;
2897 }
2898
2899 static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2900 {
2901 int adjust_width;
2902
2903 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2904 spin_lock_init(&domain->mapping_lock);
2905 spin_lock_init(&domain->iommu_lock);
2906
2907 domain_reserve_special_ranges(domain);
2908
2909 /* calculate AGAW */
2910 domain->gaw = guest_width;
2911 adjust_width = guestwidth_to_adjustwidth(guest_width);
2912 domain->agaw = width_to_agaw(adjust_width);
2913
2914 INIT_LIST_HEAD(&domain->devices);
2915
2916 domain->iommu_count = 0;
2917 domain->iommu_coherency = 0;
2918 domain->max_addr = 0;
2919
2920 /* always allocate the top pgd */
2921 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2922 if (!domain->pgd)
2923 return -ENOMEM;
2924 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2925 return 0;
2926 }
2927
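/* Release the domain-id slots this VM domain holds on every IOMMU. */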
2928 static void iommu_free_vm_domain(struct dmar_domain *domain)
2929 {
2930 unsigned long flags;
2931 struct dmar_drhd_unit *drhd;
2932 struct intel_iommu *iommu;
2933 unsigned long i;
2934 unsigned long ndomains;
2935
2936 for_each_drhd_unit(drhd) {
2937 if (drhd->ignored)
2938 continue;
2939 iommu = drhd->iommu;
2940
2941 ndomains = cap_ndoms(iommu->cap);
2942 i = find_first_bit(iommu->domain_ids, ndomains);
2943 for (; i < ndomains; ) {
2944 if (iommu->domains[i] == domain) {
2945 spin_lock_irqsave(&iommu->lock, flags);
2946 clear_bit(i, iommu->domain_ids);
2947 iommu->domains[i] = NULL;
2948 spin_unlock_irqrestore(&iommu->lock, flags);
2949 break;
2950 }
2951 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2952 }
2953 }
2954 }
2955
2956 static void vm_domain_exit(struct dmar_domain *domain)
2957 {
2958 u64 end;
2959
2960 /* Domain 0 is reserved, so don't process it */
2961 if (!domain)
2962 return;
2963
2964 vm_domain_remove_all_dev_info(domain);
2965 /* destroy iovas */
2966 put_iova_domain(&domain->iovad);
2967 end = DOMAIN_MAX_ADDR(domain->gaw);
2968 end = end & (~VTD_PAGE_MASK);
2969
2970 /* clear ptes */
2971 dma_pte_clear_range(domain, 0, end);
2972
2973 /* free page tables */
2974 dma_pte_free_pagetable(domain, 0, end);
2975
2976 iommu_free_vm_domain(domain);
2977 free_domain_mem(domain);
2978 }
2979
2980 static int intel_iommu_domain_init(struct iommu_domain *domain)
2981 {
2982 struct dmar_domain *dmar_domain;
2983
2984 dmar_domain = iommu_alloc_vm_domain();
2985 if (!dmar_domain) {
2986 printk(KERN_ERR
2987 "intel_iommu_domain_init: dmar_domain == NULL\n");
2988 return -ENOMEM;
2989 }
2990 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2991 printk(KERN_ERR
2992 "intel_iommu_domain_init() failed\n");
2993 vm_domain_exit(dmar_domain);
2994 return -ENOMEM;
2995 }
2996 domain->priv = dmar_domain;
2997
2998 return 0;
2999 }
3000
3001 static void intel_iommu_domain_destroy(struct iommu_domain *domain)
3002 {
3003 struct dmar_domain *dmar_domain = domain->priv;
3004
3005 domain->priv = NULL;
3006 vm_domain_exit(dmar_domain);
3007 }
3008
3009 static int intel_iommu_attach_device(struct iommu_domain *domain,
3010 struct device *dev)
3011 {
3012 struct dmar_domain *dmar_domain = domain->priv;
3013 struct pci_dev *pdev = to_pci_dev(dev);
3014 struct intel_iommu *iommu;
3015 int addr_width;
3016 u64 end;
3017 int ret;
3018
3019 /* normally pdev is not mapped */
3020 if (unlikely(domain_context_mapped(pdev))) {
3021 struct dmar_domain *old_domain;
3022
3023 old_domain = find_domain(pdev);
3024 if (old_domain) {
3025 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
3026 vm_domain_remove_one_dev_info(old_domain, pdev);
3027 else
3028 domain_remove_dev_info(old_domain);
3029 }
3030 }
3031
3032 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3033 if (!iommu)
3034 return -ENODEV;
3035
3036 /* check if this iommu agaw is sufficient for max mapped address */
3037 addr_width = agaw_to_width(iommu->agaw);
3038 end = DOMAIN_MAX_ADDR(addr_width);
3039 end = end & VTD_PAGE_MASK;
3040 if (end < dmar_domain->max_addr) {
3041 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3042 "sufficient for the mapped address (%llx)\n",
3043 __func__, iommu->agaw, dmar_domain->max_addr);
3044 return -EFAULT;
3045 }
3046
3047 ret = domain_context_mapping(dmar_domain, pdev);
3048 if (ret)
3049 return ret;
3050
3051 ret = vm_domain_add_dev_info(dmar_domain, pdev);
3052 return ret;
3053 }
3054
3055 static void intel_iommu_detach_device(struct iommu_domain *domain,
3056 struct device *dev)
3057 {
3058 struct dmar_domain *dmar_domain = domain->priv;
3059 struct pci_dev *pdev = to_pci_dev(dev);
3060
3061 vm_domain_remove_one_dev_info(dmar_domain, pdev);
3062 }
3063
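/*
 * iommu_ops map callback: grow the domain's max_addr when needed,
 * after checking that the smallest AGAW of the attached IOMMUs still
 * covers it, then install the page-table entries with the requested
 * read/write protections.
 */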
3064 static int intel_iommu_map_range(struct iommu_domain *domain,
3065 unsigned long iova, phys_addr_t hpa,
3066 size_t size, int iommu_prot)
3067 {
3068 struct dmar_domain *dmar_domain = domain->priv;
3069 u64 max_addr;
3070 int addr_width;
3071 int prot = 0;
3072 int ret;
3073
3074 if (iommu_prot & IOMMU_READ)
3075 prot |= DMA_PTE_READ;
3076 if (iommu_prot & IOMMU_WRITE)
3077 prot |= DMA_PTE_WRITE;
3078
3079 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
3080 if (dmar_domain->max_addr < max_addr) {
3081 int min_agaw;
3082 u64 end;
3083
3084 /* check if minimum agaw is sufficient for mapped address */
3085 min_agaw = vm_domain_min_agaw(dmar_domain);
3086 addr_width = agaw_to_width(min_agaw);
3087 end = DOMAIN_MAX_ADDR(addr_width);
3088 end = end & VTD_PAGE_MASK;
3089 if (end < max_addr) {
3090 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3091 "sufficient for the mapped address (%llx)\n",
3092 __func__, min_agaw, max_addr);
3093 return -EFAULT;
3094 }
3095 dmar_domain->max_addr = max_addr;
3096 }
3097
3098 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
3099 return ret;
3100 }
3101
3102 static void intel_iommu_unmap_range(struct iommu_domain *domain,
3103 unsigned long iova, size_t size)
3104 {
3105 struct dmar_domain *dmar_domain = domain->priv;
3106 dma_addr_t base;
3107
3108 /* The address might not be aligned */
3109 base = iova & VTD_PAGE_MASK;
3110 size = VTD_PAGE_ALIGN(size);
3111 dma_pte_clear_range(dmar_domain, base, base + size);
3112
3113 if (dmar_domain->max_addr == base + size)
3114 dmar_domain->max_addr = base;
3115 }
3116
3117 static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3118 unsigned long iova)
3119 {
3120 struct dmar_domain *dmar_domain = domain->priv;
3121 struct dma_pte *pte;
3122 u64 phys = 0;
3123
3124 pte = addr_to_dma_pte(dmar_domain, iova);
3125 if (pte)
3126 phys = dma_pte_addr(pte);
3127
3128 return phys;
3129 }
3130
3131 static struct iommu_ops intel_iommu_ops = {
3132 .domain_init = intel_iommu_domain_init,
3133 .domain_destroy = intel_iommu_domain_destroy,
3134 .attach_dev = intel_iommu_attach_device,
3135 .detach_dev = intel_iommu_detach_device,
3136 .map = intel_iommu_map_range,
3137 .unmap = intel_iommu_unmap_range,
3138 .iova_to_phys = intel_iommu_iova_to_phys,
3139 };