Allocation and free functions of virtual machine domain
drivers/pci/intel-iommu.c
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

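/*
 * Example of the lookup these helpers implement (see
 * device_to_context_entry() below): the entry for bus 0x02,
 * devfn 0x10 is found as
 *
 *	context = get_context_addr_from_root(&iommu->root_entry[0x02]);
 *	entry = &context[0x10];
 *
 * i.e. one root entry per bus, one context entry per devfn.
 */
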
/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: avail
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

#define CONTEXT_TT_MULTI_LEVEL	0

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

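/*
 * Example of how the setters combine (mirroring the sequence in
 * domain_context_mapping_one() below), for domain id 5 and a page
 * table root at physical address P with agaw 1:
 *
 *	context_set_domain_id(context, 5);	hi |= 5 << 8
 *	context_set_address_width(context, 1);	hi |= 1
 *	context_set_address_root(context, P);	lo |= P & VTD_PAGE_MASK
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);	clear bit 1
 *	context_set_present(context);		set bit 0
 */
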
/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-11: available
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
	return (pte->val & VTD_PAGE_MASK);
}

static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
{
	pte->val |= (addr & VTD_PAGE_MASK);
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

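/*
 * Example: a read/write mapping of the 4KiB page at host physical
 * address 0x12345000 is the single 64-bit value 0x12345003, the page
 * frame address plus DMA_PTE_READ (bit 0) and DMA_PTE_WRITE (bit 1).
 * Note dma_pte_present() tests the two permission bits rather than a
 * dedicated present bit.
 */
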
/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine; more than one device
 * across iommus may be owned in one domain, e.g. kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

struct dmar_domain {
	int	id;			/* domain id */
	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_count;	/* reference count of iommu */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev;	/* it's NULL for PCIE-to-PCI bridge */
	struct dmar_domain *domain; /* pointer to domain */
};

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

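/*
 * Deferred-flush bookkeeping for the DMA unmap path (implemented
 * later in this file): unless intel_iommu=strict is used, freed
 * IOVAs are queued in deferred_flush[] (one table per iommu, up to
 * HIGH_WATER_MARK entries each) and the IOTLB is invalidated in
 * batches from the unmap_timer handler instead of on every unmap.
 */
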
static void domain_remove_dev_info(struct dmar_domain *domain);

int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

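/*
 * Usage example: options are comma separated on the kernel command
 * line, e.g. "intel_iommu=igfx_off,strict" disables GFX device
 * mapping and batched IOTLB flushing respectively.
 */
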
static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

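/*
 * The allocators below briefly set PF_MEMALLOC so that GFP_ATOMIC
 * allocations can dip into the emergency reserves. The restore
 * expression "current->flags &= (~PF_MEMALLOC | flags)" clears
 * PF_MEMALLOC only when the task did not already have it set,
 * preserving the caller's state.
 */
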
static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}

static inline int width_to_agaw(int width);

/* calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use the default agaw and
 * fall back to a smaller supported agaw for iommus that don't
 * support the default.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}
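
/*
 * Worked example: DEFAULT_DOMAIN_ADDRESS_WIDTH is 48, so the loop
 * starts at agaw = (48 - 30) / 9 = 2 (a 4-level table). If the
 * iommu's SAGAW field lacks bit 2 but has bit 1, the result is
 * agaw 1, i.e. a 39-bit, 3-level page table.
 */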

/* in native case, each domain is related to only one iommu */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);

	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	for (; i < g_num_of_iommus; ) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
	}
}

static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
				    sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry)
		goto out;

	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}

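/*
 * Worked example: with agaw 2 (a 48-bit domain) the walk has
 * agaw_to_level(2) = 4 levels. Level 4 indexes address bits 39-47,
 * level 3 bits 30-38, level 2 bits 21-29 and level 1 bits 12-20;
 * each level_size() step covers 2^(12 + 9*(level-1)) bytes, i.e.
 * 4KiB at level 1 and 2MiB at level 2.
 */
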
static struct dma_pte *addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(pte)) {
			tmp_page = alloc_pgtable_page();

			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
			/*
			 * high level table always sets r/w, last level page
			 * table control read/write
			 */
			dma_set_pte_readable(pte);
			dma_set_pte_writable(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);
	return pte;
}

/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte))
			break;
		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
	struct dma_pte *pte = NULL;

	/* get last level pte */
	pte = dma_addr_level_pte(domain, addr, 1);

	if (pte) {
		dma_clear_pte(pte);
		domain_flush_cache(domain, pte, sizeof(*pte));
	}
}

/* clear last level pte, a tlb flush should be followed */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;
	/* in case it's partial page */
	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;

	/* we don't need lock here, nobody else touches the iova range */
	while (start < end) {
		dma_pte_clear_one(domain, start);
		start += VTD_PAGE_SIZE;
	}
}

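/*
 * Note the asymmetric rounding in dma_pte_clear_range() above: start
 * is rounded up and end rounded down, so a page that is only
 * partially covered by [start, end) is left mapped.
 */
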
/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start, level);
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(pte)));
				dma_clear_pte(pte);
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page();
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
			readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines whether we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing; if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flushing context entries implicitly flushes the write buffer */
	return 0;
}

/* return value determines whether we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing; if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
	/* flushing iotlb entries implicitly flushes the write buffer */
	return 0;
}

static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int mask;

	BUG_ON(addr & (~VTD_PAGE_MASK));
	BUG_ON(pages == 0);

	/* Fallback to domain selective flush if no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH,
						non_present_entry_flush);

	/*
	 * PSI requires page size to be 2 ^ x, and the base address is
	 * naturally aligned to the size
	 */
	mask = ilog2(__roundup_pow_of_two(pages));
	/* Fallback to domain selective flush if size is too big */
	if (mask > cap_max_amask_val(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
			DMA_TLB_DSI_FLUSH, non_present_entry_flush);

	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
					DMA_TLB_PSI_FLUSH,
					non_present_entry_flush);
}

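/*
 * Example of the mask computation: flushing 9 pages rounds up to 16,
 * so mask = ilog2(16) = 4 and the hardware invalidates a 16-page
 * (64KiB) naturally aligned region containing addr.
 */
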
static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_TES), sts);

	iommu->gcmd |= DMA_GCMD_TE;
	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}

/* iommu interrupt handling. Most of this is MSI-like. */

static const char *fault_reason_strings[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
};
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason)
{
	if (fault_reason > MAX_FAULT_REASON_IDX)
		return "Unknown";
	else
		return fault_reason_strings[fault_reason];
}

void dmar_msi_unmask(unsigned int irq)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	/* unmask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
	unsigned long flag;
	struct intel_iommu *iommu = get_irq_data(irq);

	/* mask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, unsigned long long addr)
{
	const char *reason;

	reason = dmar_get_fault_reason(fault_reason);

	printk(KERN_ERR
		"DMAR:[%s] Request device [%02x:%02x.%d] "
		"fault addr %llx\n"
		"DMAR:[fault reason %02d] %s\n",
		(type ? "DMA Read" : "DMA Write"),
		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
static irqreturn_t iommu_page_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto clear_overflow;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type;
		u32 data;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		fault_reason = dma_frcd_fault_reason(data);
		type = dma_frcd_type(data);

		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 8);
		source_id = dma_frcd_source_id(data);

		guest_addr = dmar_readq(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN);
		guest_addr = dma_frcd_page_addr(guest_addr);
		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		spin_unlock_irqrestore(&iommu->register_lock, flag);

		iommu_page_fault_do_one(iommu, type, fault_reason,
				source_id, guest_addr);

		fault_index++;
		if (fault_index > cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		spin_lock_irqsave(&iommu->register_lock, flag);
	}
clear_overflow:
	/* clear primary fault overflow */
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status & DMA_FSTS_PFO)
		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	irq = create_irq();
	if (!irq) {
		printk(KERN_ERR "IOMMU: no free vectors\n");
		return -EINVAL;
	}

	set_irq_data(irq, iommu);
	iommu->irq = irq;

	ret = arch_setup_dmar_msi(irq);
	if (ret) {
		set_irq_data(irq, NULL);
		iommu->irq = 0;
		destroy_irq(irq);
		return 0;
	}

	/* Force fault register is cleared */
	iommu_page_fault(irq, iommu);

	ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
	if (ret)
		printk(KERN_ERR "IOMMU: can't request irq\n");
	return ret;
}

static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
			GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		kfree(iommu->domain_ids);
		return -ENOMEM;
	}

	spin_lock_init(&iommu->lock);

	/*
	 * if Caching mode is set, then invalid translations are tagged
	 * with domainid 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}

static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);

void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;
	unsigned long flags;

	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
	for (; i < cap_ndoms(iommu->cap); ) {
		domain = iommu->domains[i];
		clear_bit(i, iommu->domain_ids);

		spin_lock_irqsave(&domain->iommu_lock, flags);
		if (--domain->iommu_count == 0) {
			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
				vm_domain_exit(domain);
			else
				domain_exit(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags);

		i = find_next_bit(iommu->domain_ids,
			cap_ndoms(iommu->cap), i+1);
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		set_irq_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	g_iommus[iommu->seq_id] = NULL;

	/* if all iommus are freed, free g_iommus */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (g_iommus[i])
			break;
	}

	if (i == g_num_of_iommus)
		kfree(g_iommus);

	/* free context mapping */
	free_context_table(iommu);
}

static struct dmar_domain *iommu_alloc_domain(struct intel_iommu *iommu)
{
	unsigned long num;
	unsigned long ndomains;
	struct dmar_domain *domain;
	unsigned long flags;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);
	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		free_domain_mem(domain);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return NULL;
	}

	set_bit(num, iommu->domain_ids);
	domain->id = num;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	set_bit(iommu->seq_id, &domain->iommu_bmp);
	domain->flags = 0;
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return domain;
}

static void iommu_free_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = domain_get_iommu(domain);

	spin_lock_irqsave(&iommu->lock, flags);
	clear_bit(domain->id, iommu->domain_ids);
	spin_unlock_irqrestore(&iommu->lock, flags);
}

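/*
 * Note on domain lifetime: iommu_alloc_domain() claims a slot in the
 * per-iommu domain_ids bitmap and seeds iommu_bmp with a single
 * iommu. A virtual machine domain (DOMAIN_FLAG_VIRTUAL_MACHINE) can
 * later accumulate more bits in iommu_bmp as devices behind other
 * iommus are attached, which is why free_dmar_iommu() above drops
 * iommu_count and only tears the domain down when it reaches zero.
 */
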
static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_alloc_key;
static struct lock_class_key reserved_rbtree_key;

static void dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;
	u64 addr, size;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
		&reserved_alloc_key);
	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova)
		printk(KERN_ERR "Reserve IOAPIC range failed\n");

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			addr = r->start;
			addr &= PAGE_MASK;
			size = r->end - addr;
			size = PAGE_ALIGN(size);
			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
				IOVA_PFN(size + addr) - 1);
			if (!iova)
				printk(KERN_ERR "Reserve iova failed\n");
		}
	}
}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}

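/*
 * Worked example: for a requested guest width of 36 bits,
 * r = (36 - 12) % 9 = 6, so the width is rounded up to
 * 36 + 9 - 6 = 39 bits, the next size whose page-table walk uses
 * whole 9-bit levels above the 12-bit page offset.
 */
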
static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	domain->iommu_count = 1;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	domain_remove_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);
	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}

static int domain_context_mapping_one(struct dmar_domain *domain,
		u8 bus, u8 devfn)
{
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	BUG_ON(!domain->pgd);

	iommu = device_to_iommu(bus, devfn);
	if (!iommu)
		return -ENODEV;

	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	context_set_domain_id(context, domain->id);
	context_set_address_width(context, domain->agaw);
	context_set_address_root(context, virt_to_phys(domain->pgd));
	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/* it's a non-present to present mapping */
	if (iommu->flush.flush_context(iommu, domain->id,
		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
		DMA_CCMD_DEVICE_INVL, 1))
		iommu_flush_write_buffer(iommu);
	else
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
		domain->iommu_count++;
		domain_update_iommu_coherency(domain);
	}
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
	return 0;
}

static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pdev->bus->number,
		pdev->devfn);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
			parent->devfn);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->subordinate->number, 0);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->bus->number, tmp->devfn);
}

static int domain_context_mapped(struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return -ENODEV;

	ret = device_context_mapped(iommu,
		pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
			parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie)
		return device_context_mapped(iommu,
			tmp->subordinate->number, 0);
	else
		return device_context_mapped(iommu,
			tmp->bus->number, tmp->devfn);
}

static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
			u64 hpa, size_t size, int prot)
{
	u64 start_pfn, end_pfn;
	struct dma_pte *pte;
	int index;
	int addr_width = agaw_to_width(domain->agaw);

	hpa &= (((u64)1) << addr_width) - 1;

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;
	iova &= PAGE_MASK;
	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
	index = 0;
	while (start_pfn < end_pfn) {
		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
		if (!pte)
			return -ENOMEM;
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		BUG_ON(dma_pte_addr(pte));
		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
		dma_set_pte_prot(pte, prot);
		domain_flush_cache(domain, pte, sizeof(*pte));
		start_pfn++;
		index++;
	}
	return 0;
}

static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	if (!iommu)
		return;

	clear_context_table(iommu, bus, devfn);
	iommu->flush.flush_context(iommu, 0, 0, 0,
					   DMA_CCMD_GLOBAL_INVL, 0);
	iommu->flush.flush_iotlb(iommu, 0, 0, 0,
					 DMA_TLB_GLOBAL_FLUSH, 0);
}

static void domain_remove_dev_info(struct dmar_domain *domain)
{
	struct device_domain_info *info;
	unsigned long flags;
	struct intel_iommu *iommu;

	spin_lock_irqsave(&device_domain_lock, flags);
	while (!list_empty(&domain->devices)) {
		info = list_entry(domain->devices.next,
			struct device_domain_info, link);
		list_del(&info->link);
		list_del(&info->global);
		if (info->dev)
			info->dev->dev.archdata.iommu = NULL;
		spin_unlock_irqrestore(&device_domain_lock, flags);

		iommu = device_to_iommu(info->bus, info->devfn);
		iommu_detach_dev(iommu, info->bus, info->devfn);
		free_devinfo_mem(info);

		spin_lock_irqsave(&device_domain_lock, flags);
	}
	spin_unlock_irqrestore(&device_domain_lock, flags);
}

/*
 * find_domain
 * Note: struct pci_dev->dev.archdata.iommu is used to store the
 * domain info
 */
static struct dmar_domain *
find_domain(struct pci_dev *pdev)
{
	struct device_domain_info *info;

	/* No lock here, assumes no domain exit in normal case */
	info = pdev->dev.archdata.iommu;
	if (info)
		return info->domain;
	return NULL;
}

/* domain is initialized */
static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
{
	struct dmar_domain *domain, *found = NULL;
	struct intel_iommu *iommu;
	struct dmar_drhd_unit *drhd;
	struct device_domain_info *info, *tmp;
	struct pci_dev *dev_tmp;
	unsigned long flags;
	int bus = 0, devfn = 0;

	domain = find_domain(pdev);
	if (domain)
		return domain;

	dev_tmp = pci_find_upstream_pcie_bridge(pdev);
	if (dev_tmp) {
		if (dev_tmp->is_pcie) {
			bus = dev_tmp->subordinate->number;
			devfn = 0;
		} else {
			bus = dev_tmp->bus->number;
			devfn = dev_tmp->devfn;
		}
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(info, &device_domain_list, global) {
			if (info->bus == bus && info->devfn == devfn) {
				found = info->domain;
				break;
			}
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
		/* pcie-pci bridge already has a domain, uses it */
		if (found) {
			domain = found;
			goto found_domain;
		}
	}

	/* Allocate new domain for the device */
	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
			pci_name(pdev));
		return NULL;
	}
	iommu = drhd->iommu;

	domain = iommu_alloc_domain(iommu);
	if (!domain)
		goto error;

	if (domain_init(domain, gaw)) {
		domain_exit(domain);
		goto error;
	}

	/* register pcie-to-pci device */
	if (dev_tmp) {
		info = alloc_devinfo_mem();
		if (!info) {
			domain_exit(domain);
			goto error;
		}
		info->bus = bus;
		info->devfn = devfn;
		info->dev = NULL;
		info->domain = domain;
		/* This domain is shared by devices under p2p bridge */
		domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;

		/* pcie-to-pci bridge already has a domain, uses it */
		found = NULL;
		spin_lock_irqsave(&device_domain_lock, flags);
		list_for_each_entry(tmp, &device_domain_list, global) {
			if (tmp->bus == bus && tmp->devfn == devfn) {
				found = tmp->domain;
				break;
			}
		}
		if (found) {
			free_devinfo_mem(info);
			domain_exit(domain);
			domain = found;
		} else {
			list_add(&info->link, &domain->devices);
			list_add(&info->global, &device_domain_list);
		}
		spin_unlock_irqrestore(&device_domain_lock, flags);
	}

found_domain:
	info = alloc_devinfo_mem();
	if (!info)
		goto error;
	info->bus = pdev->bus->number;
	info->devfn = pdev->devfn;
	info->dev = pdev;
	info->domain = domain;
	spin_lock_irqsave(&device_domain_lock, flags);
	/* somebody is fast */
	found = find_domain(pdev);
	if (found != NULL) {
		spin_unlock_irqrestore(&device_domain_lock, flags);
		if (found != domain) {
			domain_exit(domain);
			domain = found;
		}
		free_devinfo_mem(info);
		return domain;
	}
	list_add(&info->link, &domain->devices);
	list_add(&info->global, &device_domain_list);
	pdev->dev.archdata.iommu = info;
	spin_unlock_irqrestore(&device_domain_lock, flags);
	return domain;
error:
	/* recheck it here, maybe others set it */
	return find_domain(pdev);
}

static int iommu_prepare_identity_map(struct pci_dev *pdev,
				      unsigned long long start,
				      unsigned long long end)
{
	struct dmar_domain *domain;
	unsigned long size;
	unsigned long long base;
	int ret;

	printk(KERN_INFO
		"IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
		pci_name(pdev), start, end);
	/* page table init */
	domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
	if (!domain)
		return -ENOMEM;

	/* The address might not be aligned */
	base = start & PAGE_MASK;
	size = end - base;
	size = PAGE_ALIGN(size);
	if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
			IOVA_PFN(base + size) - 1)) {
		printk(KERN_ERR "IOMMU: reserve iova failed\n");
		ret = -ENOMEM;
		goto error;
	}

	pr_debug("Mapping reserved region %lx@%llx for %s\n",
		size, base, pci_name(pdev));
	/*
	 * RMRR range might have overlap with physical memory range,
	 * clear it first
	 */
	dma_pte_clear_range(domain, base, base + size);

	ret = domain_page_mapping(domain, base, base, size,
		DMA_PTE_READ|DMA_PTE_WRITE);
	if (ret)
		goto error;

	/* context entry init */
	ret = domain_context_mapping(domain, pdev);
	if (!ret)
		return 0;
error:
	domain_exit(domain);
	return ret;
}

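/*
 * Illustrative example (the addresses are made up): an RMRR reported
 * by the BIOS as 0xed000000 - 0xed00ffff for a USB controller makes
 * the code above install 16 identity (IOVA == HPA) ptes, so DMA that
 * was set up by firmware keeps working after translation is enabled.
 */
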
static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
	struct pci_dev *pdev)
{
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return 0;
	return iommu_prepare_identity_map(pdev, rmrr->base_address,
		rmrr->end_address + 1);
}

#ifdef CONFIG_DMAR_GFX_WA
struct iommu_prepare_data {
	struct pci_dev *pdev;
	int ret;
};

static int __init iommu_prepare_work_fn(unsigned long start_pfn,
					 unsigned long end_pfn, void *datax)
{
	struct iommu_prepare_data *data;

	data = (struct iommu_prepare_data *)datax;

	data->ret = iommu_prepare_identity_map(data->pdev,
			     start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
	return data->ret;
}

static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
{
	int nid;
	struct iommu_prepare_data data;

	data.pdev = pdev;
	data.ret = 0;

	for_each_online_node(nid) {
		work_with_active_regions(nid, iommu_prepare_work_fn, &data);
		if (data.ret)
			return data.ret;
	}
	return data.ret;
}

static void __init iommu_prepare_gfx_mapping(void)
{
	struct pci_dev *pdev = NULL;
	int ret;

	for_each_pci_dev(pdev) {
		if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
				!IS_GFX_DEVICE(pdev))
			continue;
		printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
			pci_name(pdev));
		ret = iommu_prepare_with_active_regions(pdev);
		if (ret)
			printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
	}
}
#else /* !CONFIG_DMAR_GFX_WA */
static inline void iommu_prepare_gfx_mapping(void)
{
	return;
}
#endif

#ifdef CONFIG_DMAR_FLOPPY_WA
static inline void iommu_prepare_isa(void)
{
	struct pci_dev *pdev;
	int ret;

	pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
	if (!pdev)
		return;

	printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
	ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);

	if (ret)
		printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
			"floppy might not work\n");
}
#else
static inline void iommu_prepare_isa(void)
{
	return;
}
#endif /* !CONFIG_DMAR_FLOPPY_WA */

static int __init init_dmars(void)
{
        struct dmar_drhd_unit *drhd;
        struct dmar_rmrr_unit *rmrr;
        struct pci_dev *pdev;
        struct intel_iommu *iommu;
        int i, ret, unit = 0;

        /*
         * for each drhd
         *    allocate root
         *    initialize and program root entry to not present
         * endfor
         */
        for_each_drhd_unit(drhd) {
                g_num_of_iommus++;
                /*
                 * lock not needed as this is only incremented in the single
                 * threaded kernel __init code path; all other access is
                 * read only
                 */
        }

        g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
                        GFP_KERNEL);
        if (!g_iommus) {
                printk(KERN_ERR "Allocating global iommu array failed\n");
                ret = -ENOMEM;
                goto error;
        }

        deferred_flush = kzalloc(g_num_of_iommus *
                sizeof(struct deferred_flush_tables), GFP_KERNEL);
        if (!deferred_flush) {
                kfree(g_iommus);
                ret = -ENOMEM;
                goto error;
        }

        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;

                iommu = drhd->iommu;
                g_iommus[iommu->seq_id] = iommu;

                ret = iommu_init_domains(iommu);
                if (ret)
                        goto error;

                /*
                 * TBD:
                 * we could share the same root & context tables
                 * among all IOMMUs.  Need to split it later.
                 */
                ret = iommu_alloc_root_entry(iommu);
                if (ret) {
                        printk(KERN_ERR "IOMMU: allocate root entry failed\n");
                        goto error;
                }
        }

        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;

                iommu = drhd->iommu;
                if (dmar_enable_qi(iommu)) {
                        /*
                         * Queued Invalidation could not be enabled; fall
                         * back to register-based invalidation.
                         */
                        iommu->flush.flush_context = __iommu_flush_context;
                        iommu->flush.flush_iotlb = __iommu_flush_iotlb;
                        printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
                               "invalidation\n",
                               (unsigned long long)drhd->reg_base_addr);
                } else {
                        iommu->flush.flush_context = qi_flush_context;
                        iommu->flush.flush_iotlb = qi_flush_iotlb;
                        printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
                               "invalidation\n",
                               (unsigned long long)drhd->reg_base_addr);
                }
        }

        /*
         * For each rmrr
         *   for each dev attached to rmrr
         *   do
         *     locate drhd for dev, alloc domain for dev
         *     allocate free domain
         *     allocate page table entries for rmrr
         *     if context not allocated for bus
         *           allocate and init context
         *           set present in root table for this bus
         *     init context with domain, translation etc
         *   endfor
         * endfor
         */
        for_each_rmrr_units(rmrr) {
                for (i = 0; i < rmrr->devices_cnt; i++) {
                        pdev = rmrr->devices[i];
                        /* some BIOSes list non-existent devices in the DMAR table */
                        if (!pdev)
                                continue;
                        ret = iommu_prepare_rmrr_dev(rmrr, pdev);
                        if (ret)
                                printk(KERN_ERR
                                       "IOMMU: mapping reserved region failed\n");
                }
        }

        iommu_prepare_gfx_mapping();

        iommu_prepare_isa();

        /*
         * for each drhd
         *   enable fault log
         *   global invalidate context cache
         *   global invalidate iotlb
         *   enable translation
         */
        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;
                iommu = drhd->iommu;
                sprintf(iommu->name, "dmar%d", unit++);

                iommu_flush_write_buffer(iommu);

                ret = dmar_set_interrupt(iommu);
                if (ret)
                        goto error;

                iommu_set_root_entry(iommu);

                iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
                                           0);
                iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
                                         0);
                iommu_disable_protect_mem_regions(iommu);

                ret = iommu_enable_translation(iommu);
                if (ret)
                        goto error;
        }

        return 0;
error:
        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;
                iommu = drhd->iommu;
                free_iommu(iommu);
        }
        kfree(g_iommus);
        return ret;
}

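/*
 * Number of bytes of page-granular space needed to cover a buffer: the
 * offset of host_addr within its page plus the length, rounded up to a
 * whole page.
 */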
static inline u64 aligned_size(u64 host_addr, size_t size)
{
        u64 addr;
        addr = (host_addr & (~PAGE_MASK)) + size;
        return PAGE_ALIGN(addr);
}

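/*
 * Allocate a size-aligned IOVA range from the domain's allocator,
 * clamped to the smaller of the caller's limit and the domain's maximum
 * guest address.
 */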
struct iova *
iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
{
        struct iova *piova;

        /* Make sure it's in range */
        end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
        if (!size || (IOVA_START_ADDR + size > end))
                return NULL;

        piova = alloc_iova(&domain->iovad,
                           size >> PAGE_SHIFT, IOVA_PFN(end), 1);
        return piova;
}

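/*
 * IOVA allocation policy: devices limited to 32-bit DMA allocate
 * directly against their mask, as does everything when dmar_forcedac is
 * set; otherwise 64-bit-capable devices try below 4GB first and only
 * spill into the higher range when that fails.
 */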
static struct iova *
__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
                   size_t size, u64 dma_mask)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct iova *iova = NULL;

        if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
                iova = iommu_alloc_iova(domain, size, dma_mask);
        else {
                /*
                 * First try to allocate an io virtual address in
                 * DMA_32BIT_MASK and if that fails then try allocating
                 * from higher range
                 */
                iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
                if (!iova)
                        iova = iommu_alloc_iova(domain, size, dma_mask);
        }

        if (!iova) {
                printk(KERN_ERR "Allocating iova for %s failed\n",
                       pci_name(pdev));
                return NULL;
        }

        return iova;
}

static struct dmar_domain *
get_valid_domain_for_dev(struct pci_dev *pdev)
{
        struct dmar_domain *domain;
        int ret;

        domain = get_domain_for_dev(pdev,
                        DEFAULT_DOMAIN_ADDRESS_WIDTH);
        if (!domain) {
                printk(KERN_ERR
                       "Allocating domain for %s failed\n", pci_name(pdev));
                return NULL;
        }

        /* make sure context mapping is ok */
        if (unlikely(!domain_context_mapped(pdev))) {
                ret = domain_context_mapping(domain, pdev);
                if (ret) {
                        printk(KERN_ERR
                               "Domain context map for %s failed\n",
                               pci_name(pdev));
                        return NULL;
                }
        }

        return domain;
}

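/*
 * Core single-mapping path: size the request to whole pages, allocate
 * an IOVA range, install page-table entries with protection bits
 * derived from the DMA direction, flush the IOTLB for the
 * non-present -> present transition, and return the bus address plus
 * the intra-page offset.
 */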
static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
                                     size_t size, int dir, u64 dma_mask)
{
        struct pci_dev *pdev = to_pci_dev(hwdev);
        struct dmar_domain *domain;
        phys_addr_t start_paddr;
        struct iova *iova;
        int prot = 0;
        int ret;
        struct intel_iommu *iommu;

        BUG_ON(dir == DMA_NONE);
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
                return paddr;

        domain = get_valid_domain_for_dev(pdev);
        if (!domain)
                return 0;

        iommu = domain_get_iommu(domain);
        size = aligned_size((u64)paddr, size);

        iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
        if (!iova)
                goto error;

        start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;

        /*
         * Check if DMAR supports zero-length reads on write only
         * mappings..
         */
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
            !cap_zlr(iommu->cap))
                prot |= DMA_PTE_READ;
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                prot |= DMA_PTE_WRITE;
        /*
         * paddr - (paddr + size) might span a partial page, so map the
         * whole page.  Note: if two parts of one page are separately
         * mapped, we might have two guest addresses mapping to the same
         * host paddr, but this is not a big problem.
         */
        ret = domain_page_mapping(domain, start_paddr,
                                  ((u64)paddr) & PAGE_MASK, size, prot);
        if (ret)
                goto error;

        /* it's a non-present to present mapping */
        ret = iommu_flush_iotlb_psi(iommu, domain->id,
                                    start_paddr, size >> VTD_PAGE_SHIFT, 1);
        if (ret)
                iommu_flush_write_buffer(iommu);

        return start_paddr + ((u64)paddr & (~PAGE_MASK));

error:
        if (iova)
                __free_iova(&domain->iovad, iova);
        printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
               pci_name(pdev), size, (unsigned long long)paddr, dir);
        return 0;
}

dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
                            size_t size, int dir)
{
        return __intel_map_single(hwdev, paddr, size, dir,
                                  to_pci_dev(hwdev)->dma_mask);
}

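/*
 * Deferred-unmap machinery: instead of flushing the IOTLB on every
 * unmap, freed IOVAs are queued per IOMMU and released in batches (when
 * the queue reaches HIGH_WATER_MARK or the 10ms unmap timer fires), so
 * a single global invalidation covers the whole batch.
 */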
static void flush_unmaps(void)
{
        int i, j;

        timer_on = 0;

        /* just flush them all */
        for (i = 0; i < g_num_of_iommus; i++) {
                struct intel_iommu *iommu = g_iommus[i];
                if (!iommu)
                        continue;

                if (deferred_flush[i].next) {
                        iommu->flush.flush_iotlb(iommu, 0, 0, 0,
                                                 DMA_TLB_GLOBAL_FLUSH, 0);
                        for (j = 0; j < deferred_flush[i].next; j++) {
                                __free_iova(&deferred_flush[i].domain[j]->iovad,
                                            deferred_flush[i].iova[j]);
                        }
                        deferred_flush[i].next = 0;
                }
        }

        list_size = 0;
}

static void flush_unmaps_timeout(unsigned long data)
{
        unsigned long flags;

        spin_lock_irqsave(&async_umap_flush_lock, flags);
        flush_unmaps();
        spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

static void add_unmap(struct dmar_domain *dom, struct iova *iova)
{
        unsigned long flags;
        int next, iommu_id;
        struct intel_iommu *iommu;

        spin_lock_irqsave(&async_umap_flush_lock, flags);
        if (list_size == HIGH_WATER_MARK)
                flush_unmaps();

        iommu = domain_get_iommu(dom);
        iommu_id = iommu->seq_id;

        next = deferred_flush[iommu_id].next;
        deferred_flush[iommu_id].domain[next] = dom;
        deferred_flush[iommu_id].iova[next] = iova;
        deferred_flush[iommu_id].next++;

        if (!timer_on) {
                mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
                timer_on = 1;
        }
        list_size++;
        spin_unlock_irqrestore(&async_umap_flush_lock, flags);
}

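/*
 * Tear down a single mapping: clear the PTEs and free the page tables
 * for the aligned range, then either flush and free the IOVA
 * immediately (intel_iommu_strict) or queue it for batched release.
 */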
void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
                        int dir)
{
        struct pci_dev *pdev = to_pci_dev(dev);
        struct dmar_domain *domain;
        unsigned long start_addr;
        struct iova *iova;
        struct intel_iommu *iommu;

        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
                return;
        domain = find_domain(pdev);
        BUG_ON(!domain);

        iommu = domain_get_iommu(domain);

        iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
        if (!iova)
                return;

        start_addr = iova->pfn_lo << PAGE_SHIFT;
        size = aligned_size((u64)dev_addr, size);

        pr_debug("Device %s unmapping: %lx@%llx\n",
                 pci_name(pdev), size, (unsigned long long)start_addr);

        /* clear the whole page */
        dma_pte_clear_range(domain, start_addr, start_addr + size);
        /* free page tables */
        dma_pte_free_pagetable(domain, start_addr, start_addr + size);
        if (intel_iommu_strict) {
                if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
                                          size >> VTD_PAGE_SHIFT, 0))
                        iommu_flush_write_buffer(iommu);
                /* free iova */
                __free_iova(&domain->iovad, iova);
        } else {
                /*
                 * Queue up the IOVA release: batching the IOTLB flush
                 * saves roughly the 1/6th of CPU time the per-unmap
                 * flush would otherwise consume.
                 */
                add_unmap(domain, iova);
        }
}

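/*
 * Coherent allocations are plain page allocations mapped
 * bidirectionally through __intel_map_single() against the device's
 * coherent DMA mask.
 */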
void *intel_alloc_coherent(struct device *hwdev, size_t size,
                           dma_addr_t *dma_handle, gfp_t flags)
{
        void *vaddr;
        int order;

        size = PAGE_ALIGN(size);
        order = get_order(size);
        flags &= ~(GFP_DMA | GFP_DMA32);

        vaddr = (void *)__get_free_pages(flags, order);
        if (!vaddr)
                return NULL;
        memset(vaddr, 0, size);

        *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
                                         DMA_BIDIRECTIONAL,
                                         hwdev->coherent_dma_mask);
        if (*dma_handle)
                return vaddr;
        free_pages((unsigned long)vaddr, order);
        return NULL;
}

void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                         dma_addr_t dma_handle)
{
        int order;

        size = PAGE_ALIGN(size);
        order = get_order(size);

        intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
        free_pages((unsigned long)vaddr, order);
}

#define SG_ENT_VIRT_ADDRESS(sg)        (sg_virt((sg)))

void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
                    int nelems, int dir)
{
        int i;
        struct pci_dev *pdev = to_pci_dev(hwdev);
        struct dmar_domain *domain;
        unsigned long start_addr;
        struct iova *iova;
        size_t size = 0;
        void *addr;
        struct scatterlist *sg;
        struct intel_iommu *iommu;

        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
                return;

        domain = find_domain(pdev);
        BUG_ON(!domain);

        iommu = domain_get_iommu(domain);

        iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
        if (!iova)
                return;
        for_each_sg(sglist, sg, nelems, i) {
                addr = SG_ENT_VIRT_ADDRESS(sg);
                size += aligned_size((u64)addr, sg->length);
        }

        start_addr = iova->pfn_lo << PAGE_SHIFT;

        /* clear the whole page */
        dma_pte_clear_range(domain, start_addr, start_addr + size);
        /* free page tables */
        dma_pte_free_pagetable(domain, start_addr, start_addr + size);

        if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
                                  size >> VTD_PAGE_SHIFT, 0))
                iommu_flush_write_buffer(iommu);

        /* free iova */
        __free_iova(&domain->iovad, iova);
}

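/*
 * Pass-through scatterlist "mapping" for devices that bypass the IOMMU:
 * each entry's bus address is simply its physical address.
 */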
static int intel_nontranslate_map_sg(struct device *hwdev,
                                     struct scatterlist *sglist,
                                     int nelems, int dir)
{
        int i;
        struct scatterlist *sg;

        for_each_sg(sglist, sg, nelems, i) {
                BUG_ON(!sg_page(sg));
                sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
                sg->dma_length = sg->length;
        }
        return nelems;
}

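/*
 * Scatter-gather mapping: the aligned lengths of all entries are summed
 * first so one contiguous IOVA range can be allocated, then each entry
 * is mapped at its running offset within that range; any failure tears
 * down the partial mapping and frees the IOVA.
 */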
int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
                 int dir)
{
        void *addr;
        int i;
        struct pci_dev *pdev = to_pci_dev(hwdev);
        struct dmar_domain *domain;
        size_t size = 0;
        int prot = 0;
        size_t offset = 0;
        struct iova *iova = NULL;
        int ret;
        struct scatterlist *sg;
        unsigned long start_addr;
        struct intel_iommu *iommu;

        BUG_ON(dir == DMA_NONE);
        if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
                return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

        domain = get_valid_domain_for_dev(pdev);
        if (!domain)
                return 0;

        iommu = domain_get_iommu(domain);

        for_each_sg(sglist, sg, nelems, i) {
                addr = SG_ENT_VIRT_ADDRESS(sg);
                addr = (void *)virt_to_phys(addr);
                size += aligned_size((u64)addr, sg->length);
        }

        iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
        if (!iova) {
                sglist->dma_length = 0;
                return 0;
        }

        /*
         * Check if DMAR supports zero-length reads on write only
         * mappings..
         */
        if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
            !cap_zlr(iommu->cap))
                prot |= DMA_PTE_READ;
        if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
                prot |= DMA_PTE_WRITE;

        start_addr = iova->pfn_lo << PAGE_SHIFT;
        offset = 0;
        for_each_sg(sglist, sg, nelems, i) {
                addr = SG_ENT_VIRT_ADDRESS(sg);
                addr = (void *)virt_to_phys(addr);
                size = aligned_size((u64)addr, sg->length);
                ret = domain_page_mapping(domain, start_addr + offset,
                                          ((u64)addr) & PAGE_MASK,
                                          size, prot);
                if (ret) {
                        /* clear the page */
                        dma_pte_clear_range(domain, start_addr,
                                            start_addr + offset);
                        /* free page tables */
                        dma_pte_free_pagetable(domain, start_addr,
                                               start_addr + offset);
                        /* free iova */
                        __free_iova(&domain->iovad, iova);
                        return 0;
                }
                sg->dma_address = start_addr + offset +
                                  ((u64)addr & (~PAGE_MASK));
                sg->dma_length = sg->length;
                offset += size;
        }

        /* it's a non-present to present mapping */
        if (iommu_flush_iotlb_psi(iommu, domain->id,
                                  start_addr, offset >> VTD_PAGE_SHIFT, 1))
                iommu_flush_write_buffer(iommu);
        return nelems;
}

static struct dma_mapping_ops intel_dma_ops = {
        .alloc_coherent = intel_alloc_coherent,
        .free_coherent = intel_free_coherent,
        .map_single = intel_map_single,
        .unmap_single = intel_unmap_single,
        .map_sg = intel_map_sg,
        .unmap_sg = intel_unmap_sg,
};

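/*
 * Slab caches for the objects allocated on the hot map/unmap paths:
 * domains, device-domain links and IOVA nodes.
 */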
static inline int iommu_domain_cache_init(void)
{
        int ret = 0;

        iommu_domain_cache = kmem_cache_create("iommu_domain",
                                               sizeof(struct dmar_domain),
                                               0,
                                               SLAB_HWCACHE_ALIGN,
                                               NULL);
        if (!iommu_domain_cache) {
                printk(KERN_ERR "Couldn't create iommu_domain cache\n");
                ret = -ENOMEM;
        }

        return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
        int ret = 0;

        iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
                                                sizeof(struct device_domain_info),
                                                0,
                                                SLAB_HWCACHE_ALIGN,
                                                NULL);
        if (!iommu_devinfo_cache) {
                printk(KERN_ERR "Couldn't create devinfo cache\n");
                ret = -ENOMEM;
        }

        return ret;
}

static inline int iommu_iova_cache_init(void)
{
        int ret = 0;

        iommu_iova_cache = kmem_cache_create("iommu_iova",
                                             sizeof(struct iova),
                                             0,
                                             SLAB_HWCACHE_ALIGN,
                                             NULL);
        if (!iommu_iova_cache) {
                printk(KERN_ERR "Couldn't create iova cache\n");
                ret = -ENOMEM;
        }

        return ret;
}

static int __init iommu_init_mempool(void)
{
        int ret;
        ret = iommu_iova_cache_init();
        if (ret)
                return ret;

        ret = iommu_domain_cache_init();
        if (ret)
                goto domain_error;

        ret = iommu_devinfo_cache_init();
        if (!ret)
                return ret;

        kmem_cache_destroy(iommu_domain_cache);
domain_error:
        kmem_cache_destroy(iommu_iova_cache);

        return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
        kmem_cache_destroy(iommu_devinfo_cache);
        kmem_cache_destroy(iommu_domain_cache);
        kmem_cache_destroy(iommu_iova_cache);
}

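/*
 * Mark DMAR units that cover no present PCI devices (or, when gfx
 * mapping is disabled, only graphics devices) as ignored so they are
 * never programmed or enabled.
 */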
static void __init init_no_remapping_devices(void)
{
        struct dmar_drhd_unit *drhd;

        for_each_drhd_unit(drhd) {
                if (!drhd->include_all) {
                        int i;
                        for (i = 0; i < drhd->devices_cnt; i++)
                                if (drhd->devices[i] != NULL)
                                        break;
                        /* ignore DMAR unit if no pci devices exist */
                        if (i == drhd->devices_cnt)
                                drhd->ignored = 1;
                }
        }

        if (dmar_map_gfx)
                return;

        for_each_drhd_unit(drhd) {
                int i;
                if (drhd->ignored || drhd->include_all)
                        continue;

                for (i = 0; i < drhd->devices_cnt; i++)
                        if (drhd->devices[i] &&
                            !IS_GFX_DEVICE(drhd->devices[i]))
                                break;

                if (i < drhd->devices_cnt)
                        continue;

                /* bypass IOMMU if it is just for gfx devices */
                drhd->ignored = 1;
                for (i = 0; i < drhd->devices_cnt; i++) {
                        if (!drhd->devices[i])
                                continue;
                        drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
                }
        }
}

int __init intel_iommu_init(void)
{
        int ret = 0;

        if (dmar_table_init())
                return -ENODEV;

        if (dmar_dev_scope_init())
                return -ENODEV;

        /*
         * Check the need for DMA-remapping initialization now.
         * Above initialization will also be used by Interrupt-remapping.
         */
        if (no_iommu || swiotlb || dmar_disabled)
                return -ENODEV;

        iommu_init_mempool();
        dmar_init_reserved_ranges();

        init_no_remapping_devices();

        ret = init_dmars();
        if (ret) {
                printk(KERN_ERR "IOMMU: dmar init failed\n");
                put_iova_domain(&reserved_iova_list);
                iommu_exit_mempool();
                return ret;
        }
        printk(KERN_INFO
               "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

        init_timer(&unmap_timer);
        force_iommu = 1;
        dma_ops = &intel_dma_ops;
        return 0;
}

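/*
 * The helpers below manage virtual machine domains
 * (DOMAIN_FLAG_VIRTUAL_MACHINE), which may span several IOMMUs: they
 * maintain the device_domain_info links and keep each domain's
 * iommu_bmp, iommu_count and coherency flag consistent as devices are
 * attached and detached.
 */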
static int vm_domain_add_dev_info(struct dmar_domain *domain,
                                  struct pci_dev *pdev)
{
        struct device_domain_info *info;
        unsigned long flags;

        info = alloc_devinfo_mem();
        if (!info)
                return -ENOMEM;

        info->bus = pdev->bus->number;
        info->devfn = pdev->devfn;
        info->dev = pdev;
        info->domain = domain;

        spin_lock_irqsave(&device_domain_lock, flags);
        list_add(&info->link, &domain->devices);
        list_add(&info->global, &device_domain_list);
        pdev->dev.archdata.iommu = info;
        spin_unlock_irqrestore(&device_domain_lock, flags);

        return 0;
}

static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
                                          struct pci_dev *pdev)
{
        struct device_domain_info *info;
        struct intel_iommu *iommu;
        unsigned long flags;
        int found = 0;
        struct list_head *entry, *tmp;

        iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
        if (!iommu)
                return;

        spin_lock_irqsave(&device_domain_lock, flags);
        list_for_each_safe(entry, tmp, &domain->devices) {
                info = list_entry(entry, struct device_domain_info, link);
                if (info->bus == pdev->bus->number &&
                    info->devfn == pdev->devfn) {
                        list_del(&info->link);
                        list_del(&info->global);
                        if (info->dev)
                                info->dev->dev.archdata.iommu = NULL;
                        spin_unlock_irqrestore(&device_domain_lock, flags);

                        iommu_detach_dev(iommu, info->bus, info->devfn);
                        free_devinfo_mem(info);

                        spin_lock_irqsave(&device_domain_lock, flags);

                        if (found)
                                break;
                        else
                                continue;
                }

                /*
                 * If there are no other devices under the same iommu owned
                 * by this domain, clear this iommu in iommu_bmp and update
                 * the iommu count and coherency.
                 */
                if (device_to_iommu(info->bus, info->devfn) == iommu)
                        found = 1;
        }

        if (found == 0) {
                unsigned long tmp_flags;
                spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
                clear_bit(iommu->seq_id, &domain->iommu_bmp);
                domain->iommu_count--;
                domain_update_iommu_coherency(domain);
                spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
        }

        spin_unlock_irqrestore(&device_domain_lock, flags);
}

static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
{
        struct device_domain_info *info;
        struct intel_iommu *iommu;
        unsigned long flags1, flags2;

        spin_lock_irqsave(&device_domain_lock, flags1);
        while (!list_empty(&domain->devices)) {
                info = list_entry(domain->devices.next,
                                  struct device_domain_info, link);
                list_del(&info->link);
                list_del(&info->global);
                if (info->dev)
                        info->dev->dev.archdata.iommu = NULL;

                spin_unlock_irqrestore(&device_domain_lock, flags1);

                iommu = device_to_iommu(info->bus, info->devfn);
                iommu_detach_dev(iommu, info->bus, info->devfn);

                /*
                 * clear this iommu in iommu_bmp, update iommu count
                 * and coherency
                 */
                spin_lock_irqsave(&domain->iommu_lock, flags2);
                if (test_and_clear_bit(iommu->seq_id,
                                       &domain->iommu_bmp)) {
                        domain->iommu_count--;
                        domain_update_iommu_coherency(domain);
                }
                spin_unlock_irqrestore(&domain->iommu_lock, flags2);

                free_devinfo_mem(info);
                spin_lock_irqsave(&device_domain_lock, flags1);
        }
        spin_unlock_irqrestore(&device_domain_lock, flags1);
}

/*
 * Domain ids for virtual machine domains; these ids are never
 * programmed into context entries.
 */
static unsigned long vm_domid;

static struct dmar_domain *iommu_alloc_vm_domain(void)
{
        struct dmar_domain *domain;

        domain = alloc_domain_mem();
        if (!domain)
                return NULL;

        domain->id = vm_domid++;
        memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
        domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;

        return domain;
}

static int vm_domain_init(struct dmar_domain *domain, int guest_width)
{
        int adjust_width;

        init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
        spin_lock_init(&domain->mapping_lock);
        spin_lock_init(&domain->iommu_lock);

        domain_reserve_special_ranges(domain);

        /* calculate AGAW */
        domain->gaw = guest_width;
        adjust_width = guestwidth_to_adjustwidth(guest_width);
        domain->agaw = width_to_agaw(adjust_width);

        INIT_LIST_HEAD(&domain->devices);

        domain->iommu_count = 0;
        domain->iommu_coherency = 0;

        /* always allocate the top pgd */
        domain->pgd = (struct dma_pte *)alloc_pgtable_page();
        if (!domain->pgd)
                return -ENOMEM;
        domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
        return 0;
}

static void iommu_free_vm_domain(struct dmar_domain *domain)
{
        unsigned long flags;
        struct dmar_drhd_unit *drhd;
        struct intel_iommu *iommu;
        unsigned long i;
        unsigned long ndomains;

        for_each_drhd_unit(drhd) {
                if (drhd->ignored)
                        continue;
                iommu = drhd->iommu;

                ndomains = cap_ndoms(iommu->cap);
                i = find_first_bit(iommu->domain_ids, ndomains);
                for (; i < ndomains; ) {
                        if (iommu->domains[i] == domain) {
                                spin_lock_irqsave(&iommu->lock, flags);
                                clear_bit(i, iommu->domain_ids);
                                iommu->domains[i] = NULL;
                                spin_unlock_irqrestore(&iommu->lock, flags);
                                break;
                        }
                        i = find_next_bit(iommu->domain_ids, ndomains, i+1);
                }
        }
}

static void vm_domain_exit(struct dmar_domain *domain)
{
        u64 end;

        /* Domain 0 is reserved, so don't process it */
        if (!domain)
                return;

        vm_domain_remove_all_dev_info(domain);
        /* destroy iovas */
        put_iova_domain(&domain->iovad);
        end = DOMAIN_MAX_ADDR(domain->gaw);
        end = end & (~VTD_PAGE_MASK);

        /* clear ptes */
        dma_pte_clear_range(domain, 0, end);

        /* free page tables */
        dma_pte_free_pagetable(domain, 0, end);

        iommu_free_vm_domain(domain);
        free_domain_mem(domain);
}

void intel_iommu_domain_exit(struct dmar_domain *domain)
{
        u64 end;

        /* Domain 0 is reserved, so don't process it */
        if (!domain)
                return;

        end = DOMAIN_MAX_ADDR(domain->gaw);
        end = end & (~VTD_PAGE_MASK);

        /* clear ptes */
        dma_pte_clear_range(domain, 0, end);

        /* free page tables */
        dma_pte_free_pagetable(domain, 0, end);

        iommu_free_domain(domain);
        free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);

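/*
 * Exported hooks for external users of the IOMMU. A minimal,
 * hypothetical call sequence (names as used in this file) might be:
 *
 *        struct dmar_domain *dom = intel_iommu_domain_alloc(pdev);
 *        intel_iommu_context_mapping(dom, pdev);
 *        intel_iommu_page_mapping(dom, iova, hpa, size,
 *                                 DMA_PTE_READ | DMA_PTE_WRITE);
 *        ...
 *        intel_iommu_detach_dev(dom, bus, devfn);
 *        intel_iommu_domain_exit(dom);
 */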
struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
{
        struct dmar_drhd_unit *drhd;
        struct dmar_domain *domain;
        struct intel_iommu *iommu;

        drhd = dmar_find_matched_drhd_unit(pdev);
        if (!drhd) {
                printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
                return NULL;
        }

        iommu = drhd->iommu;
        if (!iommu) {
                printk(KERN_ERR "intel_iommu_domain_alloc: iommu == NULL\n");
                return NULL;
        }
        domain = iommu_alloc_domain(iommu);
        if (!domain) {
                printk(KERN_ERR "intel_iommu_domain_alloc: domain == NULL\n");
                return NULL;
        }
        if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
                printk(KERN_ERR
                       "intel_iommu_domain_alloc: domain_init() failed\n");
                intel_iommu_domain_exit(domain);
                return NULL;
        }
        return domain;
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);

int intel_iommu_context_mapping(struct dmar_domain *domain,
                                struct pci_dev *pdev)
{
        return domain_context_mapping(domain, pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);

int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
                             u64 hpa, size_t size, int prot)
{
        return domain_page_mapping(domain, iova, hpa, size, prot);
}
EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);

void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
        struct intel_iommu *iommu;

        iommu = device_to_iommu(bus, devfn);
        iommu_detach_dev(iommu, bus, devfn);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);

struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
        return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);

int intel_iommu_found(void)
{
        return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);

u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
{
        struct dma_pte *pte;
        u64 pfn;

        pfn = 0;
        pte = addr_to_dma_pte(domain, iova);

        if (pte)
                pfn = dma_pte_addr(pte);

        return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);