intel-iommu: move context entry defs out from dma_remapping.h
[deliverable/linux.git] / drivers / pci / intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
38#include <asm/cacheflush.h>
39#include <asm/iommu.h>
40#include "pci.h"
41
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
57#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
58#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
59
60/*
61 * 0: Present
62 * 1-11: Reserved
63 * 12-63: Context Ptr (12 - (haw-1))
64 * 64-127: Reserved
65 */
66struct root_entry {
67 u64 val;
68 u64 rsvd1;
69};
70#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
71static inline bool root_present(struct root_entry *root)
72{
73 return (root->val & 1);
74}
75static inline void set_root_present(struct root_entry *root)
76{
77 root->val |= 1;
78}
79static inline void set_root_value(struct root_entry *root, unsigned long value)
80{
81 root->val |= value & VTD_PAGE_MASK;
82}
83
84static inline struct context_entry *
85get_context_addr_from_root(struct root_entry *root)
86{
87 return (struct context_entry *)
88 (root_present(root)?phys_to_virt(
89 root->val & VTD_PAGE_MASK) :
90 NULL);
91}
92
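/*
 * The root table has 256 entries, indexed by PCI bus number; each present
 * root entry points to a 256-entry context table indexed by devfn, so a
 * (bus, devfn) pair selects exactly one context entry.
 */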
93/*
94 * low 64 bits:
95 * 0: present
96 * 1: fault processing disable
97 * 2-3: translation type
98 * 12-63: address space root
99 * high 64 bits:
100 * 0-2: address width
101 * 3-6: aval
102 * 8-23: domain id
103 */
104struct context_entry {
105 u64 lo;
106 u64 hi;
107};
108#define context_present(c) ((c).lo & 1)
109#define context_fault_disable(c) (((c).lo >> 1) & 1)
110#define context_translation_type(c) (((c).lo >> 2) & 3)
111#define context_address_root(c) ((c).lo & VTD_PAGE_MASK)
112#define context_address_width(c) ((c).hi & 7)
113#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
114
115#define context_set_present(c) do {(c).lo |= 1;} while (0)
116#define context_set_fault_enable(c) \
117 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
118#define context_set_translation_type(c, val) \
119 do { \
120 (c).lo &= (((u64)-1) << 4) | 3; \
121 (c).lo |= ((val) & 3) << 2; \
122 } while (0)
123#define CONTEXT_TT_MULTI_LEVEL 0
124#define context_set_address_root(c, val) \
125 do {(c).lo |= (val) & VTD_PAGE_MASK; } while (0)
126#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
127#define context_set_domain_id(c, val) \
128 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
129#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
130
131static void flush_unmaps_timeout(unsigned long data);
132
133DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
134
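/*
 * Deferred IOTLB flushing: unmapped IOVAs are queued per IOMMU in
 * deferred_flush[] and released by a single global IOTLB flush, either
 * when HIGH_WATER_MARK entries have accumulated or when unmap_timer
 * fires (see add_unmap() and flush_unmaps() below).
 */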
135#define HIGH_WATER_MARK 250
136struct deferred_flush_tables {
137 int next;
138 struct iova *iova[HIGH_WATER_MARK];
139 struct dmar_domain *domain[HIGH_WATER_MARK];
140};
141
142static struct deferred_flush_tables *deferred_flush;
143
144/* bitmap for indexing intel_iommus */
145static int g_num_of_iommus;
146
147static DEFINE_SPINLOCK(async_umap_flush_lock);
148static LIST_HEAD(unmaps_to_do);
149
150static int timer_on;
151static long list_size;
152
153static void domain_remove_dev_info(struct dmar_domain *domain);
154
155int dmar_disabled;
156static int __initdata dmar_map_gfx = 1;
157static int dmar_forcedac;
158static int intel_iommu_strict;
159
160#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
161static DEFINE_SPINLOCK(device_domain_lock);
162static LIST_HEAD(device_domain_list);
163
164static int __init intel_iommu_setup(char *str)
165{
166 if (!str)
167 return -EINVAL;
168 while (*str) {
169 if (!strncmp(str, "off", 3)) {
170 dmar_disabled = 1;
171 printk(KERN_INFO"Intel-IOMMU: disabled\n");
172 } else if (!strncmp(str, "igfx_off", 8)) {
173 dmar_map_gfx = 0;
174 printk(KERN_INFO
175 "Intel-IOMMU: disable GFX device mapping\n");
176 } else if (!strncmp(str, "forcedac", 8)) {
177 printk(KERN_INFO
178 "Intel-IOMMU: Forcing DAC for PCI devices\n");
179 dmar_forcedac = 1;
180 } else if (!strncmp(str, "strict", 6)) {
181 printk(KERN_INFO
182 "Intel-IOMMU: disable batched IOTLB flush\n");
183 intel_iommu_strict = 1;
184 }
185
186 str += strcspn(str, ",");
187 while (*str == ',')
188 str++;
189 }
190 return 0;
191}
192__setup("intel_iommu=", intel_iommu_setup);
193
194static struct kmem_cache *iommu_domain_cache;
195static struct kmem_cache *iommu_devinfo_cache;
196static struct kmem_cache *iommu_iova_cache;
197
198static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
199{
200 unsigned int flags;
201 void *vaddr;
202
203 /* trying to avoid low memory issues */
204 flags = current->flags & PF_MEMALLOC;
205 current->flags |= PF_MEMALLOC;
206 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
207 current->flags &= (~PF_MEMALLOC | flags);
208 return vaddr;
209}
210
211
212static inline void *alloc_pgtable_page(void)
213{
214 unsigned int flags;
215 void *vaddr;
216
217 /* trying to avoid low memory issues */
218 flags = current->flags & PF_MEMALLOC;
219 current->flags |= PF_MEMALLOC;
220 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
221 current->flags &= (~PF_MEMALLOC | flags);
222 return vaddr;
223}
224
225static inline void free_pgtable_page(void *vaddr)
226{
227 free_page((unsigned long)vaddr);
228}
229
230static inline void *alloc_domain_mem(void)
231{
232 return iommu_kmem_cache_alloc(iommu_domain_cache);
233}
234
235static void free_domain_mem(void *vaddr)
236{
237 kmem_cache_free(iommu_domain_cache, vaddr);
238}
239
240static inline void * alloc_devinfo_mem(void)
241{
242 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
243}
244
245static inline void free_devinfo_mem(void *vaddr)
246{
247 kmem_cache_free(iommu_devinfo_cache, vaddr);
248}
249
250struct iova *alloc_iova_mem(void)
251{
252 return iommu_kmem_cache_alloc(iommu_iova_cache);
253}
254
255void free_iova_mem(struct iova *iova)
256{
257 kmem_cache_free(iommu_iova_cache, iova);
258}
259
260/* Gets context entry for a given bus and devfn */
261static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
262 u8 bus, u8 devfn)
263{
264 struct root_entry *root;
265 struct context_entry *context;
266 unsigned long phy_addr;
267 unsigned long flags;
268
269 spin_lock_irqsave(&iommu->lock, flags);
270 root = &iommu->root_entry[bus];
271 context = get_context_addr_from_root(root);
272 if (!context) {
273 context = (struct context_entry *)alloc_pgtable_page();
274 if (!context) {
275 spin_unlock_irqrestore(&iommu->lock, flags);
276 return NULL;
277 }
278 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
279 phy_addr = virt_to_phys((void *)context);
280 set_root_value(root, phy_addr);
281 set_root_present(root);
282 __iommu_flush_cache(iommu, root, sizeof(*root));
283 }
284 spin_unlock_irqrestore(&iommu->lock, flags);
285 return &context[devfn];
286}
287
288static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
289{
290 struct root_entry *root;
291 struct context_entry *context;
292 int ret;
293 unsigned long flags;
294
295 spin_lock_irqsave(&iommu->lock, flags);
296 root = &iommu->root_entry[bus];
297 context = get_context_addr_from_root(root);
298 if (!context) {
299 ret = 0;
300 goto out;
301 }
302 ret = context_present(context[devfn]);
303out:
304 spin_unlock_irqrestore(&iommu->lock, flags);
305 return ret;
306}
307
308static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
309{
310 struct root_entry *root;
311 struct context_entry *context;
312 unsigned long flags;
313
314 spin_lock_irqsave(&iommu->lock, flags);
315 root = &iommu->root_entry[bus];
316 context = get_context_addr_from_root(root);
317 if (context) {
318 context_clear_entry(context[devfn]);
319 __iommu_flush_cache(iommu, &context[devfn], \
320 sizeof(*context));
321 }
322 spin_unlock_irqrestore(&iommu->lock, flags);
323}
324
325static void free_context_table(struct intel_iommu *iommu)
326{
327 struct root_entry *root;
328 int i;
329 unsigned long flags;
330 struct context_entry *context;
331
332 spin_lock_irqsave(&iommu->lock, flags);
333 if (!iommu->root_entry) {
334 goto out;
335 }
336 for (i = 0; i < ROOT_ENTRY_NR; i++) {
337 root = &iommu->root_entry[i];
338 context = get_context_addr_from_root(root);
339 if (context)
340 free_pgtable_page(context);
341 }
342 free_pgtable_page(iommu->root_entry);
343 iommu->root_entry = NULL;
344out:
345 spin_unlock_irqrestore(&iommu->lock, flags);
346}
347
348/* page table handling */
349#define LEVEL_STRIDE (9)
350#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
351
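/*
 * Each page-table level decodes LEVEL_STRIDE (9) bits of the address.
 * An adjusted guest address width (agaw) of n corresponds to n + 2
 * page-table levels and an address width of 30 + 9 * n bits, which is
 * what the helpers below compute.
 */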
352static inline int agaw_to_level(int agaw)
353{
354 return agaw + 2;
355}
356
357static inline int agaw_to_width(int agaw)
358{
359 return 30 + agaw * LEVEL_STRIDE;
360
361}
362
363static inline int width_to_agaw(int width)
364{
365 return (width - 30) / LEVEL_STRIDE;
366}
367
368static inline unsigned int level_to_offset_bits(int level)
369{
370 return (12 + (level - 1) * LEVEL_STRIDE);
371}
372
373static inline int address_level_offset(u64 addr, int level)
374{
375 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
376}
377
378static inline u64 level_mask(int level)
379{
380 return ((u64)-1 << level_to_offset_bits(level));
381}
382
383static inline u64 level_size(int level)
384{
385 return ((u64)1 << level_to_offset_bits(level));
386}
387
388static inline u64 align_to_level(u64 addr, int level)
389{
390 return ((addr + level_size(level) - 1) & level_mask(level));
391}
392
393static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
394{
395 int addr_width = agaw_to_width(domain->agaw);
396 struct dma_pte *parent, *pte = NULL;
397 int level = agaw_to_level(domain->agaw);
398 int offset;
399 unsigned long flags;
400
401 BUG_ON(!domain->pgd);
402
403 addr &= (((u64)1) << addr_width) - 1;
404 parent = domain->pgd;
405
406 spin_lock_irqsave(&domain->mapping_lock, flags);
407 while (level > 0) {
408 void *tmp_page;
409
410 offset = address_level_offset(addr, level);
411 pte = &parent[offset];
412 if (level == 1)
413 break;
414
415 if (!dma_pte_present(*pte)) {
416 tmp_page = alloc_pgtable_page();
417
418 if (!tmp_page) {
419 spin_unlock_irqrestore(&domain->mapping_lock,
420 flags);
421 return NULL;
422 }
423 __iommu_flush_cache(domain->iommu, tmp_page,
424 PAGE_SIZE);
425 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
426 /*
427 * high level table always sets r/w, last level page
428 * table control read/write
429 */
430 dma_set_pte_readable(*pte);
431 dma_set_pte_writable(*pte);
432 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
433 }
434 parent = phys_to_virt(dma_pte_addr(*pte));
435 level--;
436 }
437
438 spin_unlock_irqrestore(&domain->mapping_lock, flags);
439 return pte;
440}
441
442/* return address's pte at specific level */
443static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
444 int level)
445{
446 struct dma_pte *parent, *pte = NULL;
447 int total = agaw_to_level(domain->agaw);
448 int offset;
449
450 parent = domain->pgd;
451 while (level <= total) {
452 offset = address_level_offset(addr, total);
453 pte = &parent[offset];
454 if (level == total)
455 return pte;
456
457 if (!dma_pte_present(*pte))
458 break;
459 parent = phys_to_virt(dma_pte_addr(*pte));
460 total--;
461 }
462 return NULL;
463}
464
465/* clear one page's page table */
466static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
467{
468 struct dma_pte *pte = NULL;
469
470 /* get last level pte */
471 pte = dma_addr_level_pte(domain, addr, 1);
472
473 if (pte) {
474 dma_clear_pte(*pte);
475 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
476 }
477}
478
479/* clear last level pte, a tlb flush should be followed */
480static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
481{
482 int addr_width = agaw_to_width(domain->agaw);
483
484 start &= (((u64)1) << addr_width) - 1;
485 end &= (((u64)1) << addr_width) - 1;
486 /* in case it's partial page */
487 start = PAGE_ALIGN(start);
488 end &= PAGE_MASK;
489
490 /* we don't need lock here, nobody else touches the iova range */
491 while (start < end) {
492 dma_pte_clear_one(domain, start);
493 start += VTD_PAGE_SIZE;
494 }
495}
496
497/* free page table pages. last level pte should already be cleared */
498static void dma_pte_free_pagetable(struct dmar_domain *domain,
499 u64 start, u64 end)
500{
501 int addr_width = agaw_to_width(domain->agaw);
502 struct dma_pte *pte;
503 int total = agaw_to_level(domain->agaw);
504 int level;
505 u64 tmp;
506
507 start &= (((u64)1) << addr_width) - 1;
508 end &= (((u64)1) << addr_width) - 1;
509
510 /* we don't need lock here, nobody else touches the iova range */
511 level = 2;
512 while (level <= total) {
513 tmp = align_to_level(start, level);
514 if (tmp >= end || (tmp + level_size(level) > end))
515 return;
516
517 while (tmp < end) {
518 pte = dma_addr_level_pte(domain, tmp, level);
519 if (pte) {
520 free_pgtable_page(
521 phys_to_virt(dma_pte_addr(*pte)));
522 dma_clear_pte(*pte);
523 __iommu_flush_cache(domain->iommu,
524 pte, sizeof(*pte));
525 }
526 tmp += level_size(level);
527 }
528 level++;
529 }
530 /* free pgd */
531 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
532 free_pgtable_page(domain->pgd);
533 domain->pgd = NULL;
534 }
535}
536
537/* iommu handling */
538static int iommu_alloc_root_entry(struct intel_iommu *iommu)
539{
540 struct root_entry *root;
541 unsigned long flags;
542
543 root = (struct root_entry *)alloc_pgtable_page();
544 if (!root)
545 return -ENOMEM;
546
547 __iommu_flush_cache(iommu, root, ROOT_SIZE);
548
549 spin_lock_irqsave(&iommu->lock, flags);
550 iommu->root_entry = root;
551 spin_unlock_irqrestore(&iommu->lock, flags);
552
553 return 0;
554}
555
556static void iommu_set_root_entry(struct intel_iommu *iommu)
557{
558 void *addr;
559 u32 cmd, sts;
560 unsigned long flag;
561
562 addr = iommu->root_entry;
563
564 spin_lock_irqsave(&iommu->register_lock, flag);
565 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
566
567 cmd = iommu->gcmd | DMA_GCMD_SRTP;
568 writel(cmd, iommu->reg + DMAR_GCMD_REG);
569
570 /* Make sure hardware complete it */
571 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
572 readl, (sts & DMA_GSTS_RTPS), sts);
573
574 spin_unlock_irqrestore(&iommu->register_lock, flag);
575}
576
577static void iommu_flush_write_buffer(struct intel_iommu *iommu)
578{
579 u32 val;
580 unsigned long flag;
581
582 if (!cap_rwbf(iommu->cap))
583 return;
584 val = iommu->gcmd | DMA_GCMD_WBF;
585
586 spin_lock_irqsave(&iommu->register_lock, flag);
587 writel(val, iommu->reg + DMAR_GCMD_REG);
588
589 /* Make sure hardware complete it */
590 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
591 readl, (!(val & DMA_GSTS_WBFS)), val);
592
593 spin_unlock_irqrestore(&iommu->register_lock, flag);
594}
595
596/* the return value determines whether a write buffer flush is needed */
597static int __iommu_flush_context(struct intel_iommu *iommu,
598 u16 did, u16 source_id, u8 function_mask, u64 type,
599 int non_present_entry_flush)
600{
601 u64 val = 0;
602 unsigned long flag;
603
604 /*
605 * In the non-present entry flush case, if the hardware doesn't cache
606 * non-present entries we do nothing; if it does cache them, we flush
607 * the entries of domain 0 (the domain id used to cache any
608 * non-present entries)
609 */
610 if (non_present_entry_flush) {
611 if (!cap_caching_mode(iommu->cap))
612 return 1;
613 else
614 did = 0;
615 }
616
617 switch (type) {
618 case DMA_CCMD_GLOBAL_INVL:
619 val = DMA_CCMD_GLOBAL_INVL;
620 break;
621 case DMA_CCMD_DOMAIN_INVL:
622 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
623 break;
624 case DMA_CCMD_DEVICE_INVL:
625 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
626 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
627 break;
628 default:
629 BUG();
630 }
631 val |= DMA_CCMD_ICC;
632
633 spin_lock_irqsave(&iommu->register_lock, flag);
634 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
635
636 /* Make sure hardware complete it */
637 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
638 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
639
640 spin_unlock_irqrestore(&iommu->register_lock, flag);
641
642 /* flushing a context entry implicitly flushes the write buffer */
643 return 0;
644}
645
646/* the return value determines whether a write buffer flush is needed */
647static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
648 u64 addr, unsigned int size_order, u64 type,
649 int non_present_entry_flush)
650{
651 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
652 u64 val = 0, val_iva = 0;
653 unsigned long flag;
654
655 /*
656 * In the non-present entry flush case, if the hardware doesn't cache
657 * non-present entries we do nothing; if it does cache them, we flush
658 * the entries of domain 0 (the domain id used to cache any
659 * non-present entries)
660 */
661 if (non_present_entry_flush) {
662 if (!cap_caching_mode(iommu->cap))
663 return 1;
664 else
665 did = 0;
666 }
667
668 switch (type) {
669 case DMA_TLB_GLOBAL_FLUSH:
670 /* global flush doesn't need set IVA_REG */
671 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
672 break;
673 case DMA_TLB_DSI_FLUSH:
674 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
675 break;
676 case DMA_TLB_PSI_FLUSH:
677 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
678 /* Note: always flush non-leaf currently */
679 val_iva = size_order | addr;
680 break;
681 default:
682 BUG();
683 }
684 /* Note: set drain read/write */
685#if 0
686 /*
687 * This is probably to be super secure.. Looks like we can
688 * ignore it without any impact.
689 */
690 if (cap_read_drain(iommu->cap))
691 val |= DMA_TLB_READ_DRAIN;
692#endif
693 if (cap_write_drain(iommu->cap))
694 val |= DMA_TLB_WRITE_DRAIN;
695
696 spin_lock_irqsave(&iommu->register_lock, flag);
697 /* Note: Only uses first TLB reg currently */
698 if (val_iva)
699 dmar_writeq(iommu->reg + tlb_offset, val_iva);
700 dmar_writeq(iommu->reg + tlb_offset + 8, val);
701
702 /* Make sure hardware complete it */
703 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
704 dmar_readq, (!(val & DMA_TLB_IVT)), val);
705
706 spin_unlock_irqrestore(&iommu->register_lock, flag);
707
708 /* check IOTLB invalidation granularity */
709 if (DMA_TLB_IAIG(val) == 0)
710 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
711 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
712 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
713 (unsigned long long)DMA_TLB_IIRG(type),
714 (unsigned long long)DMA_TLB_IAIG(val));
715 /* flushing an iotlb entry implicitly flushes the write buffer */
716 return 0;
717}
718
719static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
720 u64 addr, unsigned int pages, int non_present_entry_flush)
721{
722 unsigned int mask;
723
724 BUG_ON(addr & (~VTD_PAGE_MASK));
725 BUG_ON(pages == 0);
726
727 /* Fallback to domain selective flush if no PSI support */
728 if (!cap_pgsel_inv(iommu->cap))
729 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
730 DMA_TLB_DSI_FLUSH,
731 non_present_entry_flush);
732
733 /*
734 * PSI requires the page count to be a power of two, and the base
735 * address to be naturally aligned to that size
736 */
737 mask = ilog2(__roundup_pow_of_two(pages));
738 /* Fallback to domain selective flush if size is too big */
739 if (mask > cap_max_amask_val(iommu->cap))
740 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
741 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
742
743 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
744 DMA_TLB_PSI_FLUSH,
745 non_present_entry_flush);
746}
747
748static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
749{
750 u32 pmen;
751 unsigned long flags;
752
753 spin_lock_irqsave(&iommu->register_lock, flags);
754 pmen = readl(iommu->reg + DMAR_PMEN_REG);
755 pmen &= ~DMA_PMEN_EPM;
756 writel(pmen, iommu->reg + DMAR_PMEN_REG);
757
758 /* wait for the protected region status bit to clear */
759 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
760 readl, !(pmen & DMA_PMEN_PRS), pmen);
761
762 spin_unlock_irqrestore(&iommu->register_lock, flags);
763}
764
765static int iommu_enable_translation(struct intel_iommu *iommu)
766{
767 u32 sts;
768 unsigned long flags;
769
770 spin_lock_irqsave(&iommu->register_lock, flags);
771 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
772
773 /* Make sure hardware complete it */
774 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
775 readl, (sts & DMA_GSTS_TES), sts);
776
777 iommu->gcmd |= DMA_GCMD_TE;
778 spin_unlock_irqrestore(&iommu->register_lock, flags);
779 return 0;
780}
781
782static int iommu_disable_translation(struct intel_iommu *iommu)
783{
784 u32 sts;
785 unsigned long flag;
786
787 spin_lock_irqsave(&iommu->register_lock, flag);
788 iommu->gcmd &= ~DMA_GCMD_TE;
789 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
790
791 /* Make sure hardware complete it */
792 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
793 readl, (!(sts & DMA_GSTS_TES)), sts);
794
795 spin_unlock_irqrestore(&iommu->register_lock, flag);
796 return 0;
797}
798
799/* iommu interrupt handling. Most of it is MSI-like. */
800
801static const char *fault_reason_strings[] =
802{
803 "Software",
804 "Present bit in root entry is clear",
805 "Present bit in context entry is clear",
806 "Invalid context entry",
807 "Access beyond MGAW",
808 "PTE Write access is not set",
809 "PTE Read access is not set",
810 "Next page table ptr is invalid",
811 "Root table address invalid",
812 "Context table ptr is invalid",
813 "non-zero reserved fields in RTP",
814 "non-zero reserved fields in CTP",
815 "non-zero reserved fields in PTE",
816};
817#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
818
819const char *dmar_get_fault_reason(u8 fault_reason)
820{
821 if (fault_reason > MAX_FAULT_REASON_IDX)
822 return "Unknown";
823 else
824 return fault_reason_strings[fault_reason];
825}
826
827void dmar_msi_unmask(unsigned int irq)
828{
829 struct intel_iommu *iommu = get_irq_data(irq);
830 unsigned long flag;
831
832 /* unmask it */
833 spin_lock_irqsave(&iommu->register_lock, flag);
834 writel(0, iommu->reg + DMAR_FECTL_REG);
835 /* Read a reg to force flush the post write */
836 readl(iommu->reg + DMAR_FECTL_REG);
837 spin_unlock_irqrestore(&iommu->register_lock, flag);
838}
839
840void dmar_msi_mask(unsigned int irq)
841{
842 unsigned long flag;
843 struct intel_iommu *iommu = get_irq_data(irq);
844
845 /* mask it */
846 spin_lock_irqsave(&iommu->register_lock, flag);
847 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
848 /* Read a reg to force flush the post write */
849 readl(iommu->reg + DMAR_FECTL_REG);
850 spin_unlock_irqrestore(&iommu->register_lock, flag);
851}
852
853void dmar_msi_write(int irq, struct msi_msg *msg)
854{
855 struct intel_iommu *iommu = get_irq_data(irq);
856 unsigned long flag;
857
858 spin_lock_irqsave(&iommu->register_lock, flag);
859 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
860 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
861 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
862 spin_unlock_irqrestore(&iommu->register_lock, flag);
863}
864
865void dmar_msi_read(int irq, struct msi_msg *msg)
866{
867 struct intel_iommu *iommu = get_irq_data(irq);
868 unsigned long flag;
869
870 spin_lock_irqsave(&iommu->register_lock, flag);
871 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
872 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
873 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
874 spin_unlock_irqrestore(&iommu->register_lock, flag);
875}
876
877static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
878 u8 fault_reason, u16 source_id, unsigned long long addr)
879{
880 const char *reason;
881
882 reason = dmar_get_fault_reason(fault_reason);
883
884 printk(KERN_ERR
885 "DMAR:[%s] Request device [%02x:%02x.%d] "
886 "fault addr %llx \n"
887 "DMAR:[fault reason %02d] %s\n",
888 (type ? "DMA Read" : "DMA Write"),
889 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
890 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
891 return 0;
892}
893
894#define PRIMARY_FAULT_REG_LEN (16)
895static irqreturn_t iommu_page_fault(int irq, void *dev_id)
896{
897 struct intel_iommu *iommu = dev_id;
898 int reg, fault_index;
899 u32 fault_status;
900 unsigned long flag;
901
902 spin_lock_irqsave(&iommu->register_lock, flag);
903 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
904
905 /* TBD: ignore advanced fault log currently */
906 if (!(fault_status & DMA_FSTS_PPF))
907 goto clear_overflow;
908
909 fault_index = dma_fsts_fault_record_index(fault_status);
910 reg = cap_fault_reg_offset(iommu->cap);
911 while (1) {
912 u8 fault_reason;
913 u16 source_id;
914 u64 guest_addr;
915 int type;
916 u32 data;
917
918 /* highest 32 bits */
919 data = readl(iommu->reg + reg +
920 fault_index * PRIMARY_FAULT_REG_LEN + 12);
921 if (!(data & DMA_FRCD_F))
922 break;
923
924 fault_reason = dma_frcd_fault_reason(data);
925 type = dma_frcd_type(data);
926
927 data = readl(iommu->reg + reg +
928 fault_index * PRIMARY_FAULT_REG_LEN + 8);
929 source_id = dma_frcd_source_id(data);
930
931 guest_addr = dmar_readq(iommu->reg + reg +
932 fault_index * PRIMARY_FAULT_REG_LEN);
933 guest_addr = dma_frcd_page_addr(guest_addr);
934 /* clear the fault */
935 writel(DMA_FRCD_F, iommu->reg + reg +
936 fault_index * PRIMARY_FAULT_REG_LEN + 12);
937
938 spin_unlock_irqrestore(&iommu->register_lock, flag);
939
940 iommu_page_fault_do_one(iommu, type, fault_reason,
941 source_id, guest_addr);
942
943 fault_index++;
944 if (fault_index > cap_num_fault_regs(iommu->cap))
945 fault_index = 0;
946 spin_lock_irqsave(&iommu->register_lock, flag);
947 }
948clear_overflow:
949 /* clear primary fault overflow */
950 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
951 if (fault_status & DMA_FSTS_PFO)
952 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
953
954 spin_unlock_irqrestore(&iommu->register_lock, flag);
955 return IRQ_HANDLED;
956}
957
958int dmar_set_interrupt(struct intel_iommu *iommu)
959{
960 int irq, ret;
961
962 irq = create_irq();
963 if (!irq) {
964 printk(KERN_ERR "IOMMU: no free vectors\n");
965 return -EINVAL;
966 }
967
968 set_irq_data(irq, iommu);
969 iommu->irq = irq;
970
971 ret = arch_setup_dmar_msi(irq);
972 if (ret) {
973 set_irq_data(irq, NULL);
974 iommu->irq = 0;
975 destroy_irq(irq);
976 return 0;
977 }
978
979 /* Force fault register is cleared */
980 iommu_page_fault(irq, iommu);
981
982 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
983 if (ret)
984 printk(KERN_ERR "IOMMU: can't request irq\n");
985 return ret;
986}
987
988static int iommu_init_domains(struct intel_iommu *iommu)
989{
990 unsigned long ndomains;
991 unsigned long nlongs;
992
993 ndomains = cap_ndoms(iommu->cap);
994 pr_debug("Number of Domains supported <%ld>\n", ndomains);
995 nlongs = BITS_TO_LONGS(ndomains);
996
997 /* TBD: there might be 64K domains,
998 * consider other allocation for future chip
999 */
1000 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1001 if (!iommu->domain_ids) {
1002 printk(KERN_ERR "Allocating domain id array failed\n");
1003 return -ENOMEM;
1004 }
1005 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1006 GFP_KERNEL);
1007 if (!iommu->domains) {
1008 printk(KERN_ERR "Allocating domain array failed\n");
1009 kfree(iommu->domain_ids);
1010 return -ENOMEM;
1011 }
1012
1013 spin_lock_init(&iommu->lock);
1014
1015 /*
1016 * if Caching mode is set, then invalid translations are tagged
1017 * with domainid 0. Hence we need to pre-allocate it.
1018 */
1019 if (cap_caching_mode(iommu->cap))
1020 set_bit(0, iommu->domain_ids);
1021 return 0;
1022}
1023
1024
1025static void domain_exit(struct dmar_domain *domain);
1026
1027void free_dmar_iommu(struct intel_iommu *iommu)
1028{
1029 struct dmar_domain *domain;
1030 int i;
1031
1032 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1033 for (; i < cap_ndoms(iommu->cap); ) {
1034 domain = iommu->domains[i];
1035 clear_bit(i, iommu->domain_ids);
1036 domain_exit(domain);
1037 i = find_next_bit(iommu->domain_ids,
1038 cap_ndoms(iommu->cap), i+1);
1039 }
1040
1041 if (iommu->gcmd & DMA_GCMD_TE)
1042 iommu_disable_translation(iommu);
1043
1044 if (iommu->irq) {
1045 set_irq_data(iommu->irq, NULL);
1046 /* This will mask the irq */
1047 free_irq(iommu->irq, iommu);
1048 destroy_irq(iommu->irq);
1049 }
1050
1051 kfree(iommu->domains);
1052 kfree(iommu->domain_ids);
1053
1054 /* free context mapping */
1055 free_context_table(iommu);
1056}
1057
1058static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1059{
1060 unsigned long num;
1061 unsigned long ndomains;
1062 struct dmar_domain *domain;
1063 unsigned long flags;
1064
1065 domain = alloc_domain_mem();
1066 if (!domain)
1067 return NULL;
1068
1069 ndomains = cap_ndoms(iommu->cap);
1070
1071 spin_lock_irqsave(&iommu->lock, flags);
1072 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1073 if (num >= ndomains) {
1074 spin_unlock_irqrestore(&iommu->lock, flags);
1075 free_domain_mem(domain);
1076 printk(KERN_ERR "IOMMU: no free domain ids\n");
1077 return NULL;
1078 }
1079
1080 set_bit(num, iommu->domain_ids);
1081 domain->id = num;
1082 domain->iommu = iommu;
1083 iommu->domains[num] = domain;
1084 spin_unlock_irqrestore(&iommu->lock, flags);
1085
1086 return domain;
1087}
1088
1089static void iommu_free_domain(struct dmar_domain *domain)
1090{
1091 unsigned long flags;
1092
1093 spin_lock_irqsave(&domain->iommu->lock, flags);
1094 clear_bit(domain->id, domain->iommu->domain_ids);
1095 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1096}
1097
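/*
 * IOVA ranges that must never be handed out as DMA addresses (the IOAPIC
 * window and all PCI MMIO apertures). They are reserved once at init time
 * and copied into every newly created domain.
 */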
1098static struct iova_domain reserved_iova_list;
1099static struct lock_class_key reserved_alloc_key;
1100static struct lock_class_key reserved_rbtree_key;
1101
1102static void dmar_init_reserved_ranges(void)
1103{
1104 struct pci_dev *pdev = NULL;
1105 struct iova *iova;
1106 int i;
1107 u64 addr, size;
1108
1109 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1110
1111 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1112 &reserved_alloc_key);
1113 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1114 &reserved_rbtree_key);
1115
1116 /* IOAPIC ranges shouldn't be accessed by DMA */
1117 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1118 IOVA_PFN(IOAPIC_RANGE_END));
1119 if (!iova)
1120 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1121
1122 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1123 for_each_pci_dev(pdev) {
1124 struct resource *r;
1125
1126 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1127 r = &pdev->resource[i];
1128 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1129 continue;
1130 addr = r->start;
1131 addr &= PAGE_MASK;
1132 size = r->end - addr;
1133 size = PAGE_ALIGN(size);
1134 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1135 IOVA_PFN(size + addr) - 1);
1136 if (!iova)
1137 printk(KERN_ERR "Reserve iova failed\n");
1138 }
1139 }
1140
1141}
1142
1143static void domain_reserve_special_ranges(struct dmar_domain *domain)
1144{
1145 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1146}
1147
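/*
 * Round the guest address width up to the nearest width the page table
 * can represent: 12 bits of page offset plus a multiple of 9 bits per
 * level, capped at 64.
 */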
1148static inline int guestwidth_to_adjustwidth(int gaw)
1149{
1150 int agaw;
1151 int r = (gaw - 12) % 9;
1152
1153 if (r == 0)
1154 agaw = gaw;
1155 else
1156 agaw = gaw + 9 - r;
1157 if (agaw > 64)
1158 agaw = 64;
1159 return agaw;
1160}
1161
1162static int domain_init(struct dmar_domain *domain, int guest_width)
1163{
1164 struct intel_iommu *iommu;
1165 int adjust_width, agaw;
1166 unsigned long sagaw;
1167
1168 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1169 spin_lock_init(&domain->mapping_lock);
1170
1171 domain_reserve_special_ranges(domain);
1172
1173 /* calculate AGAW */
1174 iommu = domain->iommu;
1175 if (guest_width > cap_mgaw(iommu->cap))
1176 guest_width = cap_mgaw(iommu->cap);
1177 domain->gaw = guest_width;
1178 adjust_width = guestwidth_to_adjustwidth(guest_width);
1179 agaw = width_to_agaw(adjust_width);
1180 sagaw = cap_sagaw(iommu->cap);
1181 if (!test_bit(agaw, &sagaw)) {
1182 /* hardware doesn't support it, choose a bigger one */
1183 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1184 agaw = find_next_bit(&sagaw, 5, agaw);
1185 if (agaw >= 5)
1186 return -ENODEV;
1187 }
1188 domain->agaw = agaw;
1189 INIT_LIST_HEAD(&domain->devices);
1190
1191 /* always allocate the top pgd */
1192 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1193 if (!domain->pgd)
1194 return -ENOMEM;
1195 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1196 return 0;
1197}
1198
1199static void domain_exit(struct dmar_domain *domain)
1200{
1201 u64 end;
1202
1203 /* Domain 0 is reserved, so don't process it */
1204 if (!domain)
1205 return;
1206
1207 domain_remove_dev_info(domain);
1208 /* destroy iovas */
1209 put_iova_domain(&domain->iovad);
1210 end = DOMAIN_MAX_ADDR(domain->gaw);
1211 end = end & (~PAGE_MASK);
1212
1213 /* clear ptes */
1214 dma_pte_clear_range(domain, 0, end);
1215
1216 /* free page tables */
1217 dma_pte_free_pagetable(domain, 0, end);
1218
1219 iommu_free_domain(domain);
1220 free_domain_mem(domain);
1221}
1222
1223static int domain_context_mapping_one(struct dmar_domain *domain,
1224 u8 bus, u8 devfn)
1225{
1226 struct context_entry *context;
1227 struct intel_iommu *iommu = domain->iommu;
1228 unsigned long flags;
1229
1230 pr_debug("Set context mapping for %02x:%02x.%d\n",
1231 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1232 BUG_ON(!domain->pgd);
1233 context = device_to_context_entry(iommu, bus, devfn);
1234 if (!context)
1235 return -ENOMEM;
1236 spin_lock_irqsave(&iommu->lock, flags);
1237 if (context_present(*context)) {
1238 spin_unlock_irqrestore(&iommu->lock, flags);
1239 return 0;
1240 }
1241
1242 context_set_domain_id(*context, domain->id);
1243 context_set_address_width(*context, domain->agaw);
1244 context_set_address_root(*context, virt_to_phys(domain->pgd));
1245 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1246 context_set_fault_enable(*context);
1247 context_set_present(*context);
1248 __iommu_flush_cache(iommu, context, sizeof(*context));
1249
1250 /* it's a non-present to present mapping */
1251 if (iommu->flush.flush_context(iommu, domain->id,
1252 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1253 DMA_CCMD_DEVICE_INVL, 1))
1254 iommu_flush_write_buffer(iommu);
1255 else
1256 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1257
1258 spin_unlock_irqrestore(&iommu->lock, flags);
1259 return 0;
1260}
1261
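/*
 * Besides the device itself, set up context entries for every bridge on
 * the path to the root when the device sits behind a PCIe-to-PCI bridge,
 * since DMA requests from such devices may carry the bridge's source-id.
 */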
1262static int
1263domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1264{
1265 int ret;
1266 struct pci_dev *tmp, *parent;
1267
1268 ret = domain_context_mapping_one(domain, pdev->bus->number,
1269 pdev->devfn);
1270 if (ret)
1271 return ret;
1272
1273 /* dependent device mapping */
1274 tmp = pci_find_upstream_pcie_bridge(pdev);
1275 if (!tmp)
1276 return 0;
1277 /* Secondary interface's bus number and devfn 0 */
1278 parent = pdev->bus->self;
1279 while (parent != tmp) {
1280 ret = domain_context_mapping_one(domain, parent->bus->number,
1281 parent->devfn);
1282 if (ret)
1283 return ret;
1284 parent = parent->bus->self;
1285 }
1286 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1287 return domain_context_mapping_one(domain,
1288 tmp->subordinate->number, 0);
1289 else /* this is a legacy PCI bridge */
1290 return domain_context_mapping_one(domain,
1291 tmp->bus->number, tmp->devfn);
1292}
1293
1294static int domain_context_mapped(struct dmar_domain *domain,
1295 struct pci_dev *pdev)
1296{
1297 int ret;
1298 struct pci_dev *tmp, *parent;
1299
1300 ret = device_context_mapped(domain->iommu,
1301 pdev->bus->number, pdev->devfn);
1302 if (!ret)
1303 return ret;
1304 /* dependent device mapping */
1305 tmp = pci_find_upstream_pcie_bridge(pdev);
1306 if (!tmp)
1307 return ret;
1308 /* Secondary interface's bus number and devfn 0 */
1309 parent = pdev->bus->self;
1310 while (parent != tmp) {
1311 ret = device_context_mapped(domain->iommu, parent->bus->number,
1312 parent->devfn);
1313 if (!ret)
1314 return ret;
1315 parent = parent->bus->self;
1316 }
1317 if (tmp->is_pcie)
1318 return device_context_mapped(domain->iommu,
1319 tmp->subordinate->number, 0);
1320 else
1321 return device_context_mapped(domain->iommu,
1322 tmp->bus->number, tmp->devfn);
1323}
1324
1325static int
1326domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1327 u64 hpa, size_t size, int prot)
1328{
1329 u64 start_pfn, end_pfn;
1330 struct dma_pte *pte;
1331 int index;
1332 int addr_width = agaw_to_width(domain->agaw);
1333
1334 hpa &= (((u64)1) << addr_width) - 1;
1335
1336 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1337 return -EINVAL;
1338 iova &= PAGE_MASK;
1339 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1340 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1341 index = 0;
1342 while (start_pfn < end_pfn) {
1343 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1344 if (!pte)
1345 return -ENOMEM;
1346 /* We don't need lock here, nobody else
1347 * touches the iova range
1348 */
1349 BUG_ON(dma_pte_addr(*pte));
1350 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1351 dma_set_pte_prot(*pte, prot);
1352 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1353 start_pfn++;
1354 index++;
1355 }
1356 return 0;
1357}
1358
1359static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1360{
1361 clear_context_table(domain->iommu, bus, devfn);
1362 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1363 DMA_CCMD_GLOBAL_INVL, 0);
1364 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1365 DMA_TLB_GLOBAL_FLUSH, 0);
1366}
1367
1368static void domain_remove_dev_info(struct dmar_domain *domain)
1369{
1370 struct device_domain_info *info;
1371 unsigned long flags;
1372
1373 spin_lock_irqsave(&device_domain_lock, flags);
1374 while (!list_empty(&domain->devices)) {
1375 info = list_entry(domain->devices.next,
1376 struct device_domain_info, link);
1377 list_del(&info->link);
1378 list_del(&info->global);
1379 if (info->dev)
1380 info->dev->dev.archdata.iommu = NULL;
1381 spin_unlock_irqrestore(&device_domain_lock, flags);
1382
1383 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1384 free_devinfo_mem(info);
1385
1386 spin_lock_irqsave(&device_domain_lock, flags);
1387 }
1388 spin_unlock_irqrestore(&device_domain_lock, flags);
1389}
1390
1391/*
1392 * find_domain
1393 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1394 */
1395static struct dmar_domain *
1396find_domain(struct pci_dev *pdev)
1397{
1398 struct device_domain_info *info;
1399
1400 /* No lock here, assumes no domain exit in normal case */
1401 info = pdev->dev.archdata.iommu;
1402 if (info)
1403 return info->domain;
1404 return NULL;
1405}
1406
1407/* domain is initialized */
1408static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1409{
1410 struct dmar_domain *domain, *found = NULL;
1411 struct intel_iommu *iommu;
1412 struct dmar_drhd_unit *drhd;
1413 struct device_domain_info *info, *tmp;
1414 struct pci_dev *dev_tmp;
1415 unsigned long flags;
1416 int bus = 0, devfn = 0;
1417
1418 domain = find_domain(pdev);
1419 if (domain)
1420 return domain;
1421
1422 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1423 if (dev_tmp) {
1424 if (dev_tmp->is_pcie) {
1425 bus = dev_tmp->subordinate->number;
1426 devfn = 0;
1427 } else {
1428 bus = dev_tmp->bus->number;
1429 devfn = dev_tmp->devfn;
1430 }
1431 spin_lock_irqsave(&device_domain_lock, flags);
1432 list_for_each_entry(info, &device_domain_list, global) {
1433 if (info->bus == bus && info->devfn == devfn) {
1434 found = info->domain;
1435 break;
1436 }
1437 }
1438 spin_unlock_irqrestore(&device_domain_lock, flags);
1439 /* pcie-pci bridge already has a domain, use it */
1440 if (found) {
1441 domain = found;
1442 goto found_domain;
1443 }
1444 }
1445
1446 /* Allocate new domain for the device */
1447 drhd = dmar_find_matched_drhd_unit(pdev);
1448 if (!drhd) {
1449 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1450 pci_name(pdev));
1451 return NULL;
1452 }
1453 iommu = drhd->iommu;
1454
1455 domain = iommu_alloc_domain(iommu);
1456 if (!domain)
1457 goto error;
1458
1459 if (domain_init(domain, gaw)) {
1460 domain_exit(domain);
1461 goto error;
1462 }
1463
1464 /* register pcie-to-pci device */
1465 if (dev_tmp) {
1466 info = alloc_devinfo_mem();
1467 if (!info) {
1468 domain_exit(domain);
1469 goto error;
1470 }
1471 info->bus = bus;
1472 info->devfn = devfn;
1473 info->dev = NULL;
1474 info->domain = domain;
1475 /* This domain is shared by devices under p2p bridge */
1476 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1477
1478 /* pcie-to-pci bridge already has a domain, use it */
1479 found = NULL;
1480 spin_lock_irqsave(&device_domain_lock, flags);
1481 list_for_each_entry(tmp, &device_domain_list, global) {
1482 if (tmp->bus == bus && tmp->devfn == devfn) {
1483 found = tmp->domain;
1484 break;
1485 }
1486 }
1487 if (found) {
1488 free_devinfo_mem(info);
1489 domain_exit(domain);
1490 domain = found;
1491 } else {
1492 list_add(&info->link, &domain->devices);
1493 list_add(&info->global, &device_domain_list);
1494 }
1495 spin_unlock_irqrestore(&device_domain_lock, flags);
1496 }
1497
1498found_domain:
1499 info = alloc_devinfo_mem();
1500 if (!info)
1501 goto error;
1502 info->bus = pdev->bus->number;
1503 info->devfn = pdev->devfn;
1504 info->dev = pdev;
1505 info->domain = domain;
1506 spin_lock_irqsave(&device_domain_lock, flags);
1507 /* somebody is fast */
1508 found = find_domain(pdev);
1509 if (found != NULL) {
1510 spin_unlock_irqrestore(&device_domain_lock, flags);
1511 if (found != domain) {
1512 domain_exit(domain);
1513 domain = found;
1514 }
1515 free_devinfo_mem(info);
1516 return domain;
1517 }
1518 list_add(&info->link, &domain->devices);
1519 list_add(&info->global, &device_domain_list);
1520 pdev->dev.archdata.iommu = info;
1521 spin_unlock_irqrestore(&device_domain_lock, flags);
1522 return domain;
1523error:
1524 /* recheck it here, maybe others set it */
1525 return find_domain(pdev);
1526}
1527
1528static int iommu_prepare_identity_map(struct pci_dev *pdev,
1529 unsigned long long start,
1530 unsigned long long end)
1531{
1532 struct dmar_domain *domain;
1533 unsigned long size;
1534 unsigned long long base;
1535 int ret;
1536
1537 printk(KERN_INFO
1538 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1539 pci_name(pdev), start, end);
1540 /* page table init */
1541 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1542 if (!domain)
1543 return -ENOMEM;
1544
1545 /* The address might not be aligned */
1546 base = start & PAGE_MASK;
1547 size = end - base;
1548 size = PAGE_ALIGN(size);
1549 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1550 IOVA_PFN(base + size) - 1)) {
1551 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1552 ret = -ENOMEM;
1553 goto error;
1554 }
1555
1556 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1557 size, base, pci_name(pdev));
1558 /*
1559 * RMRR range might have overlap with physical memory range,
1560 * clear it first
1561 */
1562 dma_pte_clear_range(domain, base, base + size);
1563
1564 ret = domain_page_mapping(domain, base, base, size,
1565 DMA_PTE_READ|DMA_PTE_WRITE);
1566 if (ret)
1567 goto error;
1568
1569 /* context entry init */
1570 ret = domain_context_mapping(domain, pdev);
1571 if (!ret)
1572 return 0;
1573error:
1574 domain_exit(domain);
1575 return ret;
1576
1577}
1578
1579static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1580 struct pci_dev *pdev)
1581{
1582 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1583 return 0;
1584 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1585 rmrr->end_address + 1);
1586}
1587
1588#ifdef CONFIG_DMAR_GFX_WA
1589struct iommu_prepare_data {
1590 struct pci_dev *pdev;
1591 int ret;
1592};
1593
1594static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1595 unsigned long end_pfn, void *datax)
1596{
1597 struct iommu_prepare_data *data;
1598
1599 data = (struct iommu_prepare_data *)datax;
1600
1601 data->ret = iommu_prepare_identity_map(data->pdev,
1602 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1603 return data->ret;
1604
1605}
1606
1607static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1608{
1609 int nid;
1610 struct iommu_prepare_data data;
1611
1612 data.pdev = pdev;
1613 data.ret = 0;
1614
1615 for_each_online_node(nid) {
1616 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1617 if (data.ret)
1618 return data.ret;
1619 }
1620 return data.ret;
1621}
1622
1623static void __init iommu_prepare_gfx_mapping(void)
1624{
1625 struct pci_dev *pdev = NULL;
1626 int ret;
1627
1628 for_each_pci_dev(pdev) {
1629 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1630 !IS_GFX_DEVICE(pdev))
1631 continue;
1632 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1633 pci_name(pdev));
1634 ret = iommu_prepare_with_active_regions(pdev);
1635 if (ret)
1636 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1637 }
1638}
1639#endif
1640
1641#ifdef CONFIG_DMAR_FLOPPY_WA
1642static inline void iommu_prepare_isa(void)
1643{
1644 struct pci_dev *pdev;
1645 int ret;
1646
1647 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1648 if (!pdev)
1649 return;
1650
1651 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1652 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1653
1654 if (ret)
1655 printk("IOMMU: Failed to create 0-16M identity map, "
1656 "floppy might not work\n");
1657
1658}
1659#else
1660static inline void iommu_prepare_isa(void)
1661{
1662 return;
1663}
1664#endif /* !CONFIG_DMAR_FLOPPY_WA */
1665
1666static int __init init_dmars(void)
1667{
1668 struct dmar_drhd_unit *drhd;
1669 struct dmar_rmrr_unit *rmrr;
1670 struct pci_dev *pdev;
1671 struct intel_iommu *iommu;
1672 int i, ret, unit = 0;
1673
1674 /*
1675 * for each drhd
1676 * allocate root
1677 * initialize and program root entry to not present
1678 * endfor
1679 */
1680 for_each_drhd_unit(drhd) {
1681 g_num_of_iommus++;
1682 /*
1683 * lock not needed as this is only incremented in the single-
1684 * threaded kernel __init code path; all other accesses are
1685 * read-only
1686 */
1687 }
1688
1689 deferred_flush = kzalloc(g_num_of_iommus *
1690 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1691 if (!deferred_flush) {
1692 ret = -ENOMEM;
1693 goto error;
1694 }
1695
1696 for_each_drhd_unit(drhd) {
1697 if (drhd->ignored)
1698 continue;
1699
1700 iommu = drhd->iommu;
1701
1702 ret = iommu_init_domains(iommu);
1703 if (ret)
1704 goto error;
1705
1706 /*
1707 * TBD:
1708 * we could share the same root & context tables
1709 * among all IOMMUs. Need to split it later.
1710 */
1711 ret = iommu_alloc_root_entry(iommu);
1712 if (ret) {
1713 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1714 goto error;
1715 }
1716 }
1717
1718 for_each_drhd_unit(drhd) {
1719 if (drhd->ignored)
1720 continue;
1721
1722 iommu = drhd->iommu;
1723 if (dmar_enable_qi(iommu)) {
1724 /*
1725 * Queued Invalidate not enabled, use Register Based
1726 * Invalidate
1727 */
1728 iommu->flush.flush_context = __iommu_flush_context;
1729 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1730 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1731 "invalidation\n",
1732 (unsigned long long)drhd->reg_base_addr);
1733 } else {
1734 iommu->flush.flush_context = qi_flush_context;
1735 iommu->flush.flush_iotlb = qi_flush_iotlb;
1736 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1737 "invalidation\n",
1738 (unsigned long long)drhd->reg_base_addr);
1739 }
1740 }
1741
1742 /*
1743 * For each rmrr
1744 * for each dev attached to rmrr
1745 * do
1746 * locate drhd for dev, alloc domain for dev
1747 * allocate free domain
1748 * allocate page table entries for rmrr
1749 * if context not allocated for bus
1750 * allocate and init context
1751 * set present in root table for this bus
1752 * init context with domain, translation etc
1753 * endfor
1754 * endfor
1755 */
1756 for_each_rmrr_units(rmrr) {
1757 for (i = 0; i < rmrr->devices_cnt; i++) {
1758 pdev = rmrr->devices[i];
1759 /* some BIOSes list non-existent devices in the DMAR table */
1760 if (!pdev)
1761 continue;
1762 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1763 if (ret)
1764 printk(KERN_ERR
1765 "IOMMU: mapping reserved region failed\n");
1766 }
1767 }
1768
1769 iommu_prepare_gfx_mapping();
1770
1771 iommu_prepare_isa();
1772
1773 /*
1774 * for each drhd
1775 * enable fault log
1776 * global invalidate context cache
1777 * global invalidate iotlb
1778 * enable translation
1779 */
1780 for_each_drhd_unit(drhd) {
1781 if (drhd->ignored)
1782 continue;
1783 iommu = drhd->iommu;
1784 sprintf (iommu->name, "dmar%d", unit++);
1785
1786 iommu_flush_write_buffer(iommu);
1787
1788 ret = dmar_set_interrupt(iommu);
1789 if (ret)
1790 goto error;
1791
1792 iommu_set_root_entry(iommu);
1793
1794 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1795 0);
1796 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1797 0);
1798 iommu_disable_protect_mem_regions(iommu);
1799
1800 ret = iommu_enable_translation(iommu);
1801 if (ret)
1802 goto error;
1803 }
1804
1805 return 0;
1806error:
1807 for_each_drhd_unit(drhd) {
1808 if (drhd->ignored)
1809 continue;
1810 iommu = drhd->iommu;
1811 free_iommu(iommu);
1812 }
1813 return ret;
1814}
1815
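/*
 * Number of bytes that must be mapped at page granularity to cover
 * [host_addr, host_addr + size), including the offset into the first
 * page.
 */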
1816static inline u64 aligned_size(u64 host_addr, size_t size)
1817{
1818 u64 addr;
1819 addr = (host_addr & (~PAGE_MASK)) + size;
1820 return PAGE_ALIGN(addr);
1821}
1822
1823struct iova *
1824 iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1825{
1826 struct iova *piova;
1827
1828 /* Make sure it's in range */
1829 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1830 if (!size || (IOVA_START_ADDR + size > end))
1831 return NULL;
1832
1833 piova = alloc_iova(&domain->iovad,
1834 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1835 return piova;
1836}
1837
1838static struct iova *
1839__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1840 size_t size, u64 dma_mask)
1841{
1842 struct pci_dev *pdev = to_pci_dev(dev);
1843 struct iova *iova = NULL;
1844
1845 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1846 iova = iommu_alloc_iova(domain, size, dma_mask);
1847 else {
1848 /*
1849 * First try to allocate an io virtual address in
1850 * DMA_32BIT_MASK and if that fails then try allocating
1851 * from a higher range
1852 */
1853 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1854 if (!iova)
1855 iova = iommu_alloc_iova(domain, size, dma_mask);
1856 }
1857
1858 if (!iova) {
1859 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1860 return NULL;
1861 }
1862
1863 return iova;
1864}
1865
1866static struct dmar_domain *
1867get_valid_domain_for_dev(struct pci_dev *pdev)
1868{
1869 struct dmar_domain *domain;
1870 int ret;
1871
1872 domain = get_domain_for_dev(pdev,
1873 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1874 if (!domain) {
1875 printk(KERN_ERR
1876 "Allocating domain for %s failed", pci_name(pdev));
1877 return NULL;
1878 }
1879
1880 /* make sure context mapping is ok */
1881 if (unlikely(!domain_context_mapped(domain, pdev))) {
1882 ret = domain_context_mapping(domain, pdev);
1883 if (ret) {
1884 printk(KERN_ERR
1885 "Domain context map for %s failed",
1886 pci_name(pdev));
1887 return NULL;
1888 }
1889 }
1890
1891 return domain;
1892}
1893
1894static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1895 size_t size, int dir, u64 dma_mask)
1896{
1897 struct pci_dev *pdev = to_pci_dev(hwdev);
1898 struct dmar_domain *domain;
1899 phys_addr_t start_paddr;
1900 struct iova *iova;
1901 int prot = 0;
1902 int ret;
1903
1904 BUG_ON(dir == DMA_NONE);
1905 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1906 return paddr;
1907
1908 domain = get_valid_domain_for_dev(pdev);
1909 if (!domain)
1910 return 0;
1911
1912 size = aligned_size((u64)paddr, size);
1913
1914 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1915 if (!iova)
1916 goto error;
1917
1918 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1919
1920 /*
1921 * Check if DMAR supports zero-length reads on write only
1922 * mappings..
1923 */
1924 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1925 !cap_zlr(domain->iommu->cap))
1926 prot |= DMA_PTE_READ;
1927 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1928 prot |= DMA_PTE_WRITE;
1929 /*
1930 * paddr to (paddr + size) might be a partial page; we should map the whole
1931 * page. Note: if two parts of one page are mapped separately, we
1932 * might have two guest_addr mappings to the same host paddr, but this
1933 * is not a big problem
1934 */
1935 ret = domain_page_mapping(domain, start_paddr,
1936 ((u64)paddr) & PAGE_MASK, size, prot);
1937 if (ret)
1938 goto error;
1939
f76aec76
KA
1940 /* it's a non-present to present mapping */
1941 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
5b6985ce 1942 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76
KA
1943 if (ret)
1944 iommu_flush_write_buffer(domain->iommu);
1945
5b6985ce 1946 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 1947
ba395927 1948error:
f76aec76
KA
1949 if (iova)
1950 __free_iova(&domain->iovad, iova);
ba395927 1951 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 1952 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
1953 return 0;
1954}
1955
bb9e6d65
FT
1956dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1957 size_t size, int dir)
1958{
1959 return __intel_map_single(hwdev, paddr, size, dir,
1960 to_pci_dev(hwdev)->dma_mask);
1961}
1962
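/*
 * Deferred-unmap machinery: instead of flushing the IOTLB on every unmap,
 * add_unmap() parks the freed IOVA in a per-IOMMU deferred_flush[] slot and
 * arms a 10ms timer.  flush_unmaps() then performs one global IOTLB flush
 * per IOMMU and releases all queued IOVAs in a batch; it also runs early
 * when list_size reaches HIGH_WATER_MARK.
 */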
5e0d2a6f 1963static void flush_unmaps(void)
1964{
80b20dd8 1965 int i, j;
5e0d2a6f 1966
5e0d2a6f 1967 timer_on = 0;
1968
1969 /* just flush them all */
1970 for (i = 0; i < g_num_of_iommus; i++) {
80b20dd8 1971 if (deferred_flush[i].next) {
c42d9f32
SS
1972 struct intel_iommu *iommu =
1973 deferred_flush[i].domain[0]->iommu;
1974
a77b67d4
YS
1975 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1976 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 1977 for (j = 0; j < deferred_flush[i].next; j++) {
1978 __free_iova(&deferred_flush[i].domain[j]->iovad,
1979 deferred_flush[i].iova[j]);
1980 }
1981 deferred_flush[i].next = 0;
1982 }
5e0d2a6f 1983 }
1984
5e0d2a6f 1985 list_size = 0;
5e0d2a6f 1986}
1987
1988static void flush_unmaps_timeout(unsigned long data)
1989{
80b20dd8 1990 unsigned long flags;
1991
1992 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 1993 flush_unmaps();
80b20dd8 1994 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 1995}
1996
1997static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1998{
1999 unsigned long flags;
80b20dd8 2000 int next, iommu_id;
5e0d2a6f 2001
2002 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2003 if (list_size == HIGH_WATER_MARK)
2004 flush_unmaps();
2005
c42d9f32
SS
2006 iommu_id = dom->iommu->seq_id;
2007
80b20dd8 2008 next = deferred_flush[iommu_id].next;
2009 deferred_flush[iommu_id].domain[next] = dom;
2010 deferred_flush[iommu_id].iova[next] = iova;
2011 deferred_flush[iommu_id].next++;
5e0d2a6f 2012
2013 if (!timer_on) {
2014 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2015 timer_on = 1;
2016 }
2017 list_size++;
2018 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2019}
2020
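/*
 * Unmap path: look up the IOVA that covers dev_addr, clear its PTEs and free
 * the page tables, then either flush the IOTLB synchronously and release the
 * IOVA (intel_iommu_strict) or hand it to the deferred-flush queue above.
 */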
5b6985ce
FY
2021void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2022 int dir)
ba395927 2023{
ba395927 2024 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2025 struct dmar_domain *domain;
2026 unsigned long start_addr;
ba395927
KA
2027 struct iova *iova;
2028
358dd8ac 2029 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2030 return;
ba395927
KA
2031 domain = find_domain(pdev);
2032 BUG_ON(!domain);
2033
2034 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2035 if (!iova)
ba395927 2036 return;
ba395927 2037
5b6985ce 2038 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2039 size = aligned_size((u64)dev_addr, size);
ba395927 2040
f76aec76 2041 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2042 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2043
f76aec76
KA
2044 /* clear the whole page */
2045 dma_pte_clear_range(domain, start_addr, start_addr + size);
2046 /* free page tables */
2047 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2048 if (intel_iommu_strict) {
2049 if (iommu_flush_iotlb_psi(domain->iommu,
5b6985ce 2050 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
5e0d2a6f 2051 iommu_flush_write_buffer(domain->iommu);
2052 /* free iova */
2053 __free_iova(&domain->iovad, iova);
2054 } else {
2055 add_unmap(domain, iova);
2056 /*
 2057	 * queue up the release of the unmap to save the roughly 1/6th of
 2058	 * the cpu time otherwise consumed by the iotlb flush operation...
2059 */
5e0d2a6f 2060 }
ba395927
KA
2061}
2062
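/*
 * Coherent allocations are ordinary page allocations mapped bidirectionally
 * through __intel_map_single() against the device's coherent_dma_mask; there
 * is no separate pool, so intel_free_coherent() simply unmaps and frees the
 * pages.
 */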
5b6985ce
FY
2063void *intel_alloc_coherent(struct device *hwdev, size_t size,
2064 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2065{
2066 void *vaddr;
2067 int order;
2068
5b6985ce 2069 size = PAGE_ALIGN(size);
ba395927
KA
2070 order = get_order(size);
2071 flags &= ~(GFP_DMA | GFP_DMA32);
2072
2073 vaddr = (void *)__get_free_pages(flags, order);
2074 if (!vaddr)
2075 return NULL;
2076 memset(vaddr, 0, size);
2077
bb9e6d65
FT
2078 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2079 DMA_BIDIRECTIONAL,
2080 hwdev->coherent_dma_mask);
ba395927
KA
2081 if (*dma_handle)
2082 return vaddr;
2083 free_pages((unsigned long)vaddr, order);
2084 return NULL;
2085}
2086
5b6985ce
FY
2087void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2088 dma_addr_t dma_handle)
ba395927
KA
2089{
2090 int order;
2091
5b6985ce 2092 size = PAGE_ALIGN(size);
ba395927
KA
2093 order = get_order(size);
2094
2095 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2096 free_pages((unsigned long)vaddr, order);
2097}
2098
12d4d40e 2099#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2100
2101void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2102 int nelems, int dir)
ba395927
KA
2103{
2104 int i;
2105 struct pci_dev *pdev = to_pci_dev(hwdev);
2106 struct dmar_domain *domain;
f76aec76
KA
2107 unsigned long start_addr;
2108 struct iova *iova;
2109 size_t size = 0;
2110 void *addr;
c03ab37c 2111 struct scatterlist *sg;
ba395927 2112
358dd8ac 2113 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2114 return;
2115
2116 domain = find_domain(pdev);
ba395927 2117
c03ab37c 2118 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2119 if (!iova)
2120 return;
c03ab37c 2121 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2122 addr = SG_ENT_VIRT_ADDRESS(sg);
2123 size += aligned_size((u64)addr, sg->length);
2124 }
2125
5b6985ce 2126 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2127
2128 /* clear the whole page */
2129 dma_pte_clear_range(domain, start_addr, start_addr + size);
2130 /* free page tables */
2131 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2132
2133 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
5b6985ce 2134 size >> VTD_PAGE_SHIFT, 0))
ba395927 2135 iommu_flush_write_buffer(domain->iommu);
f76aec76
KA
2136
2137 /* free iova */
2138 __free_iova(&domain->iovad, iova);
ba395927
KA
2139}
2140
ba395927 2141static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2142 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2143{
2144 int i;
c03ab37c 2145 struct scatterlist *sg;
ba395927 2146
c03ab37c 2147 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2148 BUG_ON(!sg_page(sg));
c03ab37c
FT
2149 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2150 sg->dma_length = sg->length;
ba395927
KA
2151 }
2152 return nelems;
2153}
2154
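/*
 * Scatterlist map path: the total (page-aligned) length of all segments is
 * computed first so a single contiguous IOVA range can back the whole list;
 * each segment is then mapped at its running offset inside that range, and
 * a failure part-way through tears down everything mapped so far.
 */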
5b6985ce
FY
2155int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2156 int dir)
ba395927
KA
2157{
2158 void *addr;
2159 int i;
ba395927
KA
2160 struct pci_dev *pdev = to_pci_dev(hwdev);
2161 struct dmar_domain *domain;
f76aec76
KA
2162 size_t size = 0;
2163 int prot = 0;
2164 size_t offset = 0;
2165 struct iova *iova = NULL;
2166 int ret;
c03ab37c 2167 struct scatterlist *sg;
f76aec76 2168 unsigned long start_addr;
ba395927
KA
2169
2170 BUG_ON(dir == DMA_NONE);
358dd8ac 2171 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2172 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2173
f76aec76
KA
2174 domain = get_valid_domain_for_dev(pdev);
2175 if (!domain)
2176 return 0;
2177
c03ab37c 2178 for_each_sg(sglist, sg, nelems, i) {
ba395927 2179 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2180 addr = (void *)virt_to_phys(addr);
2181 size += aligned_size((u64)addr, sg->length);
2182 }
2183
bb9e6d65 2184 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2185 if (!iova) {
c03ab37c 2186 sglist->dma_length = 0;
f76aec76
KA
2187 return 0;
2188 }
2189
2190 /*
 2191	 * Check if DMAR supports zero-length reads on write-only
 2192	 * mappings.
2193 */
2194 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2195 !cap_zlr(domain->iommu->cap))
2196 prot |= DMA_PTE_READ;
2197 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2198 prot |= DMA_PTE_WRITE;
2199
5b6985ce 2200 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2201 offset = 0;
c03ab37c 2202 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2203 addr = SG_ENT_VIRT_ADDRESS(sg);
2204 addr = (void *)virt_to_phys(addr);
2205 size = aligned_size((u64)addr, sg->length);
2206 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2207 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2208 size, prot);
2209 if (ret) {
2210 /* clear the page */
2211 dma_pte_clear_range(domain, start_addr,
2212 start_addr + offset);
2213 /* free page tables */
2214 dma_pte_free_pagetable(domain, start_addr,
2215 start_addr + offset);
2216 /* free iova */
2217 __free_iova(&domain->iovad, iova);
ba395927
KA
2218 return 0;
2219 }
f76aec76 2220 sg->dma_address = start_addr + offset +
5b6985ce 2221 ((u64)addr & (~PAGE_MASK));
ba395927 2222 sg->dma_length = sg->length;
f76aec76 2223 offset += size;
ba395927
KA
2224 }
2225
ba395927 2226 /* it's a non-present to present mapping */
f76aec76 2227 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
5b6985ce 2228 start_addr, offset >> VTD_PAGE_SHIFT, 1))
ba395927
KA
2229 iommu_flush_write_buffer(domain->iommu);
2230 return nelems;
2231}
2232
2233static struct dma_mapping_ops intel_dma_ops = {
2234 .alloc_coherent = intel_alloc_coherent,
2235 .free_coherent = intel_free_coherent,
2236 .map_single = intel_map_single,
2237 .unmap_single = intel_unmap_single,
2238 .map_sg = intel_map_sg,
2239 .unmap_sg = intel_unmap_sg,
2240};
2241
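/*
 * Illustrative sketch: once dma_ops points at intel_dma_ops, an ordinary
 * driver's calls through the generic DMA API land in the routines above.
 * The function, device and buffer names below are hypothetical and the block
 * is compiled out; the flow is map, hand the bus address to hardware, unmap.
 */
#if 0	/* example only */
static int example_dma_usage(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* routed to intel_map_single() -> __intel_map_single() */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (!handle)	/* this implementation returns 0 on failure */
		return -ENOMEM;

	/* ... program 'handle' into the device and wait for completion ... */

	/* routed to intel_unmap_single(); may defer the IOTLB flush */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
#endif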
2242static inline int iommu_domain_cache_init(void)
2243{
2244 int ret = 0;
2245
2246 iommu_domain_cache = kmem_cache_create("iommu_domain",
2247 sizeof(struct dmar_domain),
2248 0,
2249 SLAB_HWCACHE_ALIGN,
2250
2251 NULL);
2252 if (!iommu_domain_cache) {
2253 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2254 ret = -ENOMEM;
2255 }
2256
2257 return ret;
2258}
2259
2260static inline int iommu_devinfo_cache_init(void)
2261{
2262 int ret = 0;
2263
2264 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2265 sizeof(struct device_domain_info),
2266 0,
2267 SLAB_HWCACHE_ALIGN,
ba395927
KA
2268 NULL);
2269 if (!iommu_devinfo_cache) {
2270 printk(KERN_ERR "Couldn't create devinfo cache\n");
2271 ret = -ENOMEM;
2272 }
2273
2274 return ret;
2275}
2276
2277static inline int iommu_iova_cache_init(void)
2278{
2279 int ret = 0;
2280
2281 iommu_iova_cache = kmem_cache_create("iommu_iova",
2282 sizeof(struct iova),
2283 0,
2284 SLAB_HWCACHE_ALIGN,
ba395927
KA
2285 NULL);
2286 if (!iommu_iova_cache) {
2287 printk(KERN_ERR "Couldn't create iova cache\n");
2288 ret = -ENOMEM;
2289 }
2290
2291 return ret;
2292}
2293
2294static int __init iommu_init_mempool(void)
2295{
2296 int ret;
2297 ret = iommu_iova_cache_init();
2298 if (ret)
2299 return ret;
2300
2301 ret = iommu_domain_cache_init();
2302 if (ret)
2303 goto domain_error;
2304
2305 ret = iommu_devinfo_cache_init();
2306 if (!ret)
2307 return ret;
2308
2309 kmem_cache_destroy(iommu_domain_cache);
2310domain_error:
2311 kmem_cache_destroy(iommu_iova_cache);
2312
2313 return -ENOMEM;
2314}
2315
2316static void __init iommu_exit_mempool(void)
2317{
2318 kmem_cache_destroy(iommu_devinfo_cache);
2319 kmem_cache_destroy(iommu_domain_cache);
2320 kmem_cache_destroy(iommu_iova_cache);
2321
2322}
2323
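/*
 * DRHD pruning: a DMAR unit is ignored when it covers no PCI devices at all,
 * and -- unless gfx mapping is requested via dmar_map_gfx -- also when every
 * device under it is a graphics device, in which case those devices are
 * tagged with DUMMY_DEVICE_DOMAIN_INFO so the map/unmap paths bypass them.
 */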
ba395927
KA
2324static void __init init_no_remapping_devices(void)
2325{
2326 struct dmar_drhd_unit *drhd;
2327
2328 for_each_drhd_unit(drhd) {
2329 if (!drhd->include_all) {
2330 int i;
2331 for (i = 0; i < drhd->devices_cnt; i++)
2332 if (drhd->devices[i] != NULL)
2333 break;
2334 /* ignore DMAR unit if no pci devices exist */
2335 if (i == drhd->devices_cnt)
2336 drhd->ignored = 1;
2337 }
2338 }
2339
2340 if (dmar_map_gfx)
2341 return;
2342
2343 for_each_drhd_unit(drhd) {
2344 int i;
2345 if (drhd->ignored || drhd->include_all)
2346 continue;
2347
2348 for (i = 0; i < drhd->devices_cnt; i++)
2349 if (drhd->devices[i] &&
2350 !IS_GFX_DEVICE(drhd->devices[i]))
2351 break;
2352
2353 if (i < drhd->devices_cnt)
2354 continue;
2355
2356 /* bypass IOMMU if it is just for gfx devices */
2357 drhd->ignored = 1;
2358 for (i = 0; i < drhd->devices_cnt; i++) {
2359 if (!drhd->devices[i])
2360 continue;
358dd8ac 2361 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2362 }
2363 }
2364}
2365
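/*
 * Boot-time entry point: parse the DMAR table and device scopes, bail out if
 * remapping is disabled (no_iommu, swiotlb or dmar_disabled), set up the
 * mempools and reserved IOVA ranges, prune graphics-only units, initialize
 * the DMAR hardware, and finally install intel_dma_ops as the system dma_ops.
 */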
2366int __init intel_iommu_init(void)
2367{
2368 int ret = 0;
2369
ba395927
KA
2370 if (dmar_table_init())
2371 return -ENODEV;
2372
1886e8a9
SS
2373 if (dmar_dev_scope_init())
2374 return -ENODEV;
2375
2ae21010
SS
2376 /*
2377 * Check the need for DMA-remapping initialization now.
2378 * Above initialization will also be used by Interrupt-remapping.
2379 */
2380 if (no_iommu || swiotlb || dmar_disabled)
2381 return -ENODEV;
2382
ba395927
KA
2383 iommu_init_mempool();
2384 dmar_init_reserved_ranges();
2385
2386 init_no_remapping_devices();
2387
2388 ret = init_dmars();
2389 if (ret) {
2390 printk(KERN_ERR "IOMMU: dmar init failed\n");
2391 put_iova_domain(&reserved_iova_list);
2392 iommu_exit_mempool();
2393 return ret;
2394 }
2395 printk(KERN_INFO
2396 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2397
5e0d2a6f 2398 init_timer(&unmap_timer);
ba395927
KA
2399 force_iommu = 1;
2400 dma_ops = &intel_dma_ops;
2401 return 0;
2402}
e820482c 2403
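/*
 * The exported intel_iommu_* wrappers below expose domain allocation,
 * context/page mapping, device detach and IOVA-to-PFN lookup to code outside
 * this file, thinly delegating to the internal helpers above.
 */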
38717946
KA
2404void intel_iommu_domain_exit(struct dmar_domain *domain)
2405{
2406 u64 end;
2407
 2408	 /* Domain 0 is reserved, so don't process it */
2409 if (!domain)
2410 return;
2411
2412 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2413 end = end & (~VTD_PAGE_MASK);
38717946
KA
2414
2415 /* clear ptes */
2416 dma_pte_clear_range(domain, 0, end);
2417
2418 /* free page tables */
2419 dma_pte_free_pagetable(domain, 0, end);
2420
2421 iommu_free_domain(domain);
2422 free_domain_mem(domain);
2423}
2424EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2425
2426struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2427{
2428 struct dmar_drhd_unit *drhd;
2429 struct dmar_domain *domain;
2430 struct intel_iommu *iommu;
2431
2432 drhd = dmar_find_matched_drhd_unit(pdev);
2433 if (!drhd) {
2434 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2435 return NULL;
2436 }
2437
2438 iommu = drhd->iommu;
2439 if (!iommu) {
2440 printk(KERN_ERR
2441 "intel_iommu_domain_alloc: iommu == NULL\n");
2442 return NULL;
2443 }
2444 domain = iommu_alloc_domain(iommu);
2445 if (!domain) {
2446 printk(KERN_ERR
2447 "intel_iommu_domain_alloc: domain == NULL\n");
2448 return NULL;
2449 }
2450 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2451 printk(KERN_ERR
2452 "intel_iommu_domain_alloc: domain_init() failed\n");
2453 intel_iommu_domain_exit(domain);
2454 return NULL;
2455 }
2456 return domain;
2457}
2458EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2459
2460int intel_iommu_context_mapping(
2461 struct dmar_domain *domain, struct pci_dev *pdev)
2462{
2463 int rc;
2464 rc = domain_context_mapping(domain, pdev);
2465 return rc;
2466}
2467EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2468
2469int intel_iommu_page_mapping(
2470 struct dmar_domain *domain, dma_addr_t iova,
2471 u64 hpa, size_t size, int prot)
2472{
2473 int rc;
2474 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2475 return rc;
2476}
2477EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2478
2479void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2480{
2481 detach_domain_for_dev(domain, bus, devfn);
2482}
2483EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2484
2485struct dmar_domain *
2486intel_iommu_find_domain(struct pci_dev *pdev)
2487{
2488 return find_domain(pdev);
2489}
2490EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2491
2492int intel_iommu_found(void)
2493{
2494 return g_num_of_iommus;
2495}
2496EXPORT_SYMBOL_GPL(intel_iommu_found);
2497
2498u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2499{
2500 struct dma_pte *pte;
2501 u64 pfn;
2502
2503 pfn = 0;
2504 pte = addr_to_dma_pte(domain, iova);
2505
2506 if (pte)
2507 pfn = dma_pte_addr(*pte);
2508
5b6985ce 2509 return pfn >> VTD_PAGE_SHIFT;
38717946
KA
2510}
2511EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);