intel-iommu: make init_dmars() static
drivers/pci/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
26#include <linux/debugfs.h>
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
35#include <linux/timer.h>
36#include <linux/iova.h>
37#include <linux/intel-iommu.h>
38#include <asm/cacheflush.h>
39#include <asm/iommu.h>
40#include "pci.h"
41
42#define ROOT_SIZE VTD_PAGE_SIZE
43#define CONTEXT_SIZE VTD_PAGE_SIZE
44
45#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
46#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
47
48#define IOAPIC_RANGE_START (0xfee00000)
49#define IOAPIC_RANGE_END (0xfeefffff)
50#define IOVA_START_ADDR (0x1000)
51
52#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55
56
57static void flush_unmaps_timeout(unsigned long data);
58
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60
61#define HIGH_WATER_MARK 250
62struct deferred_flush_tables {
63 int next;
64 struct iova *iova[HIGH_WATER_MARK];
65 struct dmar_domain *domain[HIGH_WATER_MARK];
66};
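/*
 * Deferred IOTLB flushing: instead of flushing the IOTLB on every unmap,
 * freed IOVAs are queued per IOMMU in deferred_flush_tables.  The queue is
 * drained by flush_unmaps() either when HIGH_WATER_MARK entries have piled
 * up or when the 10ms unmap_timer fires, doing one global IOTLB flush per
 * IOMMU instead of one flush per unmapped buffer.  Booting with
 * intel_iommu=strict bypasses this batching (see intel_unmap_single()).
 */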
67
68static struct deferred_flush_tables *deferred_flush;
69
70/* bitmap for indexing intel_iommus */
71static int g_num_of_iommus;
72
73static DEFINE_SPINLOCK(async_umap_flush_lock);
74static LIST_HEAD(unmaps_to_do);
75
76static int timer_on;
77static long list_size;
78
79static void domain_remove_dev_info(struct dmar_domain *domain);
80
81int dmar_disabled;
82static int __initdata dmar_map_gfx = 1;
83static int dmar_forcedac;
84static int intel_iommu_strict;
85
86#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
87static DEFINE_SPINLOCK(device_domain_lock);
88static LIST_HEAD(device_domain_list);
89
90static int __init intel_iommu_setup(char *str)
91{
92 if (!str)
93 return -EINVAL;
94 while (*str) {
95 if (!strncmp(str, "off", 3)) {
96 dmar_disabled = 1;
97 printk(KERN_INFO"Intel-IOMMU: disabled\n");
98 } else if (!strncmp(str, "igfx_off", 8)) {
99 dmar_map_gfx = 0;
100 printk(KERN_INFO
101 "Intel-IOMMU: disable GFX device mapping\n");
102 } else if (!strncmp(str, "forcedac", 8)) {
103 printk(KERN_INFO
104 "Intel-IOMMU: Forcing DAC for PCI devices\n");
105 dmar_forcedac = 1;
106 } else if (!strncmp(str, "strict", 6)) {
107 printk(KERN_INFO
108 "Intel-IOMMU: disable batched IOTLB flush\n");
109 intel_iommu_strict = 1;
110 }
111
112 str += strcspn(str, ",");
113 while (*str == ',')
114 str++;
115 }
116 return 0;
117}
118__setup("intel_iommu=", intel_iommu_setup);
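/*
 * The "intel_iommu=" boot parameter accepts a comma-separated list of the
 * options parsed above, e.g.:
 *
 *	intel_iommu=igfx_off,strict
 *
 * "off" disables the IOMMU, "igfx_off" disables GFX device mapping,
 * "forcedac" skips the 32-bit-first IOVA allocation and uses the device's
 * full DMA mask, and "strict" disables the batched IOTLB flush.
 */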
119
120static struct kmem_cache *iommu_domain_cache;
121static struct kmem_cache *iommu_devinfo_cache;
122static struct kmem_cache *iommu_iova_cache;
123
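/*
 * The allocation helpers below temporarily set PF_MEMALLOC so that their
 * GFP_ATOMIC allocations may dip into the emergency memory reserves (they
 * run in the DMA mapping paths, where failure is expensive), and then
 * restore the caller's original PF_MEMALLOC state.
 */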
124static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
125{
126 unsigned int flags;
127 void *vaddr;
128
129 /* trying to avoid low memory issues */
130 flags = current->flags & PF_MEMALLOC;
131 current->flags |= PF_MEMALLOC;
132 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
133 current->flags &= (~PF_MEMALLOC | flags);
134 return vaddr;
135}
136
137
138static inline void *alloc_pgtable_page(void)
139{
140 unsigned int flags;
141 void *vaddr;
142
143 /* trying to avoid low memory issues */
144 flags = current->flags & PF_MEMALLOC;
145 current->flags |= PF_MEMALLOC;
146 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
147 current->flags &= (~PF_MEMALLOC | flags);
148 return vaddr;
149}
150
151static inline void free_pgtable_page(void *vaddr)
152{
153 free_page((unsigned long)vaddr);
154}
155
156static inline void *alloc_domain_mem(void)
157{
158 return iommu_kmem_cache_alloc(iommu_domain_cache);
159}
160
161static void free_domain_mem(void *vaddr)
162{
163 kmem_cache_free(iommu_domain_cache, vaddr);
164}
165
166static inline void * alloc_devinfo_mem(void)
167{
168 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
169}
170
171static inline void free_devinfo_mem(void *vaddr)
172{
173 kmem_cache_free(iommu_devinfo_cache, vaddr);
174}
175
176struct iova *alloc_iova_mem(void)
177{
178 return iommu_kmem_cache_alloc(iommu_iova_cache);
179}
180
181void free_iova_mem(struct iova *iova)
182{
183 kmem_cache_free(iommu_iova_cache, iova);
184}
185
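/*
 * Root/context table handling: iommu->root_entry is indexed by PCI bus
 * number and each present root entry points to a per-bus context table
 * indexed by devfn.  Context tables are allocated lazily the first time a
 * device on that bus is mapped, and both the new table and the updated
 * root entry are flushed with __iommu_flush_cache() so the hardware sees
 * them.
 */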
186/* Gets context entry for a given bus and devfn */
187static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
188 u8 bus, u8 devfn)
189{
190 struct root_entry *root;
191 struct context_entry *context;
192 unsigned long phy_addr;
193 unsigned long flags;
194
195 spin_lock_irqsave(&iommu->lock, flags);
196 root = &iommu->root_entry[bus];
197 context = get_context_addr_from_root(root);
198 if (!context) {
199 context = (struct context_entry *)alloc_pgtable_page();
200 if (!context) {
201 spin_unlock_irqrestore(&iommu->lock, flags);
202 return NULL;
203 }
204 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
205 phy_addr = virt_to_phys((void *)context);
206 set_root_value(root, phy_addr);
207 set_root_present(root);
208 __iommu_flush_cache(iommu, root, sizeof(*root));
209 }
210 spin_unlock_irqrestore(&iommu->lock, flags);
211 return &context[devfn];
212}
213
214static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
215{
216 struct root_entry *root;
217 struct context_entry *context;
218 int ret;
219 unsigned long flags;
220
221 spin_lock_irqsave(&iommu->lock, flags);
222 root = &iommu->root_entry[bus];
223 context = get_context_addr_from_root(root);
224 if (!context) {
225 ret = 0;
226 goto out;
227 }
228 ret = context_present(context[devfn]);
229out:
230 spin_unlock_irqrestore(&iommu->lock, flags);
231 return ret;
232}
233
234static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
235{
236 struct root_entry *root;
237 struct context_entry *context;
238 unsigned long flags;
239
240 spin_lock_irqsave(&iommu->lock, flags);
241 root = &iommu->root_entry[bus];
242 context = get_context_addr_from_root(root);
243 if (context) {
244 context_clear_entry(context[devfn]);
245 __iommu_flush_cache(iommu, &context[devfn],
246 sizeof(*context));
247 }
248 spin_unlock_irqrestore(&iommu->lock, flags);
249}
250
251static void free_context_table(struct intel_iommu *iommu)
252{
253 struct root_entry *root;
254 int i;
255 unsigned long flags;
256 struct context_entry *context;
257
258 spin_lock_irqsave(&iommu->lock, flags);
259 if (!iommu->root_entry) {
260 goto out;
261 }
262 for (i = 0; i < ROOT_ENTRY_NR; i++) {
263 root = &iommu->root_entry[i];
264 context = get_context_addr_from_root(root);
265 if (context)
266 free_pgtable_page(context);
267 }
268 free_pgtable_page(iommu->root_entry);
269 iommu->root_entry = NULL;
270out:
271 spin_unlock_irqrestore(&iommu->lock, flags);
272}
273
274/* page table handling */
275#define LEVEL_STRIDE (9)
276#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
277
278static inline int agaw_to_level(int agaw)
279{
280 return agaw + 2;
281}
282
283static inline int agaw_to_width(int agaw)
284{
285 return 30 + agaw * LEVEL_STRIDE;
286
287}
288
289static inline int width_to_agaw(int width)
290{
291 return (width - 30) / LEVEL_STRIDE;
292}
293
294static inline unsigned int level_to_offset_bits(int level)
295{
296 return (12 + (level - 1) * LEVEL_STRIDE);
297}
298
299static inline int address_level_offset(u64 addr, int level)
300{
301 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
302}
303
304static inline u64 level_mask(int level)
305{
306 return ((u64)-1 << level_to_offset_bits(level));
307}
308
309static inline u64 level_size(int level)
310{
311 return ((u64)1 << level_to_offset_bits(level));
312}
313
314static inline u64 align_to_level(u64 addr, int level)
315{
316 return ((addr + level_size(level) - 1) & level_mask(level));
317}
318
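/*
 * addr_to_dma_pte() walks (and, when needed, builds) the domain's
 * multi-level page table: starting from domain->pgd at level
 * agaw_to_level(agaw), each level consumes LEVEL_STRIDE (9) address bits,
 * missing intermediate tables are allocated on demand with r/w set, and the
 * level-1 (leaf) PTE for the address is returned.  Access permissions are
 * only enforced at the leaf level.
 */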
319static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
320{
321 int addr_width = agaw_to_width(domain->agaw);
322 struct dma_pte *parent, *pte = NULL;
323 int level = agaw_to_level(domain->agaw);
324 int offset;
325 unsigned long flags;
326
327 BUG_ON(!domain->pgd);
328
329 addr &= (((u64)1) << addr_width) - 1;
330 parent = domain->pgd;
331
332 spin_lock_irqsave(&domain->mapping_lock, flags);
333 while (level > 0) {
334 void *tmp_page;
335
336 offset = address_level_offset(addr, level);
337 pte = &parent[offset];
338 if (level == 1)
339 break;
340
341 if (!dma_pte_present(*pte)) {
342 tmp_page = alloc_pgtable_page();
343
344 if (!tmp_page) {
345 spin_unlock_irqrestore(&domain->mapping_lock,
346 flags);
347 return NULL;
348 }
349 __iommu_flush_cache(domain->iommu, tmp_page,
350 PAGE_SIZE);
351 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
352 /*
353 * high level table always sets r/w, last level page
354 * table control read/write
355 */
356 dma_set_pte_readable(*pte);
357 dma_set_pte_writable(*pte);
358 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
359 }
360 parent = phys_to_virt(dma_pte_addr(*pte));
361 level--;
362 }
363
364 spin_unlock_irqrestore(&domain->mapping_lock, flags);
365 return pte;
366}
367
368/* return address's pte at specific level */
369static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
370 int level)
371{
372 struct dma_pte *parent, *pte = NULL;
373 int total = agaw_to_level(domain->agaw);
374 int offset;
375
376 parent = domain->pgd;
377 while (level <= total) {
378 offset = address_level_offset(addr, total);
379 pte = &parent[offset];
380 if (level == total)
381 return pte;
382
383 if (!dma_pte_present(*pte))
384 break;
385 parent = phys_to_virt(dma_pte_addr(*pte));
386 total--;
387 }
388 return NULL;
389}
390
391/* clear one page's page table */
392static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
393{
394 struct dma_pte *pte = NULL;
395
396 /* get last level pte */
397 pte = dma_addr_level_pte(domain, addr, 1);
398
399 if (pte) {
400 dma_clear_pte(*pte);
401 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
402 }
403}
404
405/* clear last level pte, a tlb flush should be followed */
406static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
407{
408 int addr_width = agaw_to_width(domain->agaw);
409
410 start &= (((u64)1) << addr_width) - 1;
411 end &= (((u64)1) << addr_width) - 1;
412 /* in case it's partial page */
413 start = PAGE_ALIGN(start);
414 end &= PAGE_MASK;
415
416 /* we don't need lock here, nobody else touches the iova range */
417 while (start < end) {
418 dma_pte_clear_one(domain, start);
419 start += VTD_PAGE_SIZE;
420 }
421}
422
423/* free page table pages. last level pte should already be cleared */
424static void dma_pte_free_pagetable(struct dmar_domain *domain,
425 u64 start, u64 end)
426{
427 int addr_width = agaw_to_width(domain->agaw);
428 struct dma_pte *pte;
429 int total = agaw_to_level(domain->agaw);
430 int level;
431 u64 tmp;
432
433 start &= (((u64)1) << addr_width) - 1;
434 end &= (((u64)1) << addr_width) - 1;
435
436 /* we don't need lock here, nobody else touches the iova range */
437 level = 2;
438 while (level <= total) {
439 tmp = align_to_level(start, level);
440 if (tmp >= end || (tmp + level_size(level) > end))
441 return;
442
443 while (tmp < end) {
444 pte = dma_addr_level_pte(domain, tmp, level);
445 if (pte) {
446 free_pgtable_page(
447 phys_to_virt(dma_pte_addr(*pte)));
448 dma_clear_pte(*pte);
449 __iommu_flush_cache(domain->iommu,
450 pte, sizeof(*pte));
451 }
452 tmp += level_size(level);
453 }
454 level++;
455 }
456 /* free pgd */
457 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
458 free_pgtable_page(domain->pgd);
459 domain->pgd = NULL;
460 }
461}
462
463/* iommu handling */
464static int iommu_alloc_root_entry(struct intel_iommu *iommu)
465{
466 struct root_entry *root;
467 unsigned long flags;
468
469 root = (struct root_entry *)alloc_pgtable_page();
470 if (!root)
471 return -ENOMEM;
472
473 __iommu_flush_cache(iommu, root, ROOT_SIZE);
474
475 spin_lock_irqsave(&iommu->lock, flags);
476 iommu->root_entry = root;
477 spin_unlock_irqrestore(&iommu->lock, flags);
478
479 return 0;
480}
481
482static void iommu_set_root_entry(struct intel_iommu *iommu)
483{
484 void *addr;
485 u32 cmd, sts;
486 unsigned long flag;
487
488 addr = iommu->root_entry;
489
490 spin_lock_irqsave(&iommu->register_lock, flag);
491 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
492
493 cmd = iommu->gcmd | DMA_GCMD_SRTP;
494 writel(cmd, iommu->reg + DMAR_GCMD_REG);
495
496 /* Make sure hardware complete it */
497 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
498 readl, (sts & DMA_GSTS_RTPS), sts);
499
500 spin_unlock_irqrestore(&iommu->register_lock, flag);
501}
502
503static void iommu_flush_write_buffer(struct intel_iommu *iommu)
504{
505 u32 val;
506 unsigned long flag;
507
508 if (!cap_rwbf(iommu->cap))
509 return;
510 val = iommu->gcmd | DMA_GCMD_WBF;
511
512 spin_lock_irqsave(&iommu->register_lock, flag);
513 writel(val, iommu->reg + DMAR_GCMD_REG);
514
515 /* Make sure hardware complete it */
516 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
517 readl, (!(val & DMA_GSTS_WBFS)), val);
518
519 spin_unlock_irqrestore(&iommu->register_lock, flag);
520}
521
522/* return value determines whether we need a write buffer flush */
523static int __iommu_flush_context(struct intel_iommu *iommu,
524 u16 did, u16 source_id, u8 function_mask, u64 type,
525 int non_present_entry_flush)
526{
527 u64 val = 0;
528 unsigned long flag;
529
530 /*
531 * In the non-present entry flush case, if hardware doesn't cache
532 * non-present entry we do nothing and if hardware cache non-present
533 * entry, we flush entries of domain 0 (the domain id is used to cache
534 * any non-present entries)
535 */
536 if (non_present_entry_flush) {
537 if (!cap_caching_mode(iommu->cap))
538 return 1;
539 else
540 did = 0;
541 }
542
543 switch (type) {
544 case DMA_CCMD_GLOBAL_INVL:
545 val = DMA_CCMD_GLOBAL_INVL;
546 break;
547 case DMA_CCMD_DOMAIN_INVL:
548 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
549 break;
550 case DMA_CCMD_DEVICE_INVL:
551 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
552 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
553 break;
554 default:
555 BUG();
556 }
557 val |= DMA_CCMD_ICC;
558
559 spin_lock_irqsave(&iommu->register_lock, flag);
560 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
561
562 /* Make sure hardware complete it */
563 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
564 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
565
566 spin_unlock_irqrestore(&iommu->register_lock, flag);
567
568 /* flush context entry will implicitly flush write buffer */
569 return 0;
570}
571
572/* return value determines whether we need a write buffer flush */
573static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
574 u64 addr, unsigned int size_order, u64 type,
575 int non_present_entry_flush)
576{
577 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
578 u64 val = 0, val_iva = 0;
579 unsigned long flag;
580
581 /*
582 * In the non-present entry flush case, if hardware doesn't cache
583 * non-present entry we do nothing and if hardware cache non-present
584 * entry, we flush entries of domain 0 (the domain id is used to cache
585 * any non-present entries)
586 */
587 if (non_present_entry_flush) {
588 if (!cap_caching_mode(iommu->cap))
589 return 1;
590 else
591 did = 0;
592 }
593
594 switch (type) {
595 case DMA_TLB_GLOBAL_FLUSH:
596 /* global flush doesn't need set IVA_REG */
597 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
598 break;
599 case DMA_TLB_DSI_FLUSH:
600 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
601 break;
602 case DMA_TLB_PSI_FLUSH:
603 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
604 /* Note: always flush non-leaf currently */
605 val_iva = size_order | addr;
606 break;
607 default:
608 BUG();
609 }
610 /* Note: set drain read/write */
611#if 0
612 /*
613 * This is probably to be super secure.. Looks like we can
614 * ignore it without any impact.
615 */
616 if (cap_read_drain(iommu->cap))
617 val |= DMA_TLB_READ_DRAIN;
618#endif
619 if (cap_write_drain(iommu->cap))
620 val |= DMA_TLB_WRITE_DRAIN;
621
622 spin_lock_irqsave(&iommu->register_lock, flag);
623 /* Note: Only uses first TLB reg currently */
624 if (val_iva)
625 dmar_writeq(iommu->reg + tlb_offset, val_iva);
626 dmar_writeq(iommu->reg + tlb_offset + 8, val);
627
628 /* Make sure hardware complete it */
629 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
630 dmar_readq, (!(val & DMA_TLB_IVT)), val);
631
632 spin_unlock_irqrestore(&iommu->register_lock, flag);
633
634 /* check IOTLB invalidation granularity */
635 if (DMA_TLB_IAIG(val) == 0)
636 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
637 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
638 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
639 (unsigned long long)DMA_TLB_IIRG(type),
640 (unsigned long long)DMA_TLB_IAIG(val));
641 /* flush iotlb entry will implicitly flush write buffer */
642 return 0;
643}
644
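/*
 * Page-selective IOTLB invalidation: the page count is rounded up to a
 * power of two and encoded as an address mask order.  For example,
 * pages == 10 is rounded to 16, giving mask == 4, i.e. a 64KB aligned
 * region is invalidated.  If the hardware lacks page-selective
 * invalidation, or the mask exceeds cap_max_amask_val(), we fall back to a
 * domain-selective flush.
 */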
645static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
646 u64 addr, unsigned int pages, int non_present_entry_flush)
647{
648 unsigned int mask;
649
650 BUG_ON(addr & (~VTD_PAGE_MASK));
651 BUG_ON(pages == 0);
652
653 /* Fallback to domain selective flush if no PSI support */
654 if (!cap_pgsel_inv(iommu->cap))
655 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
656 DMA_TLB_DSI_FLUSH,
657 non_present_entry_flush);
658
659 /*
660 * PSI requires page size to be 2 ^ x, and the base address is naturally
661 * aligned to the size
662 */
663 mask = ilog2(__roundup_pow_of_two(pages));
664 /* Fallback to domain selective flush if size is too big */
665 if (mask > cap_max_amask_val(iommu->cap))
666 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
667 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
668
669 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
670 DMA_TLB_PSI_FLUSH,
671 non_present_entry_flush);
672}
673
674static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
675{
676 u32 pmen;
677 unsigned long flags;
678
679 spin_lock_irqsave(&iommu->register_lock, flags);
680 pmen = readl(iommu->reg + DMAR_PMEN_REG);
681 pmen &= ~DMA_PMEN_EPM;
682 writel(pmen, iommu->reg + DMAR_PMEN_REG);
683
684 /* wait for the protected region status bit to clear */
685 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
686 readl, !(pmen & DMA_PMEN_PRS), pmen);
687
688 spin_unlock_irqrestore(&iommu->register_lock, flags);
689}
690
691static int iommu_enable_translation(struct intel_iommu *iommu)
692{
693 u32 sts;
694 unsigned long flags;
695
696 spin_lock_irqsave(&iommu->register_lock, flags);
697 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
698
699 /* Make sure hardware complete it */
700 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
701 readl, (sts & DMA_GSTS_TES), sts);
702
703 iommu->gcmd |= DMA_GCMD_TE;
704 spin_unlock_irqrestore(&iommu->register_lock, flags);
705 return 0;
706}
707
708static int iommu_disable_translation(struct intel_iommu *iommu)
709{
710 u32 sts;
711 unsigned long flag;
712
713 spin_lock_irqsave(&iommu->register_lock, flag);
714 iommu->gcmd &= ~DMA_GCMD_TE;
715 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
716
717 /* Make sure hardware complete it */
718 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
719 readl, (!(sts & DMA_GSTS_TES)), sts);
720
721 spin_unlock_irqrestore(&iommu->register_lock, flag);
722 return 0;
723}
724
725/* iommu interrupt handling. Most stuff are MSI-like. */
726
727static const char *fault_reason_strings[] =
728{
729 "Software",
730 "Present bit in root entry is clear",
731 "Present bit in context entry is clear",
732 "Invalid context entry",
733 "Access beyond MGAW",
734 "PTE Write access is not set",
735 "PTE Read access is not set",
736 "Next page table ptr is invalid",
737 "Root table address invalid",
738 "Context table ptr is invalid",
739 "non-zero reserved fields in RTP",
740 "non-zero reserved fields in CTP",
741 "non-zero reserved fields in PTE",
742};
743#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
744
745const char *dmar_get_fault_reason(u8 fault_reason)
746{
747 if (fault_reason > MAX_FAULT_REASON_IDX)
748 return "Unknown";
749 else
750 return fault_reason_strings[fault_reason];
751}
752
753void dmar_msi_unmask(unsigned int irq)
754{
755 struct intel_iommu *iommu = get_irq_data(irq);
756 unsigned long flag;
757
758 /* unmask it */
759 spin_lock_irqsave(&iommu->register_lock, flag);
760 writel(0, iommu->reg + DMAR_FECTL_REG);
761 /* Read a reg to force flush the post write */
762 readl(iommu->reg + DMAR_FECTL_REG);
763 spin_unlock_irqrestore(&iommu->register_lock, flag);
764}
765
766void dmar_msi_mask(unsigned int irq)
767{
768 unsigned long flag;
769 struct intel_iommu *iommu = get_irq_data(irq);
770
771 /* mask it */
772 spin_lock_irqsave(&iommu->register_lock, flag);
773 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
774 /* Read a reg to force flush the post write */
775 readl(iommu->reg + DMAR_FECTL_REG);
776 spin_unlock_irqrestore(&iommu->register_lock, flag);
777}
778
779void dmar_msi_write(int irq, struct msi_msg *msg)
780{
781 struct intel_iommu *iommu = get_irq_data(irq);
782 unsigned long flag;
783
784 spin_lock_irqsave(&iommu->register_lock, flag);
785 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
786 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
787 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
788 spin_unlock_irqrestore(&iommu->register_lock, flag);
789}
790
791void dmar_msi_read(int irq, struct msi_msg *msg)
792{
793 struct intel_iommu *iommu = get_irq_data(irq);
794 unsigned long flag;
795
796 spin_lock_irqsave(&iommu->register_lock, flag);
797 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
798 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
799 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
800 spin_unlock_irqrestore(&iommu->register_lock, flag);
801}
802
803static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
804 u8 fault_reason, u16 source_id, unsigned long long addr)
805{
806 const char *reason;
807
808 reason = dmar_get_fault_reason(fault_reason);
809
810 printk(KERN_ERR
811 "DMAR:[%s] Request device [%02x:%02x.%d] "
812 "fault addr %llx \n"
813 "DMAR:[fault reason %02d] %s\n",
814 (type ? "DMA Read" : "DMA Write"),
815 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
816 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
817 return 0;
818}
819
820#define PRIMARY_FAULT_REG_LEN (16)
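/*
 * Primary fault handling: iommu_page_fault() walks the 16-byte fault
 * records starting at cap_fault_reg_offset().  For each record with
 * DMA_FRCD_F set it decodes the fault reason and type (upper dword), the
 * source id (dword at offset 8) and the faulting page address (qword at
 * offset 0), reports it via iommu_page_fault_do_one(), writes DMA_FRCD_F
 * back to clear the record, and finally clears any primary fault overflow.
 */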
821static irqreturn_t iommu_page_fault(int irq, void *dev_id)
822{
823 struct intel_iommu *iommu = dev_id;
824 int reg, fault_index;
825 u32 fault_status;
826 unsigned long flag;
827
828 spin_lock_irqsave(&iommu->register_lock, flag);
829 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
830
831 /* TBD: ignore advanced fault log currently */
832 if (!(fault_status & DMA_FSTS_PPF))
833 goto clear_overflow;
834
835 fault_index = dma_fsts_fault_record_index(fault_status);
836 reg = cap_fault_reg_offset(iommu->cap);
837 while (1) {
838 u8 fault_reason;
839 u16 source_id;
840 u64 guest_addr;
841 int type;
842 u32 data;
843
844 /* highest 32 bits */
845 data = readl(iommu->reg + reg +
846 fault_index * PRIMARY_FAULT_REG_LEN + 12);
847 if (!(data & DMA_FRCD_F))
848 break;
849
850 fault_reason = dma_frcd_fault_reason(data);
851 type = dma_frcd_type(data);
852
853 data = readl(iommu->reg + reg +
854 fault_index * PRIMARY_FAULT_REG_LEN + 8);
855 source_id = dma_frcd_source_id(data);
856
857 guest_addr = dmar_readq(iommu->reg + reg +
858 fault_index * PRIMARY_FAULT_REG_LEN);
859 guest_addr = dma_frcd_page_addr(guest_addr);
860 /* clear the fault */
861 writel(DMA_FRCD_F, iommu->reg + reg +
862 fault_index * PRIMARY_FAULT_REG_LEN + 12);
863
864 spin_unlock_irqrestore(&iommu->register_lock, flag);
865
866 iommu_page_fault_do_one(iommu, type, fault_reason,
867 source_id, guest_addr);
868
869 fault_index++;
870 if (fault_index > cap_num_fault_regs(iommu->cap))
871 fault_index = 0;
872 spin_lock_irqsave(&iommu->register_lock, flag);
873 }
874clear_overflow:
875 /* clear primary fault overflow */
876 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
877 if (fault_status & DMA_FSTS_PFO)
878 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
879
880 spin_unlock_irqrestore(&iommu->register_lock, flag);
881 return IRQ_HANDLED;
882}
883
884int dmar_set_interrupt(struct intel_iommu *iommu)
885{
886 int irq, ret;
887
888 irq = create_irq();
889 if (!irq) {
890 printk(KERN_ERR "IOMMU: no free vectors\n");
891 return -EINVAL;
892 }
893
894 set_irq_data(irq, iommu);
895 iommu->irq = irq;
896
897 ret = arch_setup_dmar_msi(irq);
898 if (ret) {
899 set_irq_data(irq, NULL);
900 iommu->irq = 0;
901 destroy_irq(irq);
902 return 0;
903 }
904
905 /* Force fault register is cleared */
906 iommu_page_fault(irq, iommu);
907
908 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
909 if (ret)
910 printk(KERN_ERR "IOMMU: can't request irq\n");
911 return ret;
912}
913
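/*
 * Per-IOMMU domain bookkeeping: domain_ids is a bitmap sized from
 * cap_ndoms() and domains[] maps each allocated id back to its
 * dmar_domain.  When caching mode is set, hardware tags non-present
 * translations with domain id 0, so id 0 is reserved up front and never
 * handed out.
 */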
914static int iommu_init_domains(struct intel_iommu *iommu)
915{
916 unsigned long ndomains;
917 unsigned long nlongs;
918
919 ndomains = cap_ndoms(iommu->cap);
920 pr_debug("Number of Domains supported <%ld>\n", ndomains);
921 nlongs = BITS_TO_LONGS(ndomains);
922
923 /* TBD: there might be 64K domains,
924 * consider other allocation for future chip
925 */
926 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
927 if (!iommu->domain_ids) {
928 printk(KERN_ERR "Allocating domain id array failed\n");
929 return -ENOMEM;
930 }
931 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
932 GFP_KERNEL);
933 if (!iommu->domains) {
934 printk(KERN_ERR "Allocating domain array failed\n");
935 kfree(iommu->domain_ids);
936 return -ENOMEM;
937 }
938
939 spin_lock_init(&iommu->lock);
940
941 /*
942 * if Caching mode is set, then invalid translations are tagged
943 * with domainid 0. Hence we need to pre-allocate it.
944 */
945 if (cap_caching_mode(iommu->cap))
946 set_bit(0, iommu->domain_ids);
947 return 0;
948}
949
950
951static void domain_exit(struct dmar_domain *domain);
952
953void free_dmar_iommu(struct intel_iommu *iommu)
954{
955 struct dmar_domain *domain;
956 int i;
957
958 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
959 for (; i < cap_ndoms(iommu->cap); ) {
960 domain = iommu->domains[i];
961 clear_bit(i, iommu->domain_ids);
962 domain_exit(domain);
963 i = find_next_bit(iommu->domain_ids,
964 cap_ndoms(iommu->cap), i+1);
965 }
966
967 if (iommu->gcmd & DMA_GCMD_TE)
968 iommu_disable_translation(iommu);
969
970 if (iommu->irq) {
971 set_irq_data(iommu->irq, NULL);
972 /* This will mask the irq */
973 free_irq(iommu->irq, iommu);
974 destroy_irq(iommu->irq);
975 }
976
977 kfree(iommu->domains);
978 kfree(iommu->domain_ids);
979
980 /* free context mapping */
981 free_context_table(iommu);
982}
983
984static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
985{
986 unsigned long num;
987 unsigned long ndomains;
988 struct dmar_domain *domain;
989 unsigned long flags;
990
991 domain = alloc_domain_mem();
992 if (!domain)
993 return NULL;
994
995 ndomains = cap_ndoms(iommu->cap);
996
997 spin_lock_irqsave(&iommu->lock, flags);
998 num = find_first_zero_bit(iommu->domain_ids, ndomains);
999 if (num >= ndomains) {
1000 spin_unlock_irqrestore(&iommu->lock, flags);
1001 free_domain_mem(domain);
1002 printk(KERN_ERR "IOMMU: no free domain ids\n");
1003 return NULL;
1004 }
1005
1006 set_bit(num, iommu->domain_ids);
1007 domain->id = num;
1008 domain->iommu = iommu;
1009 iommu->domains[num] = domain;
1010 spin_unlock_irqrestore(&iommu->lock, flags);
1011
1012 return domain;
1013}
1014
1015static void iommu_free_domain(struct dmar_domain *domain)
1016{
1017 unsigned long flags;
1018
1019 spin_lock_irqsave(&domain->iommu->lock, flags);
1020 clear_bit(domain->id, domain->iommu->domain_ids);
1021 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1022}
1023
1024static struct iova_domain reserved_iova_list;
1025static struct lock_class_key reserved_alloc_key;
1026static struct lock_class_key reserved_rbtree_key;
1027
1028static void dmar_init_reserved_ranges(void)
1029{
1030 struct pci_dev *pdev = NULL;
1031 struct iova *iova;
1032 int i;
1033 u64 addr, size;
1034
1035 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1036
1037 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1038 &reserved_alloc_key);
1039 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1040 &reserved_rbtree_key);
1041
1042 /* IOAPIC ranges shouldn't be accessed by DMA */
1043 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1044 IOVA_PFN(IOAPIC_RANGE_END));
1045 if (!iova)
1046 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1047
1048 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1049 for_each_pci_dev(pdev) {
1050 struct resource *r;
1051
1052 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1053 r = &pdev->resource[i];
1054 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1055 continue;
1056 addr = r->start;
1057 addr &= PAGE_MASK;
1058 size = r->end - addr;
1059 size = PAGE_ALIGN(size);
1060 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1061 IOVA_PFN(size + addr) - 1);
1062 if (!iova)
1063 printk(KERN_ERR "Reserve iova failed\n");
1064 }
1065 }
1066
1067}
1068
1069static void domain_reserve_special_ranges(struct dmar_domain *domain)
1070{
1071 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1072}
1073
1074static inline int guestwidth_to_adjustwidth(int gaw)
1075{
1076 int agaw;
1077 int r = (gaw - 12) % 9;
1078
1079 if (r == 0)
1080 agaw = gaw;
1081 else
1082 agaw = gaw + 9 - r;
1083 if (agaw > 64)
1084 agaw = 64;
1085 return agaw;
1086}
1087
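/*
 * domain_init() sizes the domain's page table from the guest address
 * width: guestwidth_to_adjustwidth() rounds the width up so that
 * (width - 12) is a multiple of the 9-bit level stride, and
 * width_to_agaw() converts it to the AGAW encoding.  E.g. the default
 * 48-bit width yields agaw 2, i.e. a 4-level page table
 * (agaw_to_level(2) == 4).  If the hardware does not support that AGAW,
 * the next larger value from cap_sagaw() is used.
 */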
1088static int domain_init(struct dmar_domain *domain, int guest_width)
1089{
1090 struct intel_iommu *iommu;
1091 int adjust_width, agaw;
1092 unsigned long sagaw;
1093
1094 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1095 spin_lock_init(&domain->mapping_lock);
1096
1097 domain_reserve_special_ranges(domain);
1098
1099 /* calculate AGAW */
1100 iommu = domain->iommu;
1101 if (guest_width > cap_mgaw(iommu->cap))
1102 guest_width = cap_mgaw(iommu->cap);
1103 domain->gaw = guest_width;
1104 adjust_width = guestwidth_to_adjustwidth(guest_width);
1105 agaw = width_to_agaw(adjust_width);
1106 sagaw = cap_sagaw(iommu->cap);
1107 if (!test_bit(agaw, &sagaw)) {
1108 /* hardware doesn't support it, choose a bigger one */
1109 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1110 agaw = find_next_bit(&sagaw, 5, agaw);
1111 if (agaw >= 5)
1112 return -ENODEV;
1113 }
1114 domain->agaw = agaw;
1115 INIT_LIST_HEAD(&domain->devices);
1116
1117 /* always allocate the top pgd */
1118 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1119 if (!domain->pgd)
1120 return -ENOMEM;
1121 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1122 return 0;
1123}
1124
1125static void domain_exit(struct dmar_domain *domain)
1126{
1127 u64 end;
1128
1129 /* Domain 0 is reserved, so dont process it */
1130 if (!domain)
1131 return;
1132
1133 domain_remove_dev_info(domain);
1134 /* destroy iovas */
1135 put_iova_domain(&domain->iovad);
1136 end = DOMAIN_MAX_ADDR(domain->gaw);
1137 end = end & (~PAGE_MASK);
1138
1139 /* clear ptes */
1140 dma_pte_clear_range(domain, 0, end);
1141
1142 /* free page tables */
1143 dma_pte_free_pagetable(domain, 0, end);
1144
1145 iommu_free_domain(domain);
1146 free_domain_mem(domain);
1147}
1148
1149static int domain_context_mapping_one(struct dmar_domain *domain,
1150 u8 bus, u8 devfn)
1151{
1152 struct context_entry *context;
1153 struct intel_iommu *iommu = domain->iommu;
1154 unsigned long flags;
1155
1156 pr_debug("Set context mapping for %02x:%02x.%d\n",
1157 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1158 BUG_ON(!domain->pgd);
1159 context = device_to_context_entry(iommu, bus, devfn);
1160 if (!context)
1161 return -ENOMEM;
1162 spin_lock_irqsave(&iommu->lock, flags);
1163 if (context_present(*context)) {
1164 spin_unlock_irqrestore(&iommu->lock, flags);
1165 return 0;
1166 }
1167
1168 context_set_domain_id(*context, domain->id);
1169 context_set_address_width(*context, domain->agaw);
1170 context_set_address_root(*context, virt_to_phys(domain->pgd));
1171 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1172 context_set_fault_enable(*context);
1173 context_set_present(*context);
1174 __iommu_flush_cache(iommu, context, sizeof(*context));
1175
1176 /* it's a non-present to present mapping */
1177 if (iommu->flush.flush_context(iommu, domain->id,
1178 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1179 DMA_CCMD_DEVICE_INVL, 1))
1180 iommu_flush_write_buffer(iommu);
1181 else
1182 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1183
1184 spin_unlock_irqrestore(&iommu->lock, flags);
1185 return 0;
1186}
1187
1188static int
1189domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1190{
1191 int ret;
1192 struct pci_dev *tmp, *parent;
1193
1194 ret = domain_context_mapping_one(domain, pdev->bus->number,
1195 pdev->devfn);
1196 if (ret)
1197 return ret;
1198
1199 /* dependent device mapping */
1200 tmp = pci_find_upstream_pcie_bridge(pdev);
1201 if (!tmp)
1202 return 0;
1203 /* Secondary interface's bus number and devfn 0 */
1204 parent = pdev->bus->self;
1205 while (parent != tmp) {
1206 ret = domain_context_mapping_one(domain, parent->bus->number,
1207 parent->devfn);
1208 if (ret)
1209 return ret;
1210 parent = parent->bus->self;
1211 }
1212 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1213 return domain_context_mapping_one(domain,
1214 tmp->subordinate->number, 0);
1215 else /* this is a legacy PCI bridge */
1216 return domain_context_mapping_one(domain,
1217 tmp->bus->number, tmp->devfn);
1218}
1219
1220static int domain_context_mapped(struct dmar_domain *domain,
1221 struct pci_dev *pdev)
1222{
1223 int ret;
1224 struct pci_dev *tmp, *parent;
1225
1226 ret = device_context_mapped(domain->iommu,
1227 pdev->bus->number, pdev->devfn);
1228 if (!ret)
1229 return ret;
1230 /* dependent device mapping */
1231 tmp = pci_find_upstream_pcie_bridge(pdev);
1232 if (!tmp)
1233 return ret;
1234 /* Secondary interface's bus number and devfn 0 */
1235 parent = pdev->bus->self;
1236 while (parent != tmp) {
1237 ret = device_context_mapped(domain->iommu, parent->bus->number,
1238 parent->devfn);
1239 if (!ret)
1240 return ret;
1241 parent = parent->bus->self;
1242 }
1243 if (tmp->is_pcie)
1244 return device_context_mapped(domain->iommu,
1245 tmp->subordinate->number, 0);
1246 else
1247 return device_context_mapped(domain->iommu,
1248 tmp->bus->number, tmp->devfn);
1249}
1250
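/*
 * domain_page_mapping() maps the host physical range [hpa, hpa + size) at
 * the given IOVA one VTD page at a time.  The target PTEs must be clear
 * (BUG_ON otherwise), each PTE gets the requested DMA_PTE_READ/WRITE
 * protection, and every updated PTE is flushed from the CPU cache.
 */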
1251static int
1252domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1253 u64 hpa, size_t size, int prot)
1254{
1255 u64 start_pfn, end_pfn;
1256 struct dma_pte *pte;
1257 int index;
5b6985ce
FY
1258 int addr_width = agaw_to_width(domain->agaw);
1259
1260 hpa &= (((u64)1) << addr_width) - 1;
ba395927
KA
1261
1262 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1263 return -EINVAL;
5b6985ce
FY
1264 iova &= PAGE_MASK;
1265 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1266 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
ba395927
KA
1267 index = 0;
1268 while (start_pfn < end_pfn) {
5b6985ce 1269 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1270 if (!pte)
1271 return -ENOMEM;
1272 /* We don't need lock here, nobody else
1273 * touches the iova range
1274 */
1275 BUG_ON(dma_pte_addr(*pte));
5b6985ce 1276 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
ba395927
KA
1277 dma_set_pte_prot(*pte, prot);
1278 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1279 start_pfn++;
1280 index++;
1281 }
1282 return 0;
1283}
1284
1285static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1286{
1287 clear_context_table(domain->iommu, bus, devfn);
1288 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1289 DMA_CCMD_GLOBAL_INVL, 0);
1290 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1291 DMA_TLB_GLOBAL_FLUSH, 0);
1292}
1293
1294static void domain_remove_dev_info(struct dmar_domain *domain)
1295{
1296 struct device_domain_info *info;
1297 unsigned long flags;
1298
1299 spin_lock_irqsave(&device_domain_lock, flags);
1300 while (!list_empty(&domain->devices)) {
1301 info = list_entry(domain->devices.next,
1302 struct device_domain_info, link);
1303 list_del(&info->link);
1304 list_del(&info->global);
1305 if (info->dev)
1306 info->dev->dev.archdata.iommu = NULL;
1307 spin_unlock_irqrestore(&device_domain_lock, flags);
1308
1309 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1310 free_devinfo_mem(info);
1311
1312 spin_lock_irqsave(&device_domain_lock, flags);
1313 }
1314 spin_unlock_irqrestore(&device_domain_lock, flags);
1315}
1316
1317/*
1318 * find_domain
1319 * Note: we use struct pci_dev->dev.archdata.iommu to store the domain info
1320 */
1321static struct dmar_domain *
1322find_domain(struct pci_dev *pdev)
1323{
1324 struct device_domain_info *info;
1325
1326 /* No lock here, assumes no domain exit in normal case */
1327 info = pdev->dev.archdata.iommu;
1328 if (info)
1329 return info->domain;
1330 return NULL;
1331}
1332
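/*
 * get_domain_for_dev() resolves (or creates) the domain a device should
 * use.  Devices behind a PCIe-to-PCI bridge all share one domain, keyed in
 * device_domain_list by the upstream bridge and marked
 * DOMAIN_FLAG_MULTIPLE_DEVICES; other devices get a fresh domain allocated
 * from the IOMMU of the matching DRHD unit.  Races with a concurrent
 * allocation are resolved under device_domain_lock by keeping whichever
 * domain was registered first.
 */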
1333/* domain is initialized */
1334static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1335{
1336 struct dmar_domain *domain, *found = NULL;
1337 struct intel_iommu *iommu;
1338 struct dmar_drhd_unit *drhd;
1339 struct device_domain_info *info, *tmp;
1340 struct pci_dev *dev_tmp;
1341 unsigned long flags;
1342 int bus = 0, devfn = 0;
1343
1344 domain = find_domain(pdev);
1345 if (domain)
1346 return domain;
1347
1348 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1349 if (dev_tmp) {
1350 if (dev_tmp->is_pcie) {
1351 bus = dev_tmp->subordinate->number;
1352 devfn = 0;
1353 } else {
1354 bus = dev_tmp->bus->number;
1355 devfn = dev_tmp->devfn;
1356 }
1357 spin_lock_irqsave(&device_domain_lock, flags);
1358 list_for_each_entry(info, &device_domain_list, global) {
1359 if (info->bus == bus && info->devfn == devfn) {
1360 found = info->domain;
1361 break;
1362 }
1363 }
1364 spin_unlock_irqrestore(&device_domain_lock, flags);
1365 /* pcie-pci bridge already has a domain, uses it */
1366 if (found) {
1367 domain = found;
1368 goto found_domain;
1369 }
1370 }
1371
1372 /* Allocate new domain for the device */
1373 drhd = dmar_find_matched_drhd_unit(pdev);
1374 if (!drhd) {
1375 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1376 pci_name(pdev));
1377 return NULL;
1378 }
1379 iommu = drhd->iommu;
1380
1381 domain = iommu_alloc_domain(iommu);
1382 if (!domain)
1383 goto error;
1384
1385 if (domain_init(domain, gaw)) {
1386 domain_exit(domain);
1387 goto error;
1388 }
1389
1390 /* register pcie-to-pci device */
1391 if (dev_tmp) {
1392 info = alloc_devinfo_mem();
1393 if (!info) {
1394 domain_exit(domain);
1395 goto error;
1396 }
1397 info->bus = bus;
1398 info->devfn = devfn;
1399 info->dev = NULL;
1400 info->domain = domain;
1401 /* This domain is shared by devices under p2p bridge */
1402 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1403
1404 /* pcie-to-pci bridge already has a domain, uses it */
1405 found = NULL;
1406 spin_lock_irqsave(&device_domain_lock, flags);
1407 list_for_each_entry(tmp, &device_domain_list, global) {
1408 if (tmp->bus == bus && tmp->devfn == devfn) {
1409 found = tmp->domain;
1410 break;
1411 }
1412 }
1413 if (found) {
1414 free_devinfo_mem(info);
1415 domain_exit(domain);
1416 domain = found;
1417 } else {
1418 list_add(&info->link, &domain->devices);
1419 list_add(&info->global, &device_domain_list);
1420 }
1421 spin_unlock_irqrestore(&device_domain_lock, flags);
1422 }
1423
1424found_domain:
1425 info = alloc_devinfo_mem();
1426 if (!info)
1427 goto error;
1428 info->bus = pdev->bus->number;
1429 info->devfn = pdev->devfn;
1430 info->dev = pdev;
1431 info->domain = domain;
1432 spin_lock_irqsave(&device_domain_lock, flags);
1433 /* somebody is fast */
1434 found = find_domain(pdev);
1435 if (found != NULL) {
1436 spin_unlock_irqrestore(&device_domain_lock, flags);
1437 if (found != domain) {
1438 domain_exit(domain);
1439 domain = found;
1440 }
1441 free_devinfo_mem(info);
1442 return domain;
1443 }
1444 list_add(&info->link, &domain->devices);
1445 list_add(&info->global, &device_domain_list);
1446 pdev->dev.archdata.iommu = info;
1447 spin_unlock_irqrestore(&device_domain_lock, flags);
1448 return domain;
1449error:
1450 /* recheck it here, maybe others set it */
1451 return find_domain(pdev);
1452}
1453
1454static int iommu_prepare_identity_map(struct pci_dev *pdev,
1455 unsigned long long start,
1456 unsigned long long end)
1457{
1458 struct dmar_domain *domain;
1459 unsigned long size;
1460 unsigned long long base;
1461 int ret;
1462
1463 printk(KERN_INFO
1464 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1465 pci_name(pdev), start, end);
1466 /* page table init */
1467 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1468 if (!domain)
1469 return -ENOMEM;
1470
1471 /* The address might not be aligned */
1472 base = start & PAGE_MASK;
1473 size = end - base;
1474 size = PAGE_ALIGN(size);
1475 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1476 IOVA_PFN(base + size) - 1)) {
1477 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1478 ret = -ENOMEM;
1479 goto error;
1480 }
1481
1482 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1483 size, base, pci_name(pdev));
1484 /*
1485 * RMRR range might have overlap with physical memory range,
1486 * clear it first
1487 */
1488 dma_pte_clear_range(domain, base, base + size);
1489
1490 ret = domain_page_mapping(domain, base, base, size,
1491 DMA_PTE_READ|DMA_PTE_WRITE);
1492 if (ret)
1493 goto error;
1494
1495 /* context entry init */
1496 ret = domain_context_mapping(domain, pdev);
1497 if (!ret)
1498 return 0;
1499error:
1500 domain_exit(domain);
1501 return ret;
1502
1503}
1504
1505static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1506 struct pci_dev *pdev)
1507{
1508 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1509 return 0;
1510 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1511 rmrr->end_address + 1);
1512}
1513
1514#ifdef CONFIG_DMAR_GFX_WA
1515struct iommu_prepare_data {
1516 struct pci_dev *pdev;
1517 int ret;
1518};
1519
1520static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1521 unsigned long end_pfn, void *datax)
1522{
1523 struct iommu_prepare_data *data;
1524
1525 data = (struct iommu_prepare_data *)datax;
1526
1527 data->ret = iommu_prepare_identity_map(data->pdev,
1528 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1529 return data->ret;
1530
1531}
1532
1533static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1534{
1535 int nid;
1536 struct iommu_prepare_data data;
1537
1538 data.pdev = pdev;
1539 data.ret = 0;
1540
1541 for_each_online_node(nid) {
1542 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1543 if (data.ret)
1544 return data.ret;
1545 }
1546 return data.ret;
1547}
1548
1549static void __init iommu_prepare_gfx_mapping(void)
1550{
1551 struct pci_dev *pdev = NULL;
1552 int ret;
1553
1554 for_each_pci_dev(pdev) {
1555 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1556 !IS_GFX_DEVICE(pdev))
1557 continue;
1558 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1559 pci_name(pdev));
1560 ret = iommu_prepare_with_active_regions(pdev);
1561 if (ret)
1562 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1563 }
1564}
1565#endif
1566
1567#ifdef CONFIG_DMAR_FLOPPY_WA
1568static inline void iommu_prepare_isa(void)
1569{
1570 struct pci_dev *pdev;
1571 int ret;
1572
1573 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1574 if (!pdev)
1575 return;
1576
1577 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1578 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1579
1580 if (ret)
1581 printk("IOMMU: Failed to create 0-16M identity map, "
1582 "floppy might not work\n");
1583
1584}
1585#else
1586static inline void iommu_prepare_isa(void)
1587{
1588 return;
1589}
1590#endif /* CONFIG_DMAR_FLOPPY_WA */
1591
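/*
 * init_dmars() is the one-time __init setup path; it has no users outside
 * this file, which is why this commit makes it static.  It counts the DRHD
 * units, allocates the per-IOMMU deferred-flush tables, sets up domain ids
 * and root entries for every IOMMU, picks register-based or queued
 * invalidation, installs the RMRR, graphics and ISA identity maps, and
 * finally enables fault reporting and DMA translation on each unit.
 */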
1592static int __init init_dmars(void)
1593{
1594 struct dmar_drhd_unit *drhd;
1595 struct dmar_rmrr_unit *rmrr;
1596 struct pci_dev *pdev;
1597 struct intel_iommu *iommu;
1598 int i, ret, unit = 0;
1599
1600 /*
1601 * for each drhd
1602 * allocate root
1603 * initialize and program root entry to not present
1604 * endfor
1605 */
1606 for_each_drhd_unit(drhd) {
1607 g_num_of_iommus++;
1608 /*
1609 * lock not needed as this is only incremented in the single
1610 * threaded kernel __init code path all other access are read
1611 * only
1612 */
1613 }
1614
1615 deferred_flush = kzalloc(g_num_of_iommus *
1616 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1617 if (!deferred_flush) {
1618 ret = -ENOMEM;
1619 goto error;
1620 }
1621
1622 for_each_drhd_unit(drhd) {
1623 if (drhd->ignored)
1624 continue;
1625
1626 iommu = drhd->iommu;
1627
1628 ret = iommu_init_domains(iommu);
1629 if (ret)
1630 goto error;
1631
1632 /*
1633 * TBD:
1634 * we could share the same root & context tables
1635 * among all IOMMUs. Need to split it later.
1636 */
1637 ret = iommu_alloc_root_entry(iommu);
1638 if (ret) {
1639 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1640 goto error;
1641 }
1642 }
1643
1644 for_each_drhd_unit(drhd) {
1645 if (drhd->ignored)
1646 continue;
1647
1648 iommu = drhd->iommu;
1649 if (dmar_enable_qi(iommu)) {
1650 /*
1651 * Queued Invalidate not enabled, use Register Based
1652 * Invalidate
1653 */
1654 iommu->flush.flush_context = __iommu_flush_context;
1655 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1656 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
1657 "invalidation\n",
1658 (unsigned long long)drhd->reg_base_addr);
1659 } else {
1660 iommu->flush.flush_context = qi_flush_context;
1661 iommu->flush.flush_iotlb = qi_flush_iotlb;
1662 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
1663 "invalidation\n",
1664 (unsigned long long)drhd->reg_base_addr);
1665 }
1666 }
1667
1668 /*
1669 * For each rmrr
1670 * for each dev attached to rmrr
1671 * do
1672 * locate drhd for dev, alloc domain for dev
1673 * allocate free domain
1674 * allocate page table entries for rmrr
1675 * if context not allocated for bus
1676 * allocate and init context
1677 * set present in root table for this bus
1678 * init context with domain, translation etc
1679 * endfor
1680 * endfor
1681 */
1682 for_each_rmrr_units(rmrr) {
1683 for (i = 0; i < rmrr->devices_cnt; i++) {
1684 pdev = rmrr->devices[i];
1685 /* some BIOS lists non-exist devices in DMAR table */
1686 if (!pdev)
1687 continue;
1688 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1689 if (ret)
1690 printk(KERN_ERR
1691 "IOMMU: mapping reserved region failed\n");
1692 }
1693 }
1694
1695 iommu_prepare_gfx_mapping();
1696
1697 iommu_prepare_isa();
1698
1699 /*
1700 * for each drhd
1701 * enable fault log
1702 * global invalidate context cache
1703 * global invalidate iotlb
1704 * enable translation
1705 */
1706 for_each_drhd_unit(drhd) {
1707 if (drhd->ignored)
1708 continue;
1709 iommu = drhd->iommu;
1710 sprintf (iommu->name, "dmar%d", unit++);
1711
1712 iommu_flush_write_buffer(iommu);
1713
1714 ret = dmar_set_interrupt(iommu);
1715 if (ret)
1716 goto error;
1717
1718 iommu_set_root_entry(iommu);
1719
1720 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1721 0);
1722 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1723 0);
1724 iommu_disable_protect_mem_regions(iommu);
1725
1726 ret = iommu_enable_translation(iommu);
1727 if (ret)
1728 goto error;
1729 }
1730
1731 return 0;
1732error:
1733 for_each_drhd_unit(drhd) {
1734 if (drhd->ignored)
1735 continue;
1736 iommu = drhd->iommu;
1737 free_iommu(iommu);
1738 }
1739 return ret;
1740}
1741
1742static inline u64 aligned_size(u64 host_addr, size_t size)
1743{
1744 u64 addr;
1745 addr = (host_addr & (~PAGE_MASK)) + size;
1746 return PAGE_ALIGN(addr);
1747}
1748
1749struct iova *
1750iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1751{
1752 struct iova *piova;
1753
1754 /* Make sure it's in range */
1755 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1756 if (!size || (IOVA_START_ADDR + size > end))
1757 return NULL;
1758
1759 piova = alloc_iova(&domain->iovad,
1760 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
1761 return piova;
1762}
1763
1764static struct iova *
1765__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1766 size_t size, u64 dma_mask)
1767{
1768 struct pci_dev *pdev = to_pci_dev(dev);
1769 struct iova *iova = NULL;
1770
1771 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1772 iova = iommu_alloc_iova(domain, size, dma_mask);
1773 else {
1774 /*
1775 * First try to allocate an io virtual address in
1776 * DMA_32BIT_MASK and if that fails then try allocating
1777 * from higher range
1778 */
1779 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1780 if (!iova)
1781 iova = iommu_alloc_iova(domain, size, dma_mask);
1782 }
1783
1784 if (!iova) {
1785 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1786 return NULL;
1787 }
1788
1789 return iova;
1790}
1791
1792static struct dmar_domain *
1793get_valid_domain_for_dev(struct pci_dev *pdev)
1794{
1795 struct dmar_domain *domain;
1796 int ret;
1797
1798 domain = get_domain_for_dev(pdev,
1799 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1800 if (!domain) {
1801 printk(KERN_ERR
1802 "Allocating domain for %s failed", pci_name(pdev));
1803 return NULL;
1804 }
1805
1806 /* make sure context mapping is ok */
1807 if (unlikely(!domain_context_mapped(domain, pdev))) {
1808 ret = domain_context_mapping(domain, pdev);
1809 if (ret) {
1810 printk(KERN_ERR
1811 "Domain context map for %s failed",
1812 pci_name(pdev));
1813 return NULL;
1814 }
1815 }
1816
1817 return domain;
1818}
1819
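/*
 * DMA map fast path: __intel_map_single() rounds the request up to whole
 * pages, allocates an IOVA below the device's DMA mask (trying the 32-bit
 * range first unless "forcedac" is set), maps the pages with protection
 * bits derived from the DMA direction (and the hardware's zero-length-read
 * capability), and returns the IOVA plus the original sub-page offset.  A
 * page-selective IOTLB flush covers the non-present-to-present change;
 * when that flush is not required, the write buffer is flushed instead.
 */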
1820static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1821 size_t size, int dir, u64 dma_mask)
1822{
1823 struct pci_dev *pdev = to_pci_dev(hwdev);
1824 struct dmar_domain *domain;
1825 phys_addr_t start_paddr;
1826 struct iova *iova;
1827 int prot = 0;
1828 int ret;
1829
1830 BUG_ON(dir == DMA_NONE);
1831 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1832 return paddr;
1833
1834 domain = get_valid_domain_for_dev(pdev);
1835 if (!domain)
1836 return 0;
1837
1838 size = aligned_size((u64)paddr, size);
1839
1840 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
1841 if (!iova)
1842 goto error;
1843
1844 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
1845
1846 /*
1847 * Check if DMAR supports zero-length reads on write only
1848 * mappings..
1849 */
1850 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1851 !cap_zlr(domain->iommu->cap))
1852 prot |= DMA_PTE_READ;
1853 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1854 prot |= DMA_PTE_WRITE;
1855 /*
1856 * paddr - (paddr + size) might be partial page, we should map the whole
1857 * page. Note: if two parts of one page are separately mapped, we
1858 * might have two guest_addr mappings to the same host paddr, but this
1859 * is not a big problem
1860 */
1861 ret = domain_page_mapping(domain, start_paddr,
1862 ((u64)paddr) & PAGE_MASK, size, prot);
1863 if (ret)
1864 goto error;
1865
1866 /* it's a non-present to present mapping */
1867 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1868 start_paddr, size >> VTD_PAGE_SHIFT, 1);
1869 if (ret)
1870 iommu_flush_write_buffer(domain->iommu);
1871
1872 return start_paddr + ((u64)paddr & (~PAGE_MASK));
1873
1874error:
1875 if (iova)
1876 __free_iova(&domain->iovad, iova);
1877 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1878 pci_name(pdev), size, (unsigned long long)paddr, dir);
1879 return 0;
1880}
1881
1882dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1883 size_t size, int dir)
1884{
1885 return __intel_map_single(hwdev, paddr, size, dir,
1886 to_pci_dev(hwdev)->dma_mask);
1887}
1888
1889static void flush_unmaps(void)
1890{
1891 int i, j;
1892
1893 timer_on = 0;
1894
1895 /* just flush them all */
1896 for (i = 0; i < g_num_of_iommus; i++) {
1897 if (deferred_flush[i].next) {
1898 struct intel_iommu *iommu =
1899 deferred_flush[i].domain[0]->iommu;
1900
1901 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1902 DMA_TLB_GLOBAL_FLUSH, 0);
1903 for (j = 0; j < deferred_flush[i].next; j++) {
1904 __free_iova(&deferred_flush[i].domain[j]->iovad,
1905 deferred_flush[i].iova[j]);
1906 }
1907 deferred_flush[i].next = 0;
1908 }
1909 }
1910
1911 list_size = 0;
1912}
1913
1914static void flush_unmaps_timeout(unsigned long data)
1915{
1916 unsigned long flags;
1917
1918 spin_lock_irqsave(&async_umap_flush_lock, flags);
1919 flush_unmaps();
1920 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1921}
1922
1923static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1924{
1925 unsigned long flags;
1926 int next, iommu_id;
1927
1928 spin_lock_irqsave(&async_umap_flush_lock, flags);
1929 if (list_size == HIGH_WATER_MARK)
1930 flush_unmaps();
1931
1932 iommu_id = dom->iommu->seq_id;
1933
1934 next = deferred_flush[iommu_id].next;
1935 deferred_flush[iommu_id].domain[next] = dom;
1936 deferred_flush[iommu_id].iova[next] = iova;
1937 deferred_flush[iommu_id].next++;
5e0d2a6f 1938
1939 if (!timer_on) {
1940 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1941 timer_on = 1;
1942 }
1943 list_size++;
1944 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1945}
1946
5b6985ce
FY
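/*
 * intel_unmap_single - tear down the DMA mapping at @dev_addr.  The PTEs
 * and page tables are cleared immediately; in strict mode the IOTLB flush
 * and IOVA free happen synchronously, otherwise both are deferred via
 * add_unmap().
 */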
void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
			int dir)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;
	domain = find_domain(pdev);
	BUG_ON(!domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova)
		return;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	size = aligned_size((u64)dev_addr, size);

	pr_debug("Device %s unmapping: %zx@%llx\n",
		 pci_name(pdev), size, (unsigned long long)start_addr);

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);
	if (intel_iommu_strict) {
		if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
				start_addr, size >> VTD_PAGE_SHIFT, 0))
			iommu_flush_write_buffer(domain->iommu);
		/* free iova */
		__free_iova(&domain->iovad, iova);
	} else {
		/*
		 * Queue up the release of the unmap to save the roughly
		 * 1/6th of the CPU time otherwise spent on the IOTLB
		 * flush operation.
		 */
		add_unmap(domain, iova);
	}
}

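/*
 * intel_alloc_coherent - allocate a zeroed, page-aligned buffer and map
 * it bidirectionally against the device's coherent DMA mask.
 */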
void *intel_alloc_coherent(struct device *hwdev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);
	flags &= ~(GFP_DMA | GFP_DMA32);

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
					 DMA_BIDIRECTIONAL,
					 hwdev->coherent_dma_mask);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

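/*
 * intel_free_coherent - unmap and free a buffer previously returned by
 * intel_alloc_coherent().
 */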
void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
			 dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN(size);
	order = get_order(size);

	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, order);
}

#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))

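/*
 * intel_unmap_sg - tear down a scatterlist mapping created by
 * intel_map_sg().  The total mapped size is recomputed from the list so
 * the whole IOVA range can be cleared, flushed and freed in one go.
 */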
void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
		    int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	domain = find_domain(pdev);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT;

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
				  size >> VTD_PAGE_SHIFT, 0))
		iommu_flush_write_buffer(domain->iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}

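/*
 * intel_nontranslate_map_sg - identity-map a scatterlist for devices
 * that bypass the IOMMU (marked DUMMY_DEVICE_DOMAIN_INFO).
 */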
static int intel_nontranslate_map_sg(struct device *hddev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
		sg->dma_length = sg->length;
	}
	return nelems;
}

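/*
 * intel_map_sg - map a scatterlist into one contiguous IOVA range.
 * Returns the number of mapped entries, or 0 on failure after tearing
 * down any partially created mapping.
 */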
int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
		 int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write-only
	 * mappings.
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT;
	offset = 0;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
					  ((u64)addr) & PAGE_MASK,
					  size, prot);
		if (ret) {
			/* clear the page */
			dma_pte_clear_range(domain, start_addr,
					    start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
					       start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				  ((u64)addr & (~PAGE_MASK));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
				  start_addr, offset >> VTD_PAGE_SHIFT, 1))
		iommu_flush_write_buffer(domain->iommu);
	return nelems;
}

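/* The DMA mapping operations installed as the global dma_ops by intel_iommu_init(). */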
static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};

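/*
 * Slab caches for the dmar_domain, device_domain_info and iova objects
 * allocated by this driver.
 */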
static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					       sizeof(struct dmar_domain),
					       0,
					       SLAB_HWCACHE_ALIGN,
					       NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
						sizeof(struct device_domain_info),
						0,
						SLAB_HWCACHE_ALIGN,
						NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					     sizeof(struct iova),
					     0,
					     SLAB_HWCACHE_ALIGN,
					     NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

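/*
 * iommu_init_mempool - create all three caches, unwinding in reverse
 * order if any of them fails.
 */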
static int __init iommu_init_mempool(void)
{
	int ret;

	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}

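/*
 * init_no_remapping_devices - mark DRHD units that cover no PCI devices
 * as ignored and, when graphics mapping is disabled, mark GFX-only units
 * so their devices bypass the IOMMU.
 */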
static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
			    !IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}

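/*
 * intel_iommu_init - entry point: parse the DMAR table, initialise the
 * IOMMUs via init_dmars() and install intel_dma_ops, unless DMA
 * remapping is disabled or swiotlb/no_iommu is in effect.
 */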
int __init intel_iommu_init(void)
{
	int ret = 0;

	if (dmar_table_init())
		return -ENODEV;

	if (dmar_dev_scope_init())
		return -ENODEV;

	/*
	 * Check the need for DMA-remapping initialization now.
	 * Above initialization will also be used by Interrupt-remapping.
	 */
	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	       "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	init_timer(&unmap_timer);
	force_iommu = 1;
	dma_ops = &intel_dma_ops;
	return 0;
}

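/*
 * The exported helpers below expose domain allocation, context/page
 * mapping and lookup to other kernel code.
 */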
void intel_iommu_domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~VTD_PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);

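/*
 * intel_iommu_domain_alloc - allocate and initialise a domain on the
 * IOMMU whose DRHD unit covers @pdev, using DEFAULT_DOMAIN_ADDRESS_WIDTH.
 */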
struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
{
	struct dmar_drhd_unit *drhd;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;

	drhd = dmar_find_matched_drhd_unit(pdev);
	if (!drhd) {
		printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
		return NULL;
	}

	iommu = drhd->iommu;
	if (!iommu) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: iommu == NULL\n");
		return NULL;
	}
	domain = iommu_alloc_domain(iommu);
	if (!domain) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain == NULL\n");
		return NULL;
	}
	if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
		printk(KERN_ERR
			"intel_iommu_domain_alloc: domain_init() failed\n");
		intel_iommu_domain_exit(domain);
		return NULL;
	}
	return domain;
}
EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);

int intel_iommu_context_mapping(
	struct dmar_domain *domain, struct pci_dev *pdev)
{
	int rc;
	rc = domain_context_mapping(domain, pdev);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);

int intel_iommu_page_mapping(
	struct dmar_domain *domain, dma_addr_t iova,
	u64 hpa, size_t size, int prot)
{
	int rc;
	rc = domain_page_mapping(domain, iova, hpa, size, prot);
	return rc;
}
EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);

void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
{
	detach_domain_for_dev(domain, bus, devfn);
}
EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);

struct dmar_domain *
intel_iommu_find_domain(struct pci_dev *pdev)
{
	return find_domain(pdev);
}
EXPORT_SYMBOL_GPL(intel_iommu_find_domain);

int intel_iommu_found(void)
{
	return g_num_of_iommus;
}
EXPORT_SYMBOL_GPL(intel_iommu_found);

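/*
 * intel_iommu_iova_to_pfn - look up the host page frame number that
 * @iova translates to in @domain; returns 0 if no mapping exists.
 */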
u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
{
	struct dma_pte *pte;
	u64 pfn;

	pfn = 0;
	pte = addr_to_dma_pte(domain, iova);

	if (pte)
		pfn = dma_pte_addr(*pte);

	return pfn >> VTD_PAGE_SHIFT;
}
EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);