intel-iommu: move root entry defs from dma_remapping.h
drivers/pci/intel-iommu.c
/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
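
/*
 * The root table holds one root_entry per PCI bus number (with 4KiB pages
 * and 16-byte entries, ROOT_ENTRY_NR is 256).  A present root entry points
 * to a context table of 256 context entries indexed by devfn, so the
 * (bus, devfn) pair of a device selects the context entry that carries its
 * domain id, address width and page-table root.
 */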
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root)?phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer,  flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

115static void domain_remove_dev_info(struct dmar_domain *domain);
116
2ae21010 117int dmar_disabled;
ba395927 118static int __initdata dmar_map_gfx = 1;
7d3b03ce 119static int dmar_forcedac;
5e0d2a6f 120static int intel_iommu_strict;
121
122#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
123static DEFINE_SPINLOCK(device_domain_lock);
124static LIST_HEAD(device_domain_list);
125
126static int __init intel_iommu_setup(char *str)
127{
128 if (!str)
129 return -EINVAL;
130 while (*str) {
131 if (!strncmp(str, "off", 3)) {
132 dmar_disabled = 1;
133 printk(KERN_INFO"Intel-IOMMU: disabled\n");
134 } else if (!strncmp(str, "igfx_off", 8)) {
135 dmar_map_gfx = 0;
136 printk(KERN_INFO
137 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 138 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 139 printk(KERN_INFO
140 "Intel-IOMMU: Forcing DAC for PCI devices\n");
141 dmar_forcedac = 1;
5e0d2a6f 142 } else if (!strncmp(str, "strict", 6)) {
143 printk(KERN_INFO
144 "Intel-IOMMU: disable batched IOTLB flush\n");
145 intel_iommu_strict = 1;
146 }
147
148 str += strcspn(str, ",");
149 while (*str == ',')
150 str++;
151 }
152 return 0;
153}
154__setup("intel_iommu=", intel_iommu_setup);
155
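/*
 * Example: booting with "intel_iommu=igfx_off,strict" keeps DMA remapping
 * enabled but skips the graphics 1:1 mapping and disables the batched
 * (deferred) IOTLB flush on unmap; "intel_iommu=off" disables the driver
 * entirely.  Options are comma-separated, as parsed above.
 */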
156static struct kmem_cache *iommu_domain_cache;
157static struct kmem_cache *iommu_devinfo_cache;
158static struct kmem_cache *iommu_iova_cache;
159
160static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
161{
162 unsigned int flags;
163 void *vaddr;
164
165 /* trying to avoid low memory issues */
166 flags = current->flags & PF_MEMALLOC;
167 current->flags |= PF_MEMALLOC;
168 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
169 current->flags &= (~PF_MEMALLOC | flags);
170 return vaddr;
171}
172
173
174static inline void *alloc_pgtable_page(void)
175{
176 unsigned int flags;
177 void *vaddr;
178
179 /* trying to avoid low memory issues */
180 flags = current->flags & PF_MEMALLOC;
181 current->flags |= PF_MEMALLOC;
182 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
183 current->flags &= (~PF_MEMALLOC | flags);
184 return vaddr;
185}
186
187static inline void free_pgtable_page(void *vaddr)
188{
189 free_page((unsigned long)vaddr);
190}
191
192static inline void *alloc_domain_mem(void)
193{
eb3fa7cb 194 return iommu_kmem_cache_alloc(iommu_domain_cache);
195}
196
38717946 197static void free_domain_mem(void *vaddr)
198{
199 kmem_cache_free(iommu_domain_cache, vaddr);
200}
201
202static inline void * alloc_devinfo_mem(void)
203{
eb3fa7cb 204 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
205}
206
207static inline void free_devinfo_mem(void *vaddr)
208{
209 kmem_cache_free(iommu_devinfo_cache, vaddr);
210}
211
212struct iova *alloc_iova_mem(void)
213{
eb3fa7cb 214 return iommu_kmem_cache_alloc(iommu_iova_cache);
215}
216
217void free_iova_mem(struct iova *iova)
218{
219 kmem_cache_free(iommu_iova_cache, iova);
220}
221
222/* Gets context entry for a given bus and devfn */
223static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
224 u8 bus, u8 devfn)
225{
226 struct root_entry *root;
227 struct context_entry *context;
228 unsigned long phy_addr;
229 unsigned long flags;
230
231 spin_lock_irqsave(&iommu->lock, flags);
232 root = &iommu->root_entry[bus];
233 context = get_context_addr_from_root(root);
234 if (!context) {
235 context = (struct context_entry *)alloc_pgtable_page();
236 if (!context) {
237 spin_unlock_irqrestore(&iommu->lock, flags);
238 return NULL;
239 }
5b6985ce 240 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
241 phy_addr = virt_to_phys((void *)context);
242 set_root_value(root, phy_addr);
243 set_root_present(root);
244 __iommu_flush_cache(iommu, root, sizeof(*root));
245 }
246 spin_unlock_irqrestore(&iommu->lock, flags);
247 return &context[devfn];
248}
249
250static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
251{
252 struct root_entry *root;
253 struct context_entry *context;
254 int ret;
255 unsigned long flags;
256
257 spin_lock_irqsave(&iommu->lock, flags);
258 root = &iommu->root_entry[bus];
259 context = get_context_addr_from_root(root);
260 if (!context) {
261 ret = 0;
262 goto out;
263 }
264 ret = context_present(context[devfn]);
265out:
266 spin_unlock_irqrestore(&iommu->lock, flags);
267 return ret;
268}
269
270static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
271{
272 struct root_entry *root;
273 struct context_entry *context;
274 unsigned long flags;
275
276 spin_lock_irqsave(&iommu->lock, flags);
277 root = &iommu->root_entry[bus];
278 context = get_context_addr_from_root(root);
279 if (context) {
280 context_clear_entry(context[devfn]);
281 __iommu_flush_cache(iommu, &context[devfn], \
282 sizeof(*context));
283 }
284 spin_unlock_irqrestore(&iommu->lock, flags);
285}
286
287static void free_context_table(struct intel_iommu *iommu)
288{
289 struct root_entry *root;
290 int i;
291 unsigned long flags;
292 struct context_entry *context;
293
294 spin_lock_irqsave(&iommu->lock, flags);
295 if (!iommu->root_entry) {
296 goto out;
297 }
298 for (i = 0; i < ROOT_ENTRY_NR; i++) {
299 root = &iommu->root_entry[i];
300 context = get_context_addr_from_root(root);
301 if (context)
302 free_pgtable_page(context);
303 }
304 free_pgtable_page(iommu->root_entry);
305 iommu->root_entry = NULL;
306out:
307 spin_unlock_irqrestore(&iommu->lock, flags);
308}
309
310/* page table handling */
311#define LEVEL_STRIDE (9)
312#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
313
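/*
 * Each page-table level decodes LEVEL_STRIDE (9) bits of the address on
 * top of the 12-bit page offset, and the adjusted guest address width
 * (agaw) counts 9-bit steps above a 30-bit base: agaw 1 = 39 bits with
 * 3 levels, agaw 2 = 48 bits with 4 levels, matching agaw_to_width()
 * and agaw_to_level() below.
 */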
314static inline int agaw_to_level(int agaw)
315{
316 return agaw + 2;
317}
318
319static inline int agaw_to_width(int agaw)
320{
321 return 30 + agaw * LEVEL_STRIDE;
322
323}
324
325static inline int width_to_agaw(int width)
326{
327 return (width - 30) / LEVEL_STRIDE;
328}
329
330static inline unsigned int level_to_offset_bits(int level)
331{
332 return (12 + (level - 1) * LEVEL_STRIDE);
333}
334
335static inline int address_level_offset(u64 addr, int level)
336{
337 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
338}
339
340static inline u64 level_mask(int level)
341{
342 return ((u64)-1 << level_to_offset_bits(level));
343}
344
345static inline u64 level_size(int level)
346{
347 return ((u64)1 << level_to_offset_bits(level));
348}
349
350static inline u64 align_to_level(u64 addr, int level)
351{
352 return ((addr + level_size(level) - 1) & level_mask(level));
353}
354
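/*
 * addr_to_dma_pte() walks the domain's page table from the top level down
 * to level 1, allocating and cache-flushing any missing intermediate tables
 * on the way, and returns the leaf PTE that maps @addr (or NULL if a table
 * allocation fails).
 */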
355static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
356{
357 int addr_width = agaw_to_width(domain->agaw);
358 struct dma_pte *parent, *pte = NULL;
359 int level = agaw_to_level(domain->agaw);
360 int offset;
361 unsigned long flags;
362
363 BUG_ON(!domain->pgd);
364
365 addr &= (((u64)1) << addr_width) - 1;
366 parent = domain->pgd;
367
368 spin_lock_irqsave(&domain->mapping_lock, flags);
369 while (level > 0) {
370 void *tmp_page;
371
372 offset = address_level_offset(addr, level);
373 pte = &parent[offset];
374 if (level == 1)
375 break;
376
377 if (!dma_pte_present(*pte)) {
378 tmp_page = alloc_pgtable_page();
379
380 if (!tmp_page) {
381 spin_unlock_irqrestore(&domain->mapping_lock,
382 flags);
383 return NULL;
384 }
385 __iommu_flush_cache(domain->iommu, tmp_page,
5b6985ce 386 PAGE_SIZE);
387 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
388 /*
389 * high level table always sets r/w, last level page
390 * table control read/write
391 */
392 dma_set_pte_readable(*pte);
393 dma_set_pte_writable(*pte);
394 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
395 }
396 parent = phys_to_virt(dma_pte_addr(*pte));
397 level--;
398 }
399
400 spin_unlock_irqrestore(&domain->mapping_lock, flags);
401 return pte;
402}
403
404/* return address's pte at specific level */
405static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
406 int level)
407{
408 struct dma_pte *parent, *pte = NULL;
409 int total = agaw_to_level(domain->agaw);
410 int offset;
411
412 parent = domain->pgd;
413 while (level <= total) {
414 offset = address_level_offset(addr, total);
415 pte = &parent[offset];
416 if (level == total)
417 return pte;
418
419 if (!dma_pte_present(*pte))
420 break;
421 parent = phys_to_virt(dma_pte_addr(*pte));
422 total--;
423 }
424 return NULL;
425}
426
427/* clear one page's page table */
428static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
429{
430 struct dma_pte *pte = NULL;
431
432 /* get last level pte */
433 pte = dma_addr_level_pte(domain, addr, 1);
434
435 if (pte) {
436 dma_clear_pte(*pte);
437 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
438 }
439}
440
441/* clear last level pte, a tlb flush should be followed */
442static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
443{
444 int addr_width = agaw_to_width(domain->agaw);
445
446 start &= (((u64)1) << addr_width) - 1;
447 end &= (((u64)1) << addr_width) - 1;
448 /* in case it's partial page */
449 start = PAGE_ALIGN(start);
450 end &= PAGE_MASK;
451
452 /* we don't need lock here, nobody else touches the iova range */
453 while (start < end) {
454 dma_pte_clear_one(domain, start);
5b6985ce 455 start += VTD_PAGE_SIZE;
456 }
457}
458
459/* free page table pages. last level pte should already be cleared */
460static void dma_pte_free_pagetable(struct dmar_domain *domain,
461 u64 start, u64 end)
462{
463 int addr_width = agaw_to_width(domain->agaw);
464 struct dma_pte *pte;
465 int total = agaw_to_level(domain->agaw);
466 int level;
467 u64 tmp;
468
469 start &= (((u64)1) << addr_width) - 1;
470 end &= (((u64)1) << addr_width) - 1;
471
472 /* we don't need lock here, nobody else touches the iova range */
473 level = 2;
474 while (level <= total) {
475 tmp = align_to_level(start, level);
476 if (tmp >= end || (tmp + level_size(level) > end))
477 return;
478
479 while (tmp < end) {
480 pte = dma_addr_level_pte(domain, tmp, level);
481 if (pte) {
482 free_pgtable_page(
483 phys_to_virt(dma_pte_addr(*pte)));
484 dma_clear_pte(*pte);
485 __iommu_flush_cache(domain->iommu,
486 pte, sizeof(*pte));
487 }
488 tmp += level_size(level);
489 }
490 level++;
491 }
492 /* free pgd */
493 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
494 free_pgtable_page(domain->pgd);
495 domain->pgd = NULL;
496 }
497}
498
499/* iommu handling */
500static int iommu_alloc_root_entry(struct intel_iommu *iommu)
501{
502 struct root_entry *root;
503 unsigned long flags;
504
505 root = (struct root_entry *)alloc_pgtable_page();
506 if (!root)
507 return -ENOMEM;
508
5b6985ce 509 __iommu_flush_cache(iommu, root, ROOT_SIZE);
510
511 spin_lock_irqsave(&iommu->lock, flags);
512 iommu->root_entry = root;
513 spin_unlock_irqrestore(&iommu->lock, flags);
514
515 return 0;
516}
517
518static void iommu_set_root_entry(struct intel_iommu *iommu)
519{
520 void *addr;
521 u32 cmd, sts;
522 unsigned long flag;
523
524 addr = iommu->root_entry;
525
526 spin_lock_irqsave(&iommu->register_lock, flag);
527 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
528
529 cmd = iommu->gcmd | DMA_GCMD_SRTP;
530 writel(cmd, iommu->reg + DMAR_GCMD_REG);
531
532 /* Make sure hardware complete it */
533 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
534 readl, (sts & DMA_GSTS_RTPS), sts);
535
536 spin_unlock_irqrestore(&iommu->register_lock, flag);
537}
538
539static void iommu_flush_write_buffer(struct intel_iommu *iommu)
540{
541 u32 val;
542 unsigned long flag;
543
544 if (!cap_rwbf(iommu->cap))
545 return;
546 val = iommu->gcmd | DMA_GCMD_WBF;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 writel(val, iommu->reg + DMAR_GCMD_REG);
550
551 /* Make sure hardware complete it */
552 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
553 readl, (!(val & DMA_GSTS_WBFS)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556}
557
558/* return value determines whether we need a write buffer flush */
559static int __iommu_flush_context(struct intel_iommu *iommu,
560 u16 did, u16 source_id, u8 function_mask, u64 type,
561 int non_present_entry_flush)
562{
563 u64 val = 0;
564 unsigned long flag;
565
566 /*
567 * In the non-present entry flush case, if hardware doesn't cache
568 * non-present entry we do nothing and if hardware cache non-present
569 * entry, we flush entries of domain 0 (the domain id is used to cache
570 * any non-present entries)
571 */
572 if (non_present_entry_flush) {
573 if (!cap_caching_mode(iommu->cap))
574 return 1;
575 else
576 did = 0;
577 }
578
579 switch (type) {
580 case DMA_CCMD_GLOBAL_INVL:
581 val = DMA_CCMD_GLOBAL_INVL;
582 break;
583 case DMA_CCMD_DOMAIN_INVL:
584 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
585 break;
586 case DMA_CCMD_DEVICE_INVL:
587 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
588 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
589 break;
590 default:
591 BUG();
592 }
593 val |= DMA_CCMD_ICC;
594
595 spin_lock_irqsave(&iommu->register_lock, flag);
596 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
597
598 /* Make sure hardware complete it */
599 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
600 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
601
602 spin_unlock_irqrestore(&iommu->register_lock, flag);
603
4d235ba6 604 /* flush context entry will implicitly flush write buffer */
605 return 0;
606}
607
608/* return value determines whether we need a write buffer flush */
609static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
610 u64 addr, unsigned int size_order, u64 type,
611 int non_present_entry_flush)
612{
613 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
614 u64 val = 0, val_iva = 0;
615 unsigned long flag;
616
617 /*
618 * In the non-present entry flush case, if hardware doesn't cache
619 * non-present entry we do nothing and if hardware cache non-present
620 * entry, we flush entries of domain 0 (the domain id is used to cache
621 * any non-present entries)
622 */
623 if (non_present_entry_flush) {
624 if (!cap_caching_mode(iommu->cap))
625 return 1;
626 else
627 did = 0;
628 }
629
630 switch (type) {
631 case DMA_TLB_GLOBAL_FLUSH:
632 /* global flush doesn't need set IVA_REG */
633 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
634 break;
635 case DMA_TLB_DSI_FLUSH:
636 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
637 break;
638 case DMA_TLB_PSI_FLUSH:
639 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
640 /* Note: always flush non-leaf currently */
641 val_iva = size_order | addr;
642 break;
643 default:
644 BUG();
645 }
646 /* Note: set drain read/write */
647#if 0
648 /*
649 * This is probably to be super secure.. Looks like we can
650 * ignore it without any impact.
651 */
652 if (cap_read_drain(iommu->cap))
653 val |= DMA_TLB_READ_DRAIN;
654#endif
655 if (cap_write_drain(iommu->cap))
656 val |= DMA_TLB_WRITE_DRAIN;
657
658 spin_lock_irqsave(&iommu->register_lock, flag);
659 /* Note: Only uses first TLB reg currently */
660 if (val_iva)
661 dmar_writeq(iommu->reg + tlb_offset, val_iva);
662 dmar_writeq(iommu->reg + tlb_offset + 8, val);
663
664 /* Make sure hardware complete it */
665 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
666 dmar_readq, (!(val & DMA_TLB_IVT)), val);
667
668 spin_unlock_irqrestore(&iommu->register_lock, flag);
669
670 /* check IOTLB invalidation granularity */
671 if (DMA_TLB_IAIG(val) == 0)
672 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
673 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
674 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
675 (unsigned long long)DMA_TLB_IIRG(type),
676 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 677 /* flush iotlb entry will implicitly flush write buffer */
678 return 0;
679}
680
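/*
 * Page-selective invalidation (PSI) can only flush a naturally aligned
 * 2^mask page range, so the helper below rounds the page count up to a
 * power of two and falls back to a domain-selective flush when the
 * hardware lacks PSI support or the range exceeds cap_max_amask_val().
 */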
681static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
682 u64 addr, unsigned int pages, int non_present_entry_flush)
683{
f76aec76 684 unsigned int mask;
ba395927 685
5b6985ce 686 BUG_ON(addr & (~VTD_PAGE_MASK));
687 BUG_ON(pages == 0);
688
689 /* Fallback to domain selective flush if no PSI support */
690 if (!cap_pgsel_inv(iommu->cap))
691 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
692 DMA_TLB_DSI_FLUSH,
693 non_present_entry_flush);
694
695 /*
696 * PSI requires page size to be 2 ^ x, and the base address is naturally
697 * aligned to the size
698 */
f76aec76 699 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 700 /* Fallback to domain selective flush if size is too big */
f76aec76 701 if (mask > cap_max_amask_val(iommu->cap))
702 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
703 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 704
705 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
706 DMA_TLB_PSI_FLUSH,
707 non_present_entry_flush);
708}
709
f8bab735 710static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
711{
712 u32 pmen;
713 unsigned long flags;
714
715 spin_lock_irqsave(&iommu->register_lock, flags);
716 pmen = readl(iommu->reg + DMAR_PMEN_REG);
717 pmen &= ~DMA_PMEN_EPM;
718 writel(pmen, iommu->reg + DMAR_PMEN_REG);
719
720 /* wait for the protected region status bit to clear */
721 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
722 readl, !(pmen & DMA_PMEN_PRS), pmen);
723
724 spin_unlock_irqrestore(&iommu->register_lock, flags);
725}
726
727static int iommu_enable_translation(struct intel_iommu *iommu)
728{
729 u32 sts;
730 unsigned long flags;
731
732 spin_lock_irqsave(&iommu->register_lock, flags);
733 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
734
735 /* Make sure hardware complete it */
736 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
737 readl, (sts & DMA_GSTS_TES), sts);
738
739 iommu->gcmd |= DMA_GCMD_TE;
740 spin_unlock_irqrestore(&iommu->register_lock, flags);
741 return 0;
742}
743
744static int iommu_disable_translation(struct intel_iommu *iommu)
745{
746 u32 sts;
747 unsigned long flag;
748
749 spin_lock_irqsave(&iommu->register_lock, flag);
750 iommu->gcmd &= ~DMA_GCMD_TE;
751 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
752
753 /* Make sure hardware complete it */
754 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
755 readl, (!(sts & DMA_GSTS_TES)), sts);
756
757 spin_unlock_irqrestore(&iommu->register_lock, flag);
758 return 0;
759}
760
761/* iommu interrupt handling. Most of it is MSI-like. */
762
d94afc6c 763static const char *fault_reason_strings[] =
764{
765 "Software",
766 "Present bit in root entry is clear",
767 "Present bit in context entry is clear",
768 "Invalid context entry",
769 "Access beyond MGAW",
770 "PTE Write access is not set",
771 "PTE Read access is not set",
772 "Next page table ptr is invalid",
773 "Root table address invalid",
774 "Context table ptr is invalid",
775 "non-zero reserved fields in RTP",
776 "non-zero reserved fields in CTP",
777 "non-zero reserved fields in PTE",
3460a6d9 778};
f8bab735 779#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 780
d94afc6c 781const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 782{
d94afc6c 783 if (fault_reason > MAX_FAULT_REASON_IDX)
784 return "Unknown";
785 else
786 return fault_reason_strings[fault_reason];
787}
788
789void dmar_msi_unmask(unsigned int irq)
790{
791 struct intel_iommu *iommu = get_irq_data(irq);
792 unsigned long flag;
793
794 /* unmask it */
795 spin_lock_irqsave(&iommu->register_lock, flag);
796 writel(0, iommu->reg + DMAR_FECTL_REG);
797 /* Read a reg to force flush the post write */
798 readl(iommu->reg + DMAR_FECTL_REG);
799 spin_unlock_irqrestore(&iommu->register_lock, flag);
800}
801
802void dmar_msi_mask(unsigned int irq)
803{
804 unsigned long flag;
805 struct intel_iommu *iommu = get_irq_data(irq);
806
807 /* mask it */
808 spin_lock_irqsave(&iommu->register_lock, flag);
809 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
810 /* Read a reg to force flush the post write */
811 readl(iommu->reg + DMAR_FECTL_REG);
812 spin_unlock_irqrestore(&iommu->register_lock, flag);
813}
814
815void dmar_msi_write(int irq, struct msi_msg *msg)
816{
817 struct intel_iommu *iommu = get_irq_data(irq);
818 unsigned long flag;
819
820 spin_lock_irqsave(&iommu->register_lock, flag);
821 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
822 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
823 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
824 spin_unlock_irqrestore(&iommu->register_lock, flag);
825}
826
827void dmar_msi_read(int irq, struct msi_msg *msg)
828{
829 struct intel_iommu *iommu = get_irq_data(irq);
830 unsigned long flag;
831
832 spin_lock_irqsave(&iommu->register_lock, flag);
833 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
834 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
835 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
836 spin_unlock_irqrestore(&iommu->register_lock, flag);
837}
838
839static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 840 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 841{
d94afc6c 842 const char *reason;
843
844 reason = dmar_get_fault_reason(fault_reason);
845
846 printk(KERN_ERR
847 "DMAR:[%s] Request device [%02x:%02x.%d] "
848 "fault addr %llx \n"
849 "DMAR:[fault reason %02d] %s\n",
850 (type ? "DMA Read" : "DMA Write"),
851 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
852 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
853 return 0;
854}
855
856#define PRIMARY_FAULT_REG_LEN (16)
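/*
 * The primary fault handler walks the hardware fault-record ring: each
 * record is PRIMARY_FAULT_REG_LEN (16) bytes, the F bit in its last dword
 * marks it valid, and the handler reports the reason, source-id and
 * faulting address of every set record, clearing each record (and any
 * fault-overflow condition) as it goes.
 */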
857static irqreturn_t iommu_page_fault(int irq, void *dev_id)
858{
859 struct intel_iommu *iommu = dev_id;
860 int reg, fault_index;
861 u32 fault_status;
862 unsigned long flag;
863
864 spin_lock_irqsave(&iommu->register_lock, flag);
865 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
866
867 /* TBD: ignore advanced fault log currently */
868 if (!(fault_status & DMA_FSTS_PPF))
869 goto clear_overflow;
870
871 fault_index = dma_fsts_fault_record_index(fault_status);
872 reg = cap_fault_reg_offset(iommu->cap);
873 while (1) {
874 u8 fault_reason;
875 u16 source_id;
876 u64 guest_addr;
877 int type;
878 u32 data;
879
880 /* highest 32 bits */
881 data = readl(iommu->reg + reg +
882 fault_index * PRIMARY_FAULT_REG_LEN + 12);
883 if (!(data & DMA_FRCD_F))
884 break;
885
886 fault_reason = dma_frcd_fault_reason(data);
887 type = dma_frcd_type(data);
888
889 data = readl(iommu->reg + reg +
890 fault_index * PRIMARY_FAULT_REG_LEN + 8);
891 source_id = dma_frcd_source_id(data);
892
893 guest_addr = dmar_readq(iommu->reg + reg +
894 fault_index * PRIMARY_FAULT_REG_LEN);
895 guest_addr = dma_frcd_page_addr(guest_addr);
896 /* clear the fault */
897 writel(DMA_FRCD_F, iommu->reg + reg +
898 fault_index * PRIMARY_FAULT_REG_LEN + 12);
899
900 spin_unlock_irqrestore(&iommu->register_lock, flag);
901
902 iommu_page_fault_do_one(iommu, type, fault_reason,
903 source_id, guest_addr);
904
905 fault_index++;
906 if (fault_index > cap_num_fault_regs(iommu->cap))
907 fault_index = 0;
908 spin_lock_irqsave(&iommu->register_lock, flag);
909 }
910clear_overflow:
911 /* clear primary fault overflow */
912 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
913 if (fault_status & DMA_FSTS_PFO)
914 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
915
916 spin_unlock_irqrestore(&iommu->register_lock, flag);
917 return IRQ_HANDLED;
918}
919
920int dmar_set_interrupt(struct intel_iommu *iommu)
921{
922 int irq, ret;
923
924 irq = create_irq();
925 if (!irq) {
926 printk(KERN_ERR "IOMMU: no free vectors\n");
927 return -EINVAL;
928 }
929
930 set_irq_data(irq, iommu);
931 iommu->irq = irq;
932
933 ret = arch_setup_dmar_msi(irq);
934 if (ret) {
935 set_irq_data(irq, NULL);
936 iommu->irq = 0;
937 destroy_irq(irq);
938 return 0;
939 }
940
941 /* Force fault register is cleared */
942 iommu_page_fault(irq, iommu);
943
944 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
945 if (ret)
946 printk(KERN_ERR "IOMMU: can't request irq\n");
947 return ret;
948}
949
950static int iommu_init_domains(struct intel_iommu *iommu)
951{
952 unsigned long ndomains;
953 unsigned long nlongs;
954
955 ndomains = cap_ndoms(iommu->cap);
956 pr_debug("Number of Domains supportd <%ld>\n", ndomains);
957 nlongs = BITS_TO_LONGS(ndomains);
958
959 /* TBD: there might be 64K domains,
960 * consider other allocation for future chip
961 */
962 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
963 if (!iommu->domain_ids) {
964 printk(KERN_ERR "Allocating domain id array failed\n");
965 return -ENOMEM;
966 }
967 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
968 GFP_KERNEL);
969 if (!iommu->domains) {
970 printk(KERN_ERR "Allocating domain array failed\n");
971 kfree(iommu->domain_ids);
972 return -ENOMEM;
973 }
974
975 spin_lock_init(&iommu->lock);
976
977 /*
978 * if Caching mode is set, then invalid translations are tagged
979 * with domainid 0. Hence we need to pre-allocate it.
980 */
981 if (cap_caching_mode(iommu->cap))
982 set_bit(0, iommu->domain_ids);
983 return 0;
984}
ba395927 985
986
987static void domain_exit(struct dmar_domain *domain);
988
989void free_dmar_iommu(struct intel_iommu *iommu)
990{
991 struct dmar_domain *domain;
992 int i;
993
994 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
995 for (; i < cap_ndoms(iommu->cap); ) {
996 domain = iommu->domains[i];
997 clear_bit(i, iommu->domain_ids);
998 domain_exit(domain);
999 i = find_next_bit(iommu->domain_ids,
1000 cap_ndoms(iommu->cap), i+1);
1001 }
1002
1003 if (iommu->gcmd & DMA_GCMD_TE)
1004 iommu_disable_translation(iommu);
1005
1006 if (iommu->irq) {
1007 set_irq_data(iommu->irq, NULL);
1008 /* This will mask the irq */
1009 free_irq(iommu->irq, iommu);
1010 destroy_irq(iommu->irq);
1011 }
1012
1013 kfree(iommu->domains);
1014 kfree(iommu->domain_ids);
1015
1016 /* free context mapping */
1017 free_context_table(iommu);
1018}
1019
1020static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1021{
1022 unsigned long num;
1023 unsigned long ndomains;
1024 struct dmar_domain *domain;
1025 unsigned long flags;
1026
1027 domain = alloc_domain_mem();
1028 if (!domain)
1029 return NULL;
1030
1031 ndomains = cap_ndoms(iommu->cap);
1032
1033 spin_lock_irqsave(&iommu->lock, flags);
1034 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1035 if (num >= ndomains) {
1036 spin_unlock_irqrestore(&iommu->lock, flags);
1037 free_domain_mem(domain);
1038 printk(KERN_ERR "IOMMU: no free domain ids\n");
1039 return NULL;
1040 }
1041
1042 set_bit(num, iommu->domain_ids);
1043 domain->id = num;
1044 domain->iommu = iommu;
1045 iommu->domains[num] = domain;
1046 spin_unlock_irqrestore(&iommu->lock, flags);
1047
1048 return domain;
1049}
1050
1051static void iommu_free_domain(struct dmar_domain *domain)
1052{
1053 unsigned long flags;
1054
1055 spin_lock_irqsave(&domain->iommu->lock, flags);
1056 clear_bit(domain->id, domain->iommu->domain_ids);
1057 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1058}
1059
1060static struct iova_domain reserved_iova_list;
1061static struct lock_class_key reserved_alloc_key;
1062static struct lock_class_key reserved_rbtree_key;
1063
1064static void dmar_init_reserved_ranges(void)
1065{
1066 struct pci_dev *pdev = NULL;
1067 struct iova *iova;
1068 int i;
1069 u64 addr, size;
1070
f661197e 1071 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1072
1073 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1074 &reserved_alloc_key);
1075 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1076 &reserved_rbtree_key);
1077
1078 /* IOAPIC ranges shouldn't be accessed by DMA */
1079 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1080 IOVA_PFN(IOAPIC_RANGE_END));
1081 if (!iova)
1082 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1083
1084 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1085 for_each_pci_dev(pdev) {
1086 struct resource *r;
1087
1088 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1089 r = &pdev->resource[i];
1090 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1091 continue;
1092 addr = r->start;
5b6985ce 1093 addr &= PAGE_MASK;
ba395927 1094 size = r->end - addr;
5b6985ce 1095 size = PAGE_ALIGN(size);
1096 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1097 IOVA_PFN(size + addr) - 1);
1098 if (!iova)
1099 printk(KERN_ERR "Reserve iova failed\n");
1100 }
1101 }
1102
1103}
1104
1105static void domain_reserve_special_ranges(struct dmar_domain *domain)
1106{
1107 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1108}
1109
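/*
 * Round the requested guest address width up so that the bits above the
 * 12-bit page offset form whole 9-bit levels, e.g. a 36-bit guest width
 * becomes an adjusted width of 39 bits (12 + 3 * 9); the result is capped
 * at 64.
 */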
1110static inline int guestwidth_to_adjustwidth(int gaw)
1111{
1112 int agaw;
1113 int r = (gaw - 12) % 9;
1114
1115 if (r == 0)
1116 agaw = gaw;
1117 else
1118 agaw = gaw + 9 - r;
1119 if (agaw > 64)
1120 agaw = 64;
1121 return agaw;
1122}
1123
1124static int domain_init(struct dmar_domain *domain, int guest_width)
1125{
1126 struct intel_iommu *iommu;
1127 int adjust_width, agaw;
1128 unsigned long sagaw;
1129
f661197e 1130 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1131 spin_lock_init(&domain->mapping_lock);
1132
1133 domain_reserve_special_ranges(domain);
1134
1135 /* calculate AGAW */
1136 iommu = domain->iommu;
1137 if (guest_width > cap_mgaw(iommu->cap))
1138 guest_width = cap_mgaw(iommu->cap);
1139 domain->gaw = guest_width;
1140 adjust_width = guestwidth_to_adjustwidth(guest_width);
1141 agaw = width_to_agaw(adjust_width);
1142 sagaw = cap_sagaw(iommu->cap);
1143 if (!test_bit(agaw, &sagaw)) {
1144 /* hardware doesn't support it, choose a bigger one */
1145 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1146 agaw = find_next_bit(&sagaw, 5, agaw);
1147 if (agaw >= 5)
1148 return -ENODEV;
1149 }
1150 domain->agaw = agaw;
1151 INIT_LIST_HEAD(&domain->devices);
1152
1153 /* always allocate the top pgd */
1154 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1155 if (!domain->pgd)
1156 return -ENOMEM;
5b6985ce 1157 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1158 return 0;
1159}
1160
1161static void domain_exit(struct dmar_domain *domain)
1162{
1163 u64 end;
1164
1165 /* Domain 0 is reserved, so don't process it */
1166 if (!domain)
1167 return;
1168
1169 domain_remove_dev_info(domain);
1170 /* destroy iovas */
1171 put_iova_domain(&domain->iovad);
1172 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1173 end = end & (~PAGE_MASK);
1174
1175 /* clear ptes */
1176 dma_pte_clear_range(domain, 0, end);
1177
1178 /* free page tables */
1179 dma_pte_free_pagetable(domain, 0, end);
1180
1181 iommu_free_domain(domain);
1182 free_domain_mem(domain);
1183}
1184
1185static int domain_context_mapping_one(struct dmar_domain *domain,
1186 u8 bus, u8 devfn)
1187{
1188 struct context_entry *context;
1189 struct intel_iommu *iommu = domain->iommu;
1190 unsigned long flags;
1191
1192 pr_debug("Set context mapping for %02x:%02x.%d\n",
1193 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1194 BUG_ON(!domain->pgd);
1195 context = device_to_context_entry(iommu, bus, devfn);
1196 if (!context)
1197 return -ENOMEM;
1198 spin_lock_irqsave(&iommu->lock, flags);
1199 if (context_present(*context)) {
1200 spin_unlock_irqrestore(&iommu->lock, flags);
1201 return 0;
1202 }
1203
1204 context_set_domain_id(*context, domain->id);
1205 context_set_address_width(*context, domain->agaw);
1206 context_set_address_root(*context, virt_to_phys(domain->pgd));
1207 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1208 context_set_fault_enable(*context);
1209 context_set_present(*context);
1210 __iommu_flush_cache(iommu, context, sizeof(*context));
1211
1212 /* it's a non-present to present mapping */
1213 if (iommu->flush.flush_context(iommu, domain->id,
1214 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1215 DMA_CCMD_DEVICE_INVL, 1))
1216 iommu_flush_write_buffer(iommu);
1217 else
1218 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1219
1220 spin_unlock_irqrestore(&iommu->lock, flags);
1221 return 0;
1222}
1223
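/*
 * Context entries have to be programmed not only for the device itself but
 * also for every bridge between it and its upstream PCIe-to-PCI bridge,
 * since DMA requests from devices behind such a bridge can carry the
 * bridge's bus/devfn as their source-id.
 */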
1224static int
1225domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1226{
1227 int ret;
1228 struct pci_dev *tmp, *parent;
1229
1230 ret = domain_context_mapping_one(domain, pdev->bus->number,
1231 pdev->devfn);
1232 if (ret)
1233 return ret;
1234
1235 /* dependent device mapping */
1236 tmp = pci_find_upstream_pcie_bridge(pdev);
1237 if (!tmp)
1238 return 0;
1239 /* Secondary interface's bus number and devfn 0 */
1240 parent = pdev->bus->self;
1241 while (parent != tmp) {
1242 ret = domain_context_mapping_one(domain, parent->bus->number,
1243 parent->devfn);
1244 if (ret)
1245 return ret;
1246 parent = parent->bus->self;
1247 }
1248 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1249 return domain_context_mapping_one(domain,
1250 tmp->subordinate->number, 0);
1251 else /* this is a legacy PCI bridge */
1252 return domain_context_mapping_one(domain,
1253 tmp->bus->number, tmp->devfn);
1254}
1255
1256static int domain_context_mapped(struct dmar_domain *domain,
1257 struct pci_dev *pdev)
1258{
1259 int ret;
1260 struct pci_dev *tmp, *parent;
1261
1262 ret = device_context_mapped(domain->iommu,
1263 pdev->bus->number, pdev->devfn);
1264 if (!ret)
1265 return ret;
1266 /* dependent device mapping */
1267 tmp = pci_find_upstream_pcie_bridge(pdev);
1268 if (!tmp)
1269 return ret;
1270 /* Secondary interface's bus number and devfn 0 */
1271 parent = pdev->bus->self;
1272 while (parent != tmp) {
1273 ret = device_context_mapped(domain->iommu, parent->bus->number,
1274 parent->devfn);
1275 if (!ret)
1276 return ret;
1277 parent = parent->bus->self;
1278 }
1279 if (tmp->is_pcie)
1280 return device_context_mapped(domain->iommu,
1281 tmp->subordinate->number, 0);
1282 else
1283 return device_context_mapped(domain->iommu,
1284 tmp->bus->number, tmp->devfn);
1285}
1286
1287static int
1288domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1289 u64 hpa, size_t size, int prot)
1290{
1291 u64 start_pfn, end_pfn;
1292 struct dma_pte *pte;
1293 int index;
1294 int addr_width = agaw_to_width(domain->agaw);
1295
1296 hpa &= (((u64)1) << addr_width) - 1;
1297
1298 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1299 return -EINVAL;
1300 iova &= PAGE_MASK;
1301 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1302 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
1303 index = 0;
1304 while (start_pfn < end_pfn) {
5b6985ce 1305 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
1306 if (!pte)
1307 return -ENOMEM;
1308 /* We don't need lock here, nobody else
1309 * touches the iova range
1310 */
1311 BUG_ON(dma_pte_addr(*pte));
5b6985ce 1312 dma_set_pte_addr(*pte, start_pfn << VTD_PAGE_SHIFT);
1313 dma_set_pte_prot(*pte, prot);
1314 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1315 start_pfn++;
1316 index++;
1317 }
1318 return 0;
1319}
1320
1321static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1322{
1323 clear_context_table(domain->iommu, bus, devfn);
1324 domain->iommu->flush.flush_context(domain->iommu, 0, 0, 0,
1325 DMA_CCMD_GLOBAL_INVL, 0);
1326 domain->iommu->flush.flush_iotlb(domain->iommu, 0, 0, 0,
1327 DMA_TLB_GLOBAL_FLUSH, 0);
1328}
1329
1330static void domain_remove_dev_info(struct dmar_domain *domain)
1331{
1332 struct device_domain_info *info;
1333 unsigned long flags;
1334
1335 spin_lock_irqsave(&device_domain_lock, flags);
1336 while (!list_empty(&domain->devices)) {
1337 info = list_entry(domain->devices.next,
1338 struct device_domain_info, link);
1339 list_del(&info->link);
1340 list_del(&info->global);
1341 if (info->dev)
358dd8ac 1342 info->dev->dev.archdata.iommu = NULL;
1343 spin_unlock_irqrestore(&device_domain_lock, flags);
1344
1345 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1346 free_devinfo_mem(info);
1347
1348 spin_lock_irqsave(&device_domain_lock, flags);
1349 }
1350 spin_unlock_irqrestore(&device_domain_lock, flags);
1351}
1352
1353/*
1354 * find_domain
358dd8ac 1355 * Note: we use struct pci_dev->dev.archdata.iommu stores the info
ba395927 1356 */
38717946 1357static struct dmar_domain *
1358find_domain(struct pci_dev *pdev)
1359{
1360 struct device_domain_info *info;
1361
1362 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1363 info = pdev->dev.archdata.iommu;
1364 if (info)
1365 return info->domain;
1366 return NULL;
1367}
1368
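/*
 * get_domain_for_dev() first looks for an existing domain, either one
 * already attached to the device or one shared through its upstream
 * PCIe-to-PCI bridge; otherwise it allocates a new domain on the IOMMU of
 * the matching DRHD unit and links a device_domain_info entry for the
 * device (and, for devices behind a p2p bridge, for the bridge as well).
 */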
1369/* domain is initialized */
1370static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1371{
1372 struct dmar_domain *domain, *found = NULL;
1373 struct intel_iommu *iommu;
1374 struct dmar_drhd_unit *drhd;
1375 struct device_domain_info *info, *tmp;
1376 struct pci_dev *dev_tmp;
1377 unsigned long flags;
1378 int bus = 0, devfn = 0;
1379
1380 domain = find_domain(pdev);
1381 if (domain)
1382 return domain;
1383
1384 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1385 if (dev_tmp) {
1386 if (dev_tmp->is_pcie) {
1387 bus = dev_tmp->subordinate->number;
1388 devfn = 0;
1389 } else {
1390 bus = dev_tmp->bus->number;
1391 devfn = dev_tmp->devfn;
1392 }
1393 spin_lock_irqsave(&device_domain_lock, flags);
1394 list_for_each_entry(info, &device_domain_list, global) {
1395 if (info->bus == bus && info->devfn == devfn) {
1396 found = info->domain;
1397 break;
1398 }
1399 }
1400 spin_unlock_irqrestore(&device_domain_lock, flags);
1401 /* pcie-pci bridge already has a domain, use it */
1402 if (found) {
1403 domain = found;
1404 goto found_domain;
1405 }
1406 }
1407
1408 /* Allocate new domain for the device */
1409 drhd = dmar_find_matched_drhd_unit(pdev);
1410 if (!drhd) {
1411 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1412 pci_name(pdev));
1413 return NULL;
1414 }
1415 iommu = drhd->iommu;
1416
1417 domain = iommu_alloc_domain(iommu);
1418 if (!domain)
1419 goto error;
1420
1421 if (domain_init(domain, gaw)) {
1422 domain_exit(domain);
1423 goto error;
1424 }
1425
1426 /* register pcie-to-pci device */
1427 if (dev_tmp) {
1428 info = alloc_devinfo_mem();
1429 if (!info) {
1430 domain_exit(domain);
1431 goto error;
1432 }
1433 info->bus = bus;
1434 info->devfn = devfn;
1435 info->dev = NULL;
1436 info->domain = domain;
1437 /* This domain is shared by devices under p2p bridge */
1438 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1439
1440 /* pcie-to-pci bridge already has a domain, use it */
1441 found = NULL;
1442 spin_lock_irqsave(&device_domain_lock, flags);
1443 list_for_each_entry(tmp, &device_domain_list, global) {
1444 if (tmp->bus == bus && tmp->devfn == devfn) {
1445 found = tmp->domain;
1446 break;
1447 }
1448 }
1449 if (found) {
1450 free_devinfo_mem(info);
1451 domain_exit(domain);
1452 domain = found;
1453 } else {
1454 list_add(&info->link, &domain->devices);
1455 list_add(&info->global, &device_domain_list);
1456 }
1457 spin_unlock_irqrestore(&device_domain_lock, flags);
1458 }
1459
1460found_domain:
1461 info = alloc_devinfo_mem();
1462 if (!info)
1463 goto error;
1464 info->bus = pdev->bus->number;
1465 info->devfn = pdev->devfn;
1466 info->dev = pdev;
1467 info->domain = domain;
1468 spin_lock_irqsave(&device_domain_lock, flags);
1469 /* somebody is fast */
1470 found = find_domain(pdev);
1471 if (found != NULL) {
1472 spin_unlock_irqrestore(&device_domain_lock, flags);
1473 if (found != domain) {
1474 domain_exit(domain);
1475 domain = found;
1476 }
1477 free_devinfo_mem(info);
1478 return domain;
1479 }
1480 list_add(&info->link, &domain->devices);
1481 list_add(&info->global, &device_domain_list);
358dd8ac 1482 pdev->dev.archdata.iommu = info;
1483 spin_unlock_irqrestore(&device_domain_lock, flags);
1484 return domain;
1485error:
1486 /* recheck it here, maybe others set it */
1487 return find_domain(pdev);
1488}
1489
1490static int iommu_prepare_identity_map(struct pci_dev *pdev,
1491 unsigned long long start,
1492 unsigned long long end)
1493{
1494 struct dmar_domain *domain;
1495 unsigned long size;
5b6985ce 1496 unsigned long long base;
1497 int ret;
1498
1499 printk(KERN_INFO
1500 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1501 pci_name(pdev), start, end);
1502 /* page table init */
1503 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1504 if (!domain)
1505 return -ENOMEM;
1506
1507 /* The address might not be aligned */
5b6985ce 1508 base = start & PAGE_MASK;
ba395927 1509 size = end - base;
5b6985ce 1510 size = PAGE_ALIGN(size);
1511 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1512 IOVA_PFN(base + size) - 1)) {
1513 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1514 ret = -ENOMEM;
1515 goto error;
1516 }
1517
1518 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1519 size, base, pci_name(pdev));
1520 /*
1521 * RMRR range might have overlap with physical memory range,
1522 * clear it first
1523 */
1524 dma_pte_clear_range(domain, base, base + size);
1525
1526 ret = domain_page_mapping(domain, base, base, size,
1527 DMA_PTE_READ|DMA_PTE_WRITE);
1528 if (ret)
1529 goto error;
1530
1531 /* context entry init */
1532 ret = domain_context_mapping(domain, pdev);
1533 if (!ret)
1534 return 0;
1535error:
1536 domain_exit(domain);
1537 return ret;
1538
1539}
1540
1541static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1542 struct pci_dev *pdev)
1543{
358dd8ac 1544 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1545 return 0;
1546 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1547 rmrr->end_address + 1);
1548}
1549
e820482c 1550#ifdef CONFIG_DMAR_GFX_WA
1551struct iommu_prepare_data {
1552 struct pci_dev *pdev;
1553 int ret;
1554};
1555
1556static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1557 unsigned long end_pfn, void *datax)
1558{
1559 struct iommu_prepare_data *data;
1560
1561 data = (struct iommu_prepare_data *)datax;
1562
1563 data->ret = iommu_prepare_identity_map(data->pdev,
1564 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1565 return data->ret;
1566
1567}
1568
1569static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1570{
1571 int nid;
1572 struct iommu_prepare_data data;
1573
1574 data.pdev = pdev;
1575 data.ret = 0;
1576
1577 for_each_online_node(nid) {
1578 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1579 if (data.ret)
1580 return data.ret;
1581 }
1582 return data.ret;
1583}
1584
1585static void __init iommu_prepare_gfx_mapping(void)
1586{
1587 struct pci_dev *pdev = NULL;
1588 int ret;
1589
1590 for_each_pci_dev(pdev) {
358dd8ac 1591 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1592 !IS_GFX_DEVICE(pdev))
1593 continue;
1594 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1595 pci_name(pdev));
1596 ret = iommu_prepare_with_active_regions(pdev);
1597 if (ret)
1598 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1599 }
1600}
1601#endif
1602
1603#ifdef CONFIG_DMAR_FLOPPY_WA
1604static inline void iommu_prepare_isa(void)
1605{
1606 struct pci_dev *pdev;
1607 int ret;
1608
1609 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1610 if (!pdev)
1611 return;
1612
1613 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1614 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1615
1616 if (ret)
1617 printk("IOMMU: Failed to create 0-64M identity map, "
1618 "floppy might not work\n");
1619
1620}
1621#else
1622static inline void iommu_prepare_isa(void)
1623{
1624 return;
1625}
1626#endif /* !CONFIG_DMAR_FLOPPY_WA */
1627
519a0549 1628static int __init init_dmars(void)
1629{
1630 struct dmar_drhd_unit *drhd;
1631 struct dmar_rmrr_unit *rmrr;
1632 struct pci_dev *pdev;
1633 struct intel_iommu *iommu;
80b20dd8 1634 int i, ret, unit = 0;
1635
1636 /*
1637 * for each drhd
1638 * allocate root
1639 * initialize and program root entry to not present
1640 * endfor
1641 */
1642 for_each_drhd_unit(drhd) {
5e0d2a6f 1643 g_num_of_iommus++;
1644 /*
1645 * lock not needed as this is only incremented in the single
1646 * threaded kernel __init code path all other access are read
1647 * only
1648 */
1649 }
1650
80b20dd8 1651 deferred_flush = kzalloc(g_num_of_iommus *
1652 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1653 if (!deferred_flush) {
5e0d2a6f 1654 ret = -ENOMEM;
1655 goto error;
1656 }
1657
5e0d2a6f 1658 for_each_drhd_unit(drhd) {
1659 if (drhd->ignored)
1660 continue;
1661
1662 iommu = drhd->iommu;
ba395927 1663
1664 ret = iommu_init_domains(iommu);
1665 if (ret)
1666 goto error;
1667
1668 /*
1669 * TBD:
1670 * we could share the same root & context tables
1671 * among all IOMMUs. Need to split it later.
1672 */
1673 ret = iommu_alloc_root_entry(iommu);
1674 if (ret) {
1675 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1676 goto error;
1677 }
1678 }
1679
a77b67d4
YS
1680 for_each_drhd_unit(drhd) {
1681 if (drhd->ignored)
1682 continue;
1683
1684 iommu = drhd->iommu;
1685 if (dmar_enable_qi(iommu)) {
1686 /*
1687 * Queued Invalidate not enabled, use Register Based
1688 * Invalidate
1689 */
1690 iommu->flush.flush_context = __iommu_flush_context;
1691 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
1692 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
1693 "invalidation\n",
1694 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1695 } else {
1696 iommu->flush.flush_context = qi_flush_context;
1697 iommu->flush.flush_iotlb = qi_flush_iotlb;
1698 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
1699 "invalidation\n",
1700 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
1701 }
1702 }
1703
ba395927
KA
1704 /*
1705 * For each rmrr
1706 * for each dev attached to rmrr
1707 * do
1708 * locate drhd for dev, alloc domain for dev
1709 * allocate free domain
1710 * allocate page table entries for rmrr
1711 * if context not allocated for bus
1712 * allocate and init context
1713 * set present in root table for this bus
1714 * init context with domain, translation etc
1715 * endfor
1716 * endfor
1717 */
1718 for_each_rmrr_units(rmrr) {
ba395927
KA
1719 for (i = 0; i < rmrr->devices_cnt; i++) {
1720 pdev = rmrr->devices[i];
1721 /* some BIOSes list non-existent devices in the DMAR table */
1722 if (!pdev)
1723 continue;
1724 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1725 if (ret)
1726 printk(KERN_ERR
1727 "IOMMU: mapping reserved region failed\n");
1728 }
1729 }
1730
e820482c
KA
1731 iommu_prepare_gfx_mapping();
1732
49a0429e
KA
1733 iommu_prepare_isa();
1734
ba395927
KA
1735 /*
1736 * for each drhd
1737 * enable fault log
1738 * global invalidate context cache
1739 * global invalidate iotlb
1740 * enable translation
1741 */
1742 for_each_drhd_unit(drhd) {
1743 if (drhd->ignored)
1744 continue;
1745 iommu = drhd->iommu;
1746 sprintf (iommu->name, "dmar%d", unit++);
1747
1748 iommu_flush_write_buffer(iommu);
1749
3460a6d9
KA
1750 ret = dmar_set_interrupt(iommu);
1751 if (ret)
1752 goto error;
1753
ba395927
KA
1754 iommu_set_root_entry(iommu);
1755
a77b67d4
YS
1756 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
1757 0);
1758 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
1759 0);
f8bab735 1760 iommu_disable_protect_mem_regions(iommu);
1761
ba395927
KA
1762 ret = iommu_enable_translation(iommu);
1763 if (ret)
1764 goto error;
1765 }
1766
1767 return 0;
1768error:
1769 for_each_drhd_unit(drhd) {
1770 if (drhd->ignored)
1771 continue;
1772 iommu = drhd->iommu;
1773 free_iommu(iommu);
1774 }
1775 return ret;
1776}
1777
1778static inline u64 aligned_size(u64 host_addr, size_t size)
1779{
1780 u64 addr;
5b6985ce
FY
1781 addr = (host_addr & (~PAGE_MASK)) + size;
1782 return PAGE_ALIGN(addr);
ba395927
KA
1783}
1784
1785struct iova *
f76aec76 1786iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 1787{
ba395927
KA
1788 struct iova *piova;
1789
1790 /* Make sure it's in range */
ba395927 1791 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 1792 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
1793 return NULL;
1794
1795 piova = alloc_iova(&domain->iovad,
5b6985ce 1796 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
1797 return piova;
1798}
1799
f76aec76
KA
1800static struct iova *
1801__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 1802 size_t size, u64 dma_mask)
ba395927 1803{
ba395927 1804 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 1805 struct iova *iova = NULL;
ba395927 1806
bb9e6d65
FT
1807 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
1808 iova = iommu_alloc_iova(domain, size, dma_mask);
1809 else {
ba395927
KA
1810 /*
1811 * First try to allocate an io virtual address in
1812 * DMA_32BIT_MASK and if that fails then try allocating
3609801e 1813 * from higher range
ba395927 1814 */
f76aec76 1815 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 1816 if (!iova)
bb9e6d65 1817 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
1818 }
1819
1820 if (!iova) {
1821 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
1822 return NULL;
1823 }
1824
1825 return iova;
1826}
1827
1828static struct dmar_domain *
1829get_valid_domain_for_dev(struct pci_dev *pdev)
1830{
1831 struct dmar_domain *domain;
1832 int ret;
1833
1834 domain = get_domain_for_dev(pdev,
1835 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1836 if (!domain) {
1837 printk(KERN_ERR
1838 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 1839 return NULL;
ba395927
KA
1840 }
1841
1842 /* make sure context mapping is ok */
1843 if (unlikely(!domain_context_mapped(domain, pdev))) {
1844 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
1845 if (ret) {
1846 printk(KERN_ERR
1847 "Domain context map for %s failed",
1848 pci_name(pdev));
4fe05bbc 1849 return NULL;
f76aec76 1850 }
ba395927
KA
1851 }
1852
f76aec76
KA
1853 return domain;
1854}
1855
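/*
 * DMA map path: look up (or create) the device's domain, allocate an IOVA
 * range below the device's DMA mask, install page-table entries whose
 * read/write bits are derived from the DMA direction, and finally flush
 * the IOTLB (page-selective) or the write buffers so the new mapping
 * becomes visible to the hardware.
 */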
1856static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
1857 size_t size, int dir, u64 dma_mask)
f76aec76
KA
1858{
1859 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 1860 struct dmar_domain *domain;
5b6985ce 1861 phys_addr_t start_paddr;
f76aec76
KA
1862 struct iova *iova;
1863 int prot = 0;
6865f0d1 1864 int ret;
f76aec76
KA
1865
1866 BUG_ON(dir == DMA_NONE);
358dd8ac 1867 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 1868 return paddr;
f76aec76
KA
1869
1870 domain = get_valid_domain_for_dev(pdev);
1871 if (!domain)
1872 return 0;
1873
6865f0d1 1874 size = aligned_size((u64)paddr, size);
f76aec76 1875
bb9e6d65 1876 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
1877 if (!iova)
1878 goto error;
1879
5b6985ce 1880 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 1881
ba395927
KA
1882 /*
1883 * Check if DMAR supports zero-length reads on write only
1884 * mappings..
1885 */
1886 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1887 !cap_zlr(domain->iommu->cap))
1888 prot |= DMA_PTE_READ;
1889 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1890 prot |= DMA_PTE_WRITE;
1891 /*
6865f0d1 1892 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 1893 * page. Note: if two part of one page are separately mapped, we
6865f0d1 1894 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
1895 * is not a big problem
1896 */
6865f0d1 1897 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 1898 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
1899 if (ret)
1900 goto error;
1901
f76aec76
KA
1902 /* it's a non-present to present mapping */
1903 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
5b6985ce 1904 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76
KA
1905 if (ret)
1906 iommu_flush_write_buffer(domain->iommu);
1907
5b6985ce 1908 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 1909
ba395927 1910error:
f76aec76
KA
1911 if (iova)
1912 __free_iova(&domain->iovad, iova);
ba395927 1913 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 1914 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
1915 return 0;
1916}
1917
1918dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
1919 size_t size, int dir)
1920{
1921 return __intel_map_single(hwdev, paddr, size, dir,
1922 to_pci_dev(hwdev)->dma_mask);
1923}
1924
5e0d2a6f 1925static void flush_unmaps(void)
1926{
80b20dd8 1927 int i, j;
5e0d2a6f 1928
5e0d2a6f 1929 timer_on = 0;
1930
1931 /* just flush them all */
1932 for (i = 0; i < g_num_of_iommus; i++) {
80b20dd8 1933 if (deferred_flush[i].next) {
1934 struct intel_iommu *iommu =
1935 deferred_flush[i].domain[0]->iommu;
1936
1937 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1938 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 1939 for (j = 0; j < deferred_flush[i].next; j++) {
1940 __free_iova(&deferred_flush[i].domain[j]->iovad,
1941 deferred_flush[i].iova[j]);
1942 }
1943 deferred_flush[i].next = 0;
1944 }
5e0d2a6f 1945 }
1946
5e0d2a6f 1947 list_size = 0;
5e0d2a6f 1948}
1949
1950static void flush_unmaps_timeout(unsigned long data)
1951{
80b20dd8 1952 unsigned long flags;
1953
1954 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 1955 flush_unmaps();
80b20dd8 1956 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 1957}
1958
1959static void add_unmap(struct dmar_domain *dom, struct iova *iova)
1960{
1961 unsigned long flags;
80b20dd8 1962 int next, iommu_id;
5e0d2a6f 1963
1964 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 1965 if (list_size == HIGH_WATER_MARK)
1966 flush_unmaps();
1967
1968 iommu_id = dom->iommu->seq_id;
1969
80b20dd8 1970 next = deferred_flush[iommu_id].next;
1971 deferred_flush[iommu_id].domain[next] = dom;
1972 deferred_flush[iommu_id].iova[next] = iova;
1973 deferred_flush[iommu_id].next++;
5e0d2a6f 1974
1975 if (!timer_on) {
1976 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
1977 timer_on = 1;
1978 }
1979 list_size++;
1980 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
1981}
1982
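/*
 * intel_unmap_single() tears a mapping down: find the IOVA covering
 * dev_addr, clear the PTEs and free the page tables for the whole
 * aligned range, then either flush the IOTLB and free the IOVA right
 * away (intel_iommu_strict) or hand the IOVA to the deferred-flush
 * queue above.
 */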
1983void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
1984 int dir)
ba395927 1985{
ba395927 1986 struct pci_dev *pdev = to_pci_dev(dev);
1987 struct dmar_domain *domain;
1988 unsigned long start_addr;
1989 struct iova *iova;
1990
358dd8ac 1991 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 1992 return;
1993 domain = find_domain(pdev);
1994 BUG_ON(!domain);
1995
1996 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 1997 if (!iova)
ba395927 1998 return;
ba395927 1999
5b6985ce 2000 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2001 size = aligned_size((u64)dev_addr, size);
ba395927 2002
f76aec76 2003 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2004 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2005
2006 /* clear the whole page */
2007 dma_pte_clear_range(domain, start_addr, start_addr + size);
2008 /* free page tables */
2009 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2010 if (intel_iommu_strict) {
2011 if (iommu_flush_iotlb_psi(domain->iommu,
5b6985ce 2012 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
5e0d2a6f 2013 iommu_flush_write_buffer(domain->iommu);
2014 /* free iova */
2015 __free_iova(&domain->iovad, iova);
2016 } else {
2017 add_unmap(domain, iova);
2018 /*
 2019 * queue up the release of the unmap; batching it saves roughly the
 2020 * 1/6th of a CPU otherwise consumed by per-unmap IOTLB flushes...
2021 */
5e0d2a6f 2022 }
2023}
2024
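/*
 * Coherent allocations: intel_alloc_coherent() rounds the request up to
 * whole pages, grabs zeroed pages from the normal allocator (GFP_DMA
 * and GFP_DMA32 are masked off, since the IOMMU can remap the pages to
 * fit the device's coherent_dma_mask), and maps them bidirectionally
 * through __intel_map_single().
 */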
2025void *intel_alloc_coherent(struct device *hwdev, size_t size,
2026 dma_addr_t *dma_handle, gfp_t flags)
2027{
2028 void *vaddr;
2029 int order;
2030
5b6985ce 2031 size = PAGE_ALIGN(size);
2032 order = get_order(size);
2033 flags &= ~(GFP_DMA | GFP_DMA32);
2034
2035 vaddr = (void *)__get_free_pages(flags, order);
2036 if (!vaddr)
2037 return NULL;
2038 memset(vaddr, 0, size);
2039
2040 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2041 DMA_BIDIRECTIONAL,
2042 hwdev->coherent_dma_mask);
2043 if (*dma_handle)
2044 return vaddr;
2045 free_pages((unsigned long)vaddr, order);
2046 return NULL;
2047}
2048
2049void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2050 dma_addr_t dma_handle)
2051{
2052 int order;
2053
5b6985ce 2054 size = PAGE_ALIGN(size);
2055 order = get_order(size);
2056
2057 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2058 free_pages((unsigned long)vaddr, order);
2059}
2060
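/*
 * Scatter-gather teardown: the whole list was mapped into a single
 * contiguous IOVA range, so intel_unmap_sg() recomputes the total
 * aligned size, clears and frees that one range, issues a single IOTLB
 * flush, and returns the IOVA to the allocator.
 */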
12d4d40e 2061#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
2062
2063void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2064 int nelems, int dir)
2065{
2066 int i;
2067 struct pci_dev *pdev = to_pci_dev(hwdev);
2068 struct dmar_domain *domain;
2069 unsigned long start_addr;
2070 struct iova *iova;
2071 size_t size = 0;
2072 void *addr;
c03ab37c 2073 struct scatterlist *sg;
ba395927 2074
358dd8ac 2075 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
2076 return;
2077
2078 domain = find_domain(pdev);
ba395927 2079
c03ab37c 2080 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2081 if (!iova)
2082 return;
c03ab37c 2083 for_each_sg(sglist, sg, nelems, i) {
2084 addr = SG_ENT_VIRT_ADDRESS(sg);
2085 size += aligned_size((u64)addr, sg->length);
2086 }
2087
5b6985ce 2088 start_addr = iova->pfn_lo << PAGE_SHIFT;
2089
2090 /* clear the whole page */
2091 dma_pte_clear_range(domain, start_addr, start_addr + size);
2092 /* free page tables */
2093 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2094
2095 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
5b6985ce 2096 size >> VTD_PAGE_SHIFT, 0))
ba395927 2097 iommu_flush_write_buffer(domain->iommu);
2098
2099 /* free iova */
2100 __free_iova(&domain->iovad, iova);
2101}
2102
ba395927 2103static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2104 struct scatterlist *sglist, int nelems, int dir)
2105{
2106 int i;
c03ab37c 2107 struct scatterlist *sg;
ba395927 2108
c03ab37c 2109 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2110 BUG_ON(!sg_page(sg));
2111 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2112 sg->dma_length = sg->length;
2113 }
2114 return nelems;
2115}
2116
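/*
 * intel_map_sg() mirrors the unmap path above: sum the aligned length
 * of every element, allocate one IOVA range covering the whole list,
 * map each element back-to-back at increasing offsets, record the
 * resulting dma_address/dma_length in the scatterlist, and finish with
 * a single IOTLB flush for the newly present mappings.
 */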
2117int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2118 int dir)
2119{
2120 void *addr;
2121 int i;
2122 struct pci_dev *pdev = to_pci_dev(hwdev);
2123 struct dmar_domain *domain;
2124 size_t size = 0;
2125 int prot = 0;
2126 size_t offset = 0;
2127 struct iova *iova = NULL;
2128 int ret;
c03ab37c 2129 struct scatterlist *sg;
f76aec76 2130 unsigned long start_addr;
2131
2132 BUG_ON(dir == DMA_NONE);
358dd8ac 2133 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2134 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2135
2136 domain = get_valid_domain_for_dev(pdev);
2137 if (!domain)
2138 return 0;
2139
c03ab37c 2140 for_each_sg(sglist, sg, nelems, i) {
ba395927 2141 addr = SG_ENT_VIRT_ADDRESS(sg);
2142 addr = (void *)virt_to_phys(addr);
2143 size += aligned_size((u64)addr, sg->length);
2144 }
2145
bb9e6d65 2146 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2147 if (!iova) {
c03ab37c 2148 sglist->dma_length = 0;
2149 return 0;
2150 }
2151
2152 /*
2153 * Check if DMAR supports zero-length reads on write only
 2154 * mappings.
 2155 */
 2156 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2157 !cap_zlr(domain->iommu->cap))
2158 prot |= DMA_PTE_READ;
2159 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2160 prot |= DMA_PTE_WRITE;
2161
5b6985ce 2162 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2163 offset = 0;
c03ab37c 2164 for_each_sg(sglist, sg, nelems, i) {
2165 addr = SG_ENT_VIRT_ADDRESS(sg);
2166 addr = (void *)virt_to_phys(addr);
2167 size = aligned_size((u64)addr, sg->length);
2168 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2169 ((u64)addr) & PAGE_MASK,
2170 size, prot);
2171 if (ret) {
2172 /* clear the page */
2173 dma_pte_clear_range(domain, start_addr,
2174 start_addr + offset);
2175 /* free page tables */
2176 dma_pte_free_pagetable(domain, start_addr,
2177 start_addr + offset);
2178 /* free iova */
2179 __free_iova(&domain->iovad, iova);
2180 return 0;
2181 }
f76aec76 2182 sg->dma_address = start_addr + offset +
5b6985ce 2183 ((u64)addr & (~PAGE_MASK));
ba395927 2184 sg->dma_length = sg->length;
f76aec76 2185 offset += size;
2186 }
2187
ba395927 2188 /* it's a non-present to present mapping */
f76aec76 2189 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
5b6985ce 2190 start_addr, offset >> VTD_PAGE_SHIFT, 1))
2191 iommu_flush_write_buffer(domain->iommu);
2192 return nelems;
2193}
2194
2195static struct dma_mapping_ops intel_dma_ops = {
2196 .alloc_coherent = intel_alloc_coherent,
2197 .free_coherent = intel_free_coherent,
2198 .map_single = intel_map_single,
2199 .unmap_single = intel_unmap_single,
2200 .map_sg = intel_map_sg,
2201 .unmap_sg = intel_unmap_sg,
2202};
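/*
 * This ops table is what plugs the functions above into the generic
 * x86 DMA API: intel_iommu_init() points the global dma_ops at it, so
 * driver calls such as dma_map_single()/dma_map_sg() end up in
 * intel_map_single()/intel_map_sg() once the IOMMU is up.
 */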
2203
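/*
 * Three slab caches back the driver's frequently allocated objects:
 * dmar_domain structures, device_domain_info structures and iova
 * descriptors.  They are created together in iommu_init_mempool() and
 * destroyed in iommu_exit_mempool().
 */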
2204static inline int iommu_domain_cache_init(void)
2205{
2206 int ret = 0;
2207
2208 iommu_domain_cache = kmem_cache_create("iommu_domain",
2209 sizeof(struct dmar_domain),
2210 0,
2211 SLAB_HWCACHE_ALIGN,
2212
2213 NULL);
2214 if (!iommu_domain_cache) {
2215 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2216 ret = -ENOMEM;
2217 }
2218
2219 return ret;
2220}
2221
2222static inline int iommu_devinfo_cache_init(void)
2223{
2224 int ret = 0;
2225
2226 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2227 sizeof(struct device_domain_info),
2228 0,
2229 SLAB_HWCACHE_ALIGN,
2230 NULL);
2231 if (!iommu_devinfo_cache) {
2232 printk(KERN_ERR "Couldn't create devinfo cache\n");
2233 ret = -ENOMEM;
2234 }
2235
2236 return ret;
2237}
2238
2239static inline int iommu_iova_cache_init(void)
2240{
2241 int ret = 0;
2242
2243 iommu_iova_cache = kmem_cache_create("iommu_iova",
2244 sizeof(struct iova),
2245 0,
2246 SLAB_HWCACHE_ALIGN,
2247 NULL);
2248 if (!iommu_iova_cache) {
2249 printk(KERN_ERR "Couldn't create iova cache\n");
2250 ret = -ENOMEM;
2251 }
2252
2253 return ret;
2254}
2255
2256static int __init iommu_init_mempool(void)
2257{
2258 int ret;
2259 ret = iommu_iova_cache_init();
2260 if (ret)
2261 return ret;
2262
2263 ret = iommu_domain_cache_init();
2264 if (ret)
2265 goto domain_error;
2266
2267 ret = iommu_devinfo_cache_init();
2268 if (!ret)
2269 return ret;
2270
2271 kmem_cache_destroy(iommu_domain_cache);
2272domain_error:
2273 kmem_cache_destroy(iommu_iova_cache);
2274
2275 return -ENOMEM;
2276}
2277
2278static void __init iommu_exit_mempool(void)
2279{
2280 kmem_cache_destroy(iommu_devinfo_cache);
2281 kmem_cache_destroy(iommu_domain_cache);
2282 kmem_cache_destroy(iommu_iova_cache);
2283
2284}
2285
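/*
 * init_no_remapping_devices() prunes DMAR units that should not be
 * used: units whose device scope matches no present PCI device are
 * ignored, and when gfx mapping is off (dmar_map_gfx == 0) units that
 * cover only graphics devices are ignored as well, their devices being
 * marked DUMMY_DEVICE_DOMAIN_INFO so their DMA stays untranslated.
 */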
2286static void __init init_no_remapping_devices(void)
2287{
2288 struct dmar_drhd_unit *drhd;
2289
2290 for_each_drhd_unit(drhd) {
2291 if (!drhd->include_all) {
2292 int i;
2293 for (i = 0; i < drhd->devices_cnt; i++)
2294 if (drhd->devices[i] != NULL)
2295 break;
2296 /* ignore DMAR unit if no pci devices exist */
2297 if (i == drhd->devices_cnt)
2298 drhd->ignored = 1;
2299 }
2300 }
2301
2302 if (dmar_map_gfx)
2303 return;
2304
2305 for_each_drhd_unit(drhd) {
2306 int i;
2307 if (drhd->ignored || drhd->include_all)
2308 continue;
2309
2310 for (i = 0; i < drhd->devices_cnt; i++)
2311 if (drhd->devices[i] &&
2312 !IS_GFX_DEVICE(drhd->devices[i]))
2313 break;
2314
2315 if (i < drhd->devices_cnt)
2316 continue;
2317
2318 /* bypass IOMMU if it is just for gfx devices */
2319 drhd->ignored = 1;
2320 for (i = 0; i < drhd->devices_cnt; i++) {
2321 if (!drhd->devices[i])
2322 continue;
358dd8ac 2323 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2324 }
2325 }
2326}
2327
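/*
 * intel_iommu_init() is the boot-time entry point: parse the DMAR ACPI
 * table and its device scopes, bail out if the IOMMU is disabled or
 * swiotlb is in use, set up the slab caches and reserved IOVA ranges,
 * skip gfx-only units, initialize the DMAR units via init_dmars(),
 * start the deferred-unmap timer, and finally install intel_dma_ops as
 * the system dma_ops.
 */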
2328int __init intel_iommu_init(void)
2329{
2330 int ret = 0;
2331
2332 if (dmar_table_init())
2333 return -ENODEV;
2334
2335 if (dmar_dev_scope_init())
2336 return -ENODEV;
2337
2338 /*
2339 * Check the need for DMA-remapping initialization now.
2340 * Above initialization will also be used by Interrupt-remapping.
2341 */
2342 if (no_iommu || swiotlb || dmar_disabled)
2343 return -ENODEV;
2344
2345 iommu_init_mempool();
2346 dmar_init_reserved_ranges();
2347
2348 init_no_remapping_devices();
2349
2350 ret = init_dmars();
2351 if (ret) {
2352 printk(KERN_ERR "IOMMU: dmar init failed\n");
2353 put_iova_domain(&reserved_iova_list);
2354 iommu_exit_mempool();
2355 return ret;
2356 }
2357 printk(KERN_INFO
2358 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2359
5e0d2a6f 2360 init_timer(&unmap_timer);
2361 force_iommu = 1;
2362 dma_ops = &intel_dma_ops;
2363 return 0;
2364}
e820482c 2365
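/*
 * The intel_iommu_* exports below give other kernel code (device
 * assignment, for instance) direct control over domains, context
 * entries and page mappings.  A rough, illustrative sequence (dom,
 * pdev, iova and hpa are hypothetical caller state, not names from
 * this file):
 *
 *	struct dmar_domain *dom = intel_iommu_domain_alloc(pdev);
 *	if (dom && !intel_iommu_context_mapping(dom, pdev))
 *		intel_iommu_page_mapping(dom, iova, hpa, PAGE_SIZE,
 *					 DMA_PTE_READ | DMA_PTE_WRITE);
 *	...
 *	intel_iommu_domain_exit(dom);
 */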
2366void intel_iommu_domain_exit(struct dmar_domain *domain)
2367{
2368 u64 end;
2369
 2370 /* Domain 0 is reserved, so don't process it */
2371 if (!domain)
2372 return;
2373
2374 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 2375 end = end & (~VTD_PAGE_MASK);
2376
2377 /* clear ptes */
2378 dma_pte_clear_range(domain, 0, end);
2379
2380 /* free page tables */
2381 dma_pte_free_pagetable(domain, 0, end);
2382
2383 iommu_free_domain(domain);
2384 free_domain_mem(domain);
2385}
2386EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
2387
2388struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
2389{
2390 struct dmar_drhd_unit *drhd;
2391 struct dmar_domain *domain;
2392 struct intel_iommu *iommu;
2393
2394 drhd = dmar_find_matched_drhd_unit(pdev);
2395 if (!drhd) {
2396 printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
2397 return NULL;
2398 }
2399
2400 iommu = drhd->iommu;
2401 if (!iommu) {
2402 printk(KERN_ERR
2403 "intel_iommu_domain_alloc: iommu == NULL\n");
2404 return NULL;
2405 }
2406 domain = iommu_alloc_domain(iommu);
2407 if (!domain) {
2408 printk(KERN_ERR
2409 "intel_iommu_domain_alloc: domain == NULL\n");
2410 return NULL;
2411 }
2412 if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2413 printk(KERN_ERR
2414 "intel_iommu_domain_alloc: domain_init() failed\n");
2415 intel_iommu_domain_exit(domain);
2416 return NULL;
2417 }
2418 return domain;
2419}
2420EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
2421
2422int intel_iommu_context_mapping(
2423 struct dmar_domain *domain, struct pci_dev *pdev)
2424{
2425 int rc;
2426 rc = domain_context_mapping(domain, pdev);
2427 return rc;
2428}
2429EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
2430
2431int intel_iommu_page_mapping(
2432 struct dmar_domain *domain, dma_addr_t iova,
2433 u64 hpa, size_t size, int prot)
2434{
2435 int rc;
2436 rc = domain_page_mapping(domain, iova, hpa, size, prot);
2437 return rc;
2438}
2439EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
2440
2441void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
2442{
2443 detach_domain_for_dev(domain, bus, devfn);
2444}
2445EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
2446
2447struct dmar_domain *
2448intel_iommu_find_domain(struct pci_dev *pdev)
2449{
2450 return find_domain(pdev);
2451}
2452EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
2453
2454int intel_iommu_found(void)
2455{
2456 return g_num_of_iommus;
2457}
2458EXPORT_SYMBOL_GPL(intel_iommu_found);
2459
2460u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
2461{
2462 struct dma_pte *pte;
2463 u64 pfn;
2464
2465 pfn = 0;
2466 pte = addr_to_dma_pte(domain, iova);
2467
2468 if (pte)
2469 pfn = dma_pte_addr(*pte);
2470
5b6985ce 2471 return pfn >> VTD_PAGE_SHIFT;
2472}
2473EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn);