copyright owner and author clean up for intel iommu and related files
[deliverable/linux.git] / drivers/pci/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 */
22
23#include <linux/init.h>
24#include <linux/bitmap.h>
25#include <linux/slab.h>
26#include <linux/irq.h>
27#include <linux/interrupt.h>
28#include <linux/sysdev.h>
29#include <linux/spinlock.h>
30#include <linux/pci.h>
31#include <linux/dmar.h>
32#include <linux/dma-mapping.h>
33#include <linux/mempool.h>
34#include "iova.h"
35#include "intel-iommu.h"
36#include <asm/proto.h> /* force_iommu in this header in x86-64*/
37#include <asm/cacheflush.h>
38#include <asm/gart.h>
39#include "pci.h"
40
41#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
42#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
43
44#define IOAPIC_RANGE_START (0xfee00000)
45#define IOAPIC_RANGE_END (0xfeefffff)
46#define IOVA_START_ADDR (0x1000)
47
48#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
49
50#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
51
52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
53
54static void domain_remove_dev_info(struct dmar_domain *domain);
55
56static int dmar_disabled;
57static int __initdata dmar_map_gfx = 1;
58static int dmar_forcedac;
59
60#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
61static DEFINE_SPINLOCK(device_domain_lock);
62static LIST_HEAD(device_domain_list);
63
64static int __init intel_iommu_setup(char *str)
65{
66 if (!str)
67 return -EINVAL;
68 while (*str) {
69 if (!strncmp(str, "off", 3)) {
70 dmar_disabled = 1;
71 printk(KERN_INFO"Intel-IOMMU: disabled\n");
72 } else if (!strncmp(str, "igfx_off", 8)) {
73 dmar_map_gfx = 0;
74 printk(KERN_INFO
75 "Intel-IOMMU: disable GFX device mapping\n");
76 } else if (!strncmp(str, "forcedac", 8)) {
77 printk (KERN_INFO
78 "Intel-IOMMU: Forcing DAC for PCI devices\n");
79 dmar_forcedac = 1;
80 }
81
82 str += strcspn(str, ",");
83 while (*str == ',')
84 str++;
85 }
86 return 0;
87}
88__setup("intel_iommu=", intel_iommu_setup);
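/*
 * Added annotation (not in the original source): a minimal usage sketch of
 * the boot parameter parsed above.  Options are comma separated, e.g.
 *
 *     intel_iommu=igfx_off,forcedac
 *
 * leaves translation enabled but skips graphics devices (dmar_map_gfx = 0)
 * and forces 64-bit DAC addressing (dmar_forcedac = 1), while
 * "intel_iommu=off" sets dmar_disabled = 1 and disables the IOMMU entirely.
 */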
89
90static struct kmem_cache *iommu_domain_cache;
91static struct kmem_cache *iommu_devinfo_cache;
92static struct kmem_cache *iommu_iova_cache;
93
94static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
95{
96 unsigned int flags;
97 void *vaddr;
98
99 /* trying to avoid low memory issues */
100 flags = current->flags & PF_MEMALLOC;
101 current->flags |= PF_MEMALLOC;
102 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
103 current->flags &= (~PF_MEMALLOC | flags);
104 return vaddr;
105}
106
107
108static inline void *alloc_pgtable_page(void)
109{
110 unsigned int flags;
111 void *vaddr;
112
113 /* trying to avoid low memory issues */
114 flags = current->flags & PF_MEMALLOC;
115 current->flags |= PF_MEMALLOC;
116 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
117 current->flags &= (~PF_MEMALLOC | flags);
118 return vaddr;
119}
120
121static inline void free_pgtable_page(void *vaddr)
122{
123 free_page((unsigned long)vaddr);
124}
125
126static inline void *alloc_domain_mem(void)
127{
128 return iommu_kmem_cache_alloc(iommu_domain_cache);
129}
130
131static inline void free_domain_mem(void *vaddr)
132{
133 kmem_cache_free(iommu_domain_cache, vaddr);
134}
135
136static inline void * alloc_devinfo_mem(void)
137{
138 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
139}
140
141static inline void free_devinfo_mem(void *vaddr)
142{
143 kmem_cache_free(iommu_devinfo_cache, vaddr);
144}
145
146struct iova *alloc_iova_mem(void)
147{
148 return iommu_kmem_cache_alloc(iommu_iova_cache);
149}
150
151void free_iova_mem(struct iova *iova)
152{
153 kmem_cache_free(iommu_iova_cache, iova);
154}
155
156static inline void __iommu_flush_cache(
157 struct intel_iommu *iommu, void *addr, int size)
158{
159 if (!ecap_coherent(iommu->ecap))
160 clflush_cache_range(addr, size);
161}
162
163/* Gets context entry for a given bus and devfn */
164static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
165 u8 bus, u8 devfn)
166{
167 struct root_entry *root;
168 struct context_entry *context;
169 unsigned long phy_addr;
170 unsigned long flags;
171
172 spin_lock_irqsave(&iommu->lock, flags);
173 root = &iommu->root_entry[bus];
174 context = get_context_addr_from_root(root);
175 if (!context) {
176 context = (struct context_entry *)alloc_pgtable_page();
177 if (!context) {
178 spin_unlock_irqrestore(&iommu->lock, flags);
179 return NULL;
180 }
181 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
182 phy_addr = virt_to_phys((void *)context);
183 set_root_value(root, phy_addr);
184 set_root_present(root);
185 __iommu_flush_cache(iommu, root, sizeof(*root));
186 }
187 spin_unlock_irqrestore(&iommu->lock, flags);
188 return &context[devfn];
189}
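/*
 * Added annotation (illustrative, not in the original source): the lookup
 * above is a two-level walk.  The root table is indexed by bus number and
 * each present root entry points to a 4K context table indexed by devfn.
 * For a hypothetical device 02:05.1, bus = 0x02 and devfn = PCI_DEVFN(5, 1)
 * = 0x29, so the returned entry is context[0x29] of the context table hung
 * off root_entry[0x02].
 */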
190
191static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
192{
193 struct root_entry *root;
194 struct context_entry *context;
195 int ret;
196 unsigned long flags;
197
198 spin_lock_irqsave(&iommu->lock, flags);
199 root = &iommu->root_entry[bus];
200 context = get_context_addr_from_root(root);
201 if (!context) {
202 ret = 0;
203 goto out;
204 }
205 ret = context_present(context[devfn]);
206out:
207 spin_unlock_irqrestore(&iommu->lock, flags);
208 return ret;
209}
210
211static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
212{
213 struct root_entry *root;
214 struct context_entry *context;
215 unsigned long flags;
216
217 spin_lock_irqsave(&iommu->lock, flags);
218 root = &iommu->root_entry[bus];
219 context = get_context_addr_from_root(root);
220 if (context) {
221 context_clear_entry(context[devfn]);
222 __iommu_flush_cache(iommu, &context[devfn], \
223 sizeof(*context));
224 }
225 spin_unlock_irqrestore(&iommu->lock, flags);
226}
227
228static void free_context_table(struct intel_iommu *iommu)
229{
230 struct root_entry *root;
231 int i;
232 unsigned long flags;
233 struct context_entry *context;
234
235 spin_lock_irqsave(&iommu->lock, flags);
236 if (!iommu->root_entry) {
237 goto out;
238 }
239 for (i = 0; i < ROOT_ENTRY_NR; i++) {
240 root = &iommu->root_entry[i];
241 context = get_context_addr_from_root(root);
242 if (context)
243 free_pgtable_page(context);
244 }
245 free_pgtable_page(iommu->root_entry);
246 iommu->root_entry = NULL;
247out:
248 spin_unlock_irqrestore(&iommu->lock, flags);
249}
250
251/* page table handling */
252#define LEVEL_STRIDE (9)
253#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
254
255static inline int agaw_to_level(int agaw)
256{
257 return agaw + 2;
258}
259
260static inline int agaw_to_width(int agaw)
261{
262 return 30 + agaw * LEVEL_STRIDE;
263
264}
265
266static inline int width_to_agaw(int width)
267{
268 return (width - 30) / LEVEL_STRIDE;
269}
270
271static inline unsigned int level_to_offset_bits(int level)
272{
273 return (12 + (level - 1) * LEVEL_STRIDE);
274}
275
276static inline int address_level_offset(u64 addr, int level)
277{
278 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
279}
280
281static inline u64 level_mask(int level)
282{
283 return ((u64)-1 << level_to_offset_bits(level));
284}
285
286static inline u64 level_size(int level)
287{
288 return ((u64)1 << level_to_offset_bits(level));
289}
290
291static inline u64 align_to_level(u64 addr, int level)
292{
293 return ((addr + level_size(level) - 1) & level_mask(level));
294}
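/*
 * Worked example (added annotation, not in the original source): with
 * agaw = 2 a domain uses agaw_to_width(2) = 48 address bits and
 * agaw_to_level(2) = 4 page-table levels.  Each level index is
 * LEVEL_STRIDE = 9 bits wide: level_to_offset_bits() yields 12/21/30/39
 * for levels 1-4, level_size() is 4KB/2MB/1GB/512GB, and
 * address_level_offset() extracts addr[20:12], addr[29:21], addr[38:30]
 * and addr[47:39] as the index at each level.
 */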
295
296static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
297{
298 int addr_width = agaw_to_width(domain->agaw);
299 struct dma_pte *parent, *pte = NULL;
300 int level = agaw_to_level(domain->agaw);
301 int offset;
302 unsigned long flags;
303
304 BUG_ON(!domain->pgd);
305
306 addr &= (((u64)1) << addr_width) - 1;
307 parent = domain->pgd;
308
309 spin_lock_irqsave(&domain->mapping_lock, flags);
310 while (level > 0) {
311 void *tmp_page;
312
313 offset = address_level_offset(addr, level);
314 pte = &parent[offset];
315 if (level == 1)
316 break;
317
318 if (!dma_pte_present(*pte)) {
319 tmp_page = alloc_pgtable_page();
320
321 if (!tmp_page) {
322 spin_unlock_irqrestore(&domain->mapping_lock,
323 flags);
324 return NULL;
325 }
326 __iommu_flush_cache(domain->iommu, tmp_page,
327 PAGE_SIZE_4K);
328 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
329 /*
330 * high level table always sets r/w, last level page
331 * table control read/write
332 */
333 dma_set_pte_readable(*pte);
334 dma_set_pte_writable(*pte);
335 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
336 }
337 parent = phys_to_virt(dma_pte_addr(*pte));
338 level--;
339 }
340
341 spin_unlock_irqrestore(&domain->mapping_lock, flags);
342 return pte;
343}
344
345/* return address's pte at specific level */
346static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
347 int level)
348{
349 struct dma_pte *parent, *pte = NULL;
350 int total = agaw_to_level(domain->agaw);
351 int offset;
352
353 parent = domain->pgd;
354 while (level <= total) {
355 offset = address_level_offset(addr, total);
356 pte = &parent[offset];
357 if (level == total)
358 return pte;
359
360 if (!dma_pte_present(*pte))
361 break;
362 parent = phys_to_virt(dma_pte_addr(*pte));
363 total--;
364 }
365 return NULL;
366}
367
368/* clear one page's page table */
369static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
370{
371 struct dma_pte *pte = NULL;
372
373 /* get last level pte */
374 pte = dma_addr_level_pte(domain, addr, 1);
375
376 if (pte) {
377 dma_clear_pte(*pte);
378 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
379 }
380}
381
382/* clear last level pte, a tlb flush should be followed */
383static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
384{
385 int addr_width = agaw_to_width(domain->agaw);
386
387 start &= (((u64)1) << addr_width) - 1;
388 end &= (((u64)1) << addr_width) - 1;
389 /* in case it's partial page */
390 start = PAGE_ALIGN_4K(start);
391 end &= PAGE_MASK_4K;
392
393 /* we don't need lock here, nobody else touches the iova range */
394 while (start < end) {
395 dma_pte_clear_one(domain, start);
396 start += PAGE_SIZE_4K;
397 }
398}
399
400/* free page table pages. last level pte should already be cleared */
401static void dma_pte_free_pagetable(struct dmar_domain *domain,
402 u64 start, u64 end)
403{
404 int addr_width = agaw_to_width(domain->agaw);
405 struct dma_pte *pte;
406 int total = agaw_to_level(domain->agaw);
407 int level;
408 u64 tmp;
409
410 start &= (((u64)1) << addr_width) - 1;
411 end &= (((u64)1) << addr_width) - 1;
412
413 /* we don't need lock here, nobody else touches the iova range */
414 level = 2;
415 while (level <= total) {
416 tmp = align_to_level(start, level);
417 if (tmp >= end || (tmp + level_size(level) > end))
418 return;
419
420 while (tmp < end) {
421 pte = dma_addr_level_pte(domain, tmp, level);
422 if (pte) {
423 free_pgtable_page(
424 phys_to_virt(dma_pte_addr(*pte)));
425 dma_clear_pte(*pte);
426 __iommu_flush_cache(domain->iommu,
427 pte, sizeof(*pte));
428 }
429 tmp += level_size(level);
430 }
431 level++;
432 }
433 /* free pgd */
434 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
435 free_pgtable_page(domain->pgd);
436 domain->pgd = NULL;
437 }
438}
439
440/* iommu handling */
441static int iommu_alloc_root_entry(struct intel_iommu *iommu)
442{
443 struct root_entry *root;
444 unsigned long flags;
445
446 root = (struct root_entry *)alloc_pgtable_page();
447 if (!root)
448 return -ENOMEM;
449
450 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
451
452 spin_lock_irqsave(&iommu->lock, flags);
453 iommu->root_entry = root;
454 spin_unlock_irqrestore(&iommu->lock, flags);
455
456 return 0;
457}
458
459#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
460{\
461 unsigned long start_time = jiffies;\
462 while (1) {\
463 sts = op (iommu->reg + offset);\
464 if (cond)\
465 break;\
466 if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
467 panic("DMAR hardware is malfunctioning\n");\
468 cpu_relax();\
469 }\
470}
471
472static void iommu_set_root_entry(struct intel_iommu *iommu)
473{
474 void *addr;
475 u32 cmd, sts;
476 unsigned long flag;
477
478 addr = iommu->root_entry;
479
480 spin_lock_irqsave(&iommu->register_lock, flag);
481 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
482
483 cmd = iommu->gcmd | DMA_GCMD_SRTP;
484 writel(cmd, iommu->reg + DMAR_GCMD_REG);
485
486 /* Make sure hardware completes it */
487 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
488 readl, (sts & DMA_GSTS_RTPS), sts);
489
490 spin_unlock_irqrestore(&iommu->register_lock, flag);
491}
492
493static void iommu_flush_write_buffer(struct intel_iommu *iommu)
494{
495 u32 val;
496 unsigned long flag;
497
498 if (!cap_rwbf(iommu->cap))
499 return;
500 val = iommu->gcmd | DMA_GCMD_WBF;
501
502 spin_lock_irqsave(&iommu->register_lock, flag);
503 writel(val, iommu->reg + DMAR_GCMD_REG);
504
505 /* Make sure hardware completes it */
506 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
507 readl, (!(val & DMA_GSTS_WBFS)), val);
508
509 spin_unlock_irqrestore(&iommu->register_lock, flag);
510}
511
512/* return value determines if we need a write buffer flush */
513static int __iommu_flush_context(struct intel_iommu *iommu,
514 u16 did, u16 source_id, u8 function_mask, u64 type,
515 int non_present_entry_flush)
516{
517 u64 val = 0;
518 unsigned long flag;
519
520 /*
521 * In the non-present entry flush case, if hardware doesn't cache
522 * non-present entries we do nothing, and if hardware does cache
523 * non-present entries, we flush entries of domain 0 (the domain id is
524 * used to cache any non-present entries)
525 */
526 if (non_present_entry_flush) {
527 if (!cap_caching_mode(iommu->cap))
528 return 1;
529 else
530 did = 0;
531 }
532
533 switch (type) {
534 case DMA_CCMD_GLOBAL_INVL:
535 val = DMA_CCMD_GLOBAL_INVL;
536 break;
537 case DMA_CCMD_DOMAIN_INVL:
538 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
539 break;
540 case DMA_CCMD_DEVICE_INVL:
541 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
542 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
543 break;
544 default:
545 BUG();
546 }
547 val |= DMA_CCMD_ICC;
548
549 spin_lock_irqsave(&iommu->register_lock, flag);
550 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
551
552 /* Make sure hardware completes it */
553 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
554 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
555
556 spin_unlock_irqrestore(&iommu->register_lock, flag);
557
558 /* flush context entry will implicitly flush write buffer */
559 return 0;
560}
561
562static int inline iommu_flush_context_global(struct intel_iommu *iommu,
563 int non_present_entry_flush)
564{
565 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
566 non_present_entry_flush);
567}
568
569static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
570 int non_present_entry_flush)
571{
572 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
573 non_present_entry_flush);
574}
575
576static int inline iommu_flush_context_device(struct intel_iommu *iommu,
577 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
578{
579 return __iommu_flush_context(iommu, did, source_id, function_mask,
580 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
581}
582
583/* return value determines if we need a write buffer flush */
584static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
585 u64 addr, unsigned int size_order, u64 type,
586 int non_present_entry_flush)
587{
588 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
589 u64 val = 0, val_iva = 0;
590 unsigned long flag;
591
592 /*
593 * In the non-present entry flush case, if hardware doesn't cache
594 * non-present entries we do nothing, and if hardware does cache
595 * non-present entries, we flush entries of domain 0 (the domain id is
596 * used to cache any non-present entries)
597 */
598 if (non_present_entry_flush) {
599 if (!cap_caching_mode(iommu->cap))
600 return 1;
601 else
602 did = 0;
603 }
604
605 switch (type) {
606 case DMA_TLB_GLOBAL_FLUSH:
607 /* global flush doesn't need to set IVA_REG */
608 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
609 break;
610 case DMA_TLB_DSI_FLUSH:
611 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
612 break;
613 case DMA_TLB_PSI_FLUSH:
614 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
615 /* Note: always flush non-leaf currently */
616 val_iva = size_order | addr;
617 break;
618 default:
619 BUG();
620 }
621 /* Note: set drain read/write */
622#if 0
623 /*
624 * This is probably to be super secure.. Looks like we can
625 * ignore it without any impact.
626 */
627 if (cap_read_drain(iommu->cap))
628 val |= DMA_TLB_READ_DRAIN;
629#endif
630 if (cap_write_drain(iommu->cap))
631 val |= DMA_TLB_WRITE_DRAIN;
632
633 spin_lock_irqsave(&iommu->register_lock, flag);
634 /* Note: Only uses first TLB reg currently */
635 if (val_iva)
636 dmar_writeq(iommu->reg + tlb_offset, val_iva);
637 dmar_writeq(iommu->reg + tlb_offset + 8, val);
638
639 /* Make sure hardware completes it */
640 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
641 dmar_readq, (!(val & DMA_TLB_IVT)), val);
642
643 spin_unlock_irqrestore(&iommu->register_lock, flag);
644
645 /* check IOTLB invalidation granularity */
646 if (DMA_TLB_IAIG(val) == 0)
647 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
648 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
649 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
650 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
651 /* flush iotlb entry will implicitly flush write buffer */
652 return 0;
653}
654
655static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
656 int non_present_entry_flush)
657{
658 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
659 non_present_entry_flush);
660}
661
662static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
663 int non_present_entry_flush)
664{
665 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
666 non_present_entry_flush);
667}
668
669static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
670 u64 addr, unsigned int pages, int non_present_entry_flush)
671{
672 unsigned int mask;
673
674 BUG_ON(addr & (~PAGE_MASK_4K));
675 BUG_ON(pages == 0);
676
677 /* Fallback to domain selective flush if no PSI support */
678 if (!cap_pgsel_inv(iommu->cap))
679 return iommu_flush_iotlb_dsi(iommu, did,
680 non_present_entry_flush);
681
682 /*
683 * PSI requires page size to be 2 ^ x, and the base address is naturally
684 * aligned to the size
685 */
686 mask = ilog2(__roundup_pow_of_two(pages));
687 /* Fallback to domain selective flush if size is too big */
688 if (mask > cap_max_amask_val(iommu->cap))
689 return iommu_flush_iotlb_dsi(iommu, did,
690 non_present_entry_flush);
691
692 return __iommu_flush_iotlb(iommu, did, addr, mask,
693 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
694}
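/*
 * Worked example (added annotation, not in the original source): for a
 * request of pages = 5, __roundup_pow_of_two(5) = 8 and mask = ilog2(8) = 3,
 * so the PSI invalidation above covers 2^3 = 8 pages (32KB) starting at
 * addr.  If the resulting mask exceeds cap_max_amask_val(), the code falls
 * back to a domain-selective flush instead.
 */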
695
696static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
697{
698 u32 pmen;
699 unsigned long flags;
700
701 spin_lock_irqsave(&iommu->register_lock, flags);
702 pmen = readl(iommu->reg + DMAR_PMEN_REG);
703 pmen &= ~DMA_PMEN_EPM;
704 writel(pmen, iommu->reg + DMAR_PMEN_REG);
705
706 /* wait for the protected region status bit to clear */
707 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
708 readl, !(pmen & DMA_PMEN_PRS), pmen);
709
710 spin_unlock_irqrestore(&iommu->register_lock, flags);
711}
712
713static int iommu_enable_translation(struct intel_iommu *iommu)
714{
715 u32 sts;
716 unsigned long flags;
717
718 spin_lock_irqsave(&iommu->register_lock, flags);
719 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
720
721 /* Make sure hardware completes it */
722 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723 readl, (sts & DMA_GSTS_TES), sts);
724
725 iommu->gcmd |= DMA_GCMD_TE;
726 spin_unlock_irqrestore(&iommu->register_lock, flags);
727 return 0;
728}
729
730static int iommu_disable_translation(struct intel_iommu *iommu)
731{
732 u32 sts;
733 unsigned long flag;
734
735 spin_lock_irqsave(&iommu->register_lock, flag);
736 iommu->gcmd &= ~DMA_GCMD_TE;
737 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
738
739 /* Make sure hardware completes it */
740 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
741 readl, (!(sts & DMA_GSTS_TES)), sts);
742
743 spin_unlock_irqrestore(&iommu->register_lock, flag);
744 return 0;
745}
746
747/* iommu interrupt handling. Most of it is MSI-like. */
748
749static const char *fault_reason_strings[] =
750{
751 "Software",
752 "Present bit in root entry is clear",
753 "Present bit in context entry is clear",
754 "Invalid context entry",
755 "Access beyond MGAW",
756 "PTE Write access is not set",
757 "PTE Read access is not set",
758 "Next page table ptr is invalid",
759 "Root table address invalid",
760 "Context table ptr is invalid",
761 "non-zero reserved fields in RTP",
762 "non-zero reserved fields in CTP",
763 "non-zero reserved fields in PTE",
764};
765#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
766
767const char *dmar_get_fault_reason(u8 fault_reason)
768{
769 if (fault_reason > MAX_FAULT_REASON_IDX)
770 return "Unknown";
771 else
772 return fault_reason_strings[fault_reason];
773}
774
775void dmar_msi_unmask(unsigned int irq)
776{
777 struct intel_iommu *iommu = get_irq_data(irq);
778 unsigned long flag;
779
780 /* unmask it */
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(0, iommu->reg + DMAR_FECTL_REG);
783 /* Read a reg to force flush the post write */
784 readl(iommu->reg + DMAR_FECTL_REG);
785 spin_unlock_irqrestore(&iommu->register_lock, flag);
786}
787
788void dmar_msi_mask(unsigned int irq)
789{
790 unsigned long flag;
791 struct intel_iommu *iommu = get_irq_data(irq);
792
793 /* mask it */
794 spin_lock_irqsave(&iommu->register_lock, flag);
795 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
796 /* Read a reg to force flush the post write */
797 readl(iommu->reg + DMAR_FECTL_REG);
798 spin_unlock_irqrestore(&iommu->register_lock, flag);
799}
800
801void dmar_msi_write(int irq, struct msi_msg *msg)
802{
803 struct intel_iommu *iommu = get_irq_data(irq);
804 unsigned long flag;
805
806 spin_lock_irqsave(&iommu->register_lock, flag);
807 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
808 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
809 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
810 spin_unlock_irqrestore(&iommu->register_lock, flag);
811}
812
813void dmar_msi_read(int irq, struct msi_msg *msg)
814{
815 struct intel_iommu *iommu = get_irq_data(irq);
816 unsigned long flag;
817
818 spin_lock_irqsave(&iommu->register_lock, flag);
819 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
820 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
821 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
822 spin_unlock_irqrestore(&iommu->register_lock, flag);
823}
824
825static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
826 u8 fault_reason, u16 source_id, u64 addr)
827{
828 const char *reason;
829
830 reason = dmar_get_fault_reason(fault_reason);
831
832 printk(KERN_ERR
833 "DMAR:[%s] Request device [%02x:%02x.%d] "
834 "fault addr %llx \n"
835 "DMAR:[fault reason %02d] %s\n",
836 (type ? "DMA Read" : "DMA Write"),
837 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
838 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
839 return 0;
840}
841
842#define PRIMARY_FAULT_REG_LEN (16)
843static irqreturn_t iommu_page_fault(int irq, void *dev_id)
844{
845 struct intel_iommu *iommu = dev_id;
846 int reg, fault_index;
847 u32 fault_status;
848 unsigned long flag;
849
850 spin_lock_irqsave(&iommu->register_lock, flag);
851 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
852
853 /* TBD: ignore advanced fault log currently */
854 if (!(fault_status & DMA_FSTS_PPF))
855 goto clear_overflow;
856
857 fault_index = dma_fsts_fault_record_index(fault_status);
858 reg = cap_fault_reg_offset(iommu->cap);
859 while (1) {
860 u8 fault_reason;
861 u16 source_id;
862 u64 guest_addr;
863 int type;
864 u32 data;
865
866 /* highest 32 bits */
867 data = readl(iommu->reg + reg +
868 fault_index * PRIMARY_FAULT_REG_LEN + 12);
869 if (!(data & DMA_FRCD_F))
870 break;
871
872 fault_reason = dma_frcd_fault_reason(data);
873 type = dma_frcd_type(data);
874
875 data = readl(iommu->reg + reg +
876 fault_index * PRIMARY_FAULT_REG_LEN + 8);
877 source_id = dma_frcd_source_id(data);
878
879 guest_addr = dmar_readq(iommu->reg + reg +
880 fault_index * PRIMARY_FAULT_REG_LEN);
881 guest_addr = dma_frcd_page_addr(guest_addr);
882 /* clear the fault */
883 writel(DMA_FRCD_F, iommu->reg + reg +
884 fault_index * PRIMARY_FAULT_REG_LEN + 12);
885
886 spin_unlock_irqrestore(&iommu->register_lock, flag);
887
888 iommu_page_fault_do_one(iommu, type, fault_reason,
889 source_id, guest_addr);
890
891 fault_index++;
892 if (fault_index > cap_num_fault_regs(iommu->cap))
893 fault_index = 0;
894 spin_lock_irqsave(&iommu->register_lock, flag);
895 }
896clear_overflow:
897 /* clear primary fault overflow */
898 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
899 if (fault_status & DMA_FSTS_PFO)
900 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
901
902 spin_unlock_irqrestore(&iommu->register_lock, flag);
903 return IRQ_HANDLED;
904}
905
906int dmar_set_interrupt(struct intel_iommu *iommu)
907{
908 int irq, ret;
909
910 irq = create_irq();
911 if (!irq) {
912 printk(KERN_ERR "IOMMU: no free vectors\n");
913 return -EINVAL;
914 }
915
916 set_irq_data(irq, iommu);
917 iommu->irq = irq;
918
919 ret = arch_setup_dmar_msi(irq);
920 if (ret) {
921 set_irq_data(irq, NULL);
922 iommu->irq = 0;
923 destroy_irq(irq);
924 return 0;
925 }
926
927 /* Make sure the fault register is cleared */
928 iommu_page_fault(irq, iommu);
929
930 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
931 if (ret)
932 printk(KERN_ERR "IOMMU: can't request irq\n");
933 return ret;
934}
935
936static int iommu_init_domains(struct intel_iommu *iommu)
937{
938 unsigned long ndomains;
939 unsigned long nlongs;
940
941 ndomains = cap_ndoms(iommu->cap);
942 pr_debug("Number of Domains supported <%ld>\n", ndomains);
943 nlongs = BITS_TO_LONGS(ndomains);
944
945 /* TBD: there might be 64K domains,
946 * consider other allocation for future chip
947 */
948 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
949 if (!iommu->domain_ids) {
950 printk(KERN_ERR "Allocating domain id array failed\n");
951 return -ENOMEM;
952 }
953 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
954 GFP_KERNEL);
955 if (!iommu->domains) {
956 printk(KERN_ERR "Allocating domain array failed\n");
957 kfree(iommu->domain_ids);
958 return -ENOMEM;
959 }
960
961 /*
962 * if Caching mode is set, then invalid translations are tagged
963 * with domainid 0. Hence we need to pre-allocate it.
964 */
965 if (cap_caching_mode(iommu->cap))
966 set_bit(0, iommu->domain_ids);
967 return 0;
968}
969
970static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
971{
972 struct intel_iommu *iommu;
973 int ret;
974 int map_size;
975 u32 ver;
976
977 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
978 if (!iommu)
979 return NULL;
980 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
981 if (!iommu->reg) {
982 printk(KERN_ERR "IOMMU: can't map the region\n");
983 goto error;
984 }
985 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
986 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
987
988 /* the registers might be more than one page */
989 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
990 cap_max_fault_reg_offset(iommu->cap));
991 map_size = PAGE_ALIGN_4K(map_size);
992 if (map_size > PAGE_SIZE_4K) {
993 iounmap(iommu->reg);
994 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
995 if (!iommu->reg) {
996 printk(KERN_ERR "IOMMU: can't map the region\n");
997 goto error;
998 }
999 }
1000
1001 ver = readl(iommu->reg + DMAR_VER_REG);
1002 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1003 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1004 iommu->cap, iommu->ecap);
1005 ret = iommu_init_domains(iommu);
1006 if (ret)
1007 goto error_unmap;
1008 spin_lock_init(&iommu->lock);
1009 spin_lock_init(&iommu->register_lock);
1010
1011 drhd->iommu = iommu;
1012 return iommu;
1013error_unmap:
1014 iounmap(iommu->reg);
1015error:
1016 kfree(iommu);
1017 return NULL;
1018}
1019
1020static void domain_exit(struct dmar_domain *domain);
1021static void free_iommu(struct intel_iommu *iommu)
1022{
1023 struct dmar_domain *domain;
1024 int i;
1025
1026 if (!iommu)
1027 return;
1028
1029 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1030 for (; i < cap_ndoms(iommu->cap); ) {
1031 domain = iommu->domains[i];
1032 clear_bit(i, iommu->domain_ids);
1033 domain_exit(domain);
1034 i = find_next_bit(iommu->domain_ids,
1035 cap_ndoms(iommu->cap), i+1);
1036 }
1037
1038 if (iommu->gcmd & DMA_GCMD_TE)
1039 iommu_disable_translation(iommu);
1040
1041 if (iommu->irq) {
1042 set_irq_data(iommu->irq, NULL);
1043 /* This will mask the irq */
1044 free_irq(iommu->irq, iommu);
1045 destroy_irq(iommu->irq);
1046 }
1047
1048 kfree(iommu->domains);
1049 kfree(iommu->domain_ids);
1050
1051 /* free context mapping */
1052 free_context_table(iommu);
1053
1054 if (iommu->reg)
1055 iounmap(iommu->reg);
1056 kfree(iommu);
1057}
1058
1059static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1060{
1061 unsigned long num;
1062 unsigned long ndomains;
1063 struct dmar_domain *domain;
1064 unsigned long flags;
1065
1066 domain = alloc_domain_mem();
1067 if (!domain)
1068 return NULL;
1069
1070 ndomains = cap_ndoms(iommu->cap);
1071
1072 spin_lock_irqsave(&iommu->lock, flags);
1073 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1074 if (num >= ndomains) {
1075 spin_unlock_irqrestore(&iommu->lock, flags);
1076 free_domain_mem(domain);
1077 printk(KERN_ERR "IOMMU: no free domain ids\n");
1078 return NULL;
1079 }
1080
1081 set_bit(num, iommu->domain_ids);
1082 domain->id = num;
1083 domain->iommu = iommu;
1084 iommu->domains[num] = domain;
1085 spin_unlock_irqrestore(&iommu->lock, flags);
1086
1087 return domain;
1088}
1089
1090static void iommu_free_domain(struct dmar_domain *domain)
1091{
1092 unsigned long flags;
1093
1094 spin_lock_irqsave(&domain->iommu->lock, flags);
1095 clear_bit(domain->id, domain->iommu->domain_ids);
1096 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1097}
1098
1099static struct iova_domain reserved_iova_list;
1100
1101static void dmar_init_reserved_ranges(void)
1102{
1103 struct pci_dev *pdev = NULL;
1104 struct iova *iova;
1105 int i;
1106 u64 addr, size;
1107
1108 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1109
1110 /* IOAPIC ranges shouldn't be accessed by DMA */
1111 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1112 IOVA_PFN(IOAPIC_RANGE_END));
1113 if (!iova)
1114 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1115
1116 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1117 for_each_pci_dev(pdev) {
1118 struct resource *r;
1119
1120 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1121 r = &pdev->resource[i];
1122 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1123 continue;
1124 addr = r->start;
1125 addr &= PAGE_MASK_4K;
1126 size = r->end - addr;
1127 size = PAGE_ALIGN_4K(size);
1128 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1129 IOVA_PFN(size + addr) - 1);
1130 if (!iova)
1131 printk(KERN_ERR "Reserve iova failed\n");
1132 }
1133 }
1134
1135}
1136
1137static void domain_reserve_special_ranges(struct dmar_domain *domain)
1138{
1139 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1140}
1141
1142static inline int guestwidth_to_adjustwidth(int gaw)
1143{
1144 int agaw;
1145 int r = (gaw - 12) % 9;
1146
1147 if (r == 0)
1148 agaw = gaw;
1149 else
1150 agaw = gaw + 9 - r;
1151 if (agaw > 64)
1152 agaw = 64;
1153 return agaw;
1154}
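/*
 * Worked example (added annotation, not in the original source): the width
 * is rounded up so that (width - 12) is a multiple of 9, i.e. the bits above
 * the 4K page offset split evenly into 9-bit table indexes.
 * guestwidth_to_adjustwidth(48) = 48 since (48 - 12) % 9 == 0, while
 * guestwidth_to_adjustwidth(40) = 48 because (40 - 12) % 9 == 1 and
 * 40 + 9 - 1 = 48; anything above 64 is clamped to 64.
 */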
1155
1156static int domain_init(struct dmar_domain *domain, int guest_width)
1157{
1158 struct intel_iommu *iommu;
1159 int adjust_width, agaw;
1160 unsigned long sagaw;
1161
1162 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1163 spin_lock_init(&domain->mapping_lock);
1164
1165 domain_reserve_special_ranges(domain);
1166
1167 /* calculate AGAW */
1168 iommu = domain->iommu;
1169 if (guest_width > cap_mgaw(iommu->cap))
1170 guest_width = cap_mgaw(iommu->cap);
1171 domain->gaw = guest_width;
1172 adjust_width = guestwidth_to_adjustwidth(guest_width);
1173 agaw = width_to_agaw(adjust_width);
1174 sagaw = cap_sagaw(iommu->cap);
1175 if (!test_bit(agaw, &sagaw)) {
1176 /* hardware doesn't support it, choose a bigger one */
1177 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1178 agaw = find_next_bit(&sagaw, 5, agaw);
1179 if (agaw >= 5)
1180 return -ENODEV;
1181 }
1182 domain->agaw = agaw;
1183 INIT_LIST_HEAD(&domain->devices);
1184
1185 /* always allocate the top pgd */
1186 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1187 if (!domain->pgd)
1188 return -ENOMEM;
1189 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1190 return 0;
1191}
1192
1193static void domain_exit(struct dmar_domain *domain)
1194{
1195 u64 end;
1196
1197 /* Domain 0 is reserved, so don't process it */
1198 if (!domain)
1199 return;
1200
1201 domain_remove_dev_info(domain);
1202 /* destroy iovas */
1203 put_iova_domain(&domain->iovad);
1204 end = DOMAIN_MAX_ADDR(domain->gaw);
1205 end = end & (~PAGE_MASK_4K);
1206
1207 /* clear ptes */
1208 dma_pte_clear_range(domain, 0, end);
1209
1210 /* free page tables */
1211 dma_pte_free_pagetable(domain, 0, end);
1212
1213 iommu_free_domain(domain);
1214 free_domain_mem(domain);
1215}
1216
1217static int domain_context_mapping_one(struct dmar_domain *domain,
1218 u8 bus, u8 devfn)
1219{
1220 struct context_entry *context;
1221 struct intel_iommu *iommu = domain->iommu;
1222 unsigned long flags;
1223
1224 pr_debug("Set context mapping for %02x:%02x.%d\n",
1225 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1226 BUG_ON(!domain->pgd);
1227 context = device_to_context_entry(iommu, bus, devfn);
1228 if (!context)
1229 return -ENOMEM;
1230 spin_lock_irqsave(&iommu->lock, flags);
1231 if (context_present(*context)) {
1232 spin_unlock_irqrestore(&iommu->lock, flags);
1233 return 0;
1234 }
1235
1236 context_set_domain_id(*context, domain->id);
1237 context_set_address_width(*context, domain->agaw);
1238 context_set_address_root(*context, virt_to_phys(domain->pgd));
1239 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1240 context_set_fault_enable(*context);
1241 context_set_present(*context);
1242 __iommu_flush_cache(iommu, context, sizeof(*context));
1243
1244 /* it's a non-present to present mapping */
1245 if (iommu_flush_context_device(iommu, domain->id,
1246 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1247 iommu_flush_write_buffer(iommu);
1248 else
1249 iommu_flush_iotlb_dsi(iommu, 0, 0);
1250 spin_unlock_irqrestore(&iommu->lock, flags);
1251 return 0;
1252}
1253
1254static int
1255domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1256{
1257 int ret;
1258 struct pci_dev *tmp, *parent;
1259
1260 ret = domain_context_mapping_one(domain, pdev->bus->number,
1261 pdev->devfn);
1262 if (ret)
1263 return ret;
1264
1265 /* dependent device mapping */
1266 tmp = pci_find_upstream_pcie_bridge(pdev);
1267 if (!tmp)
1268 return 0;
1269 /* Secondary interface's bus number and devfn 0 */
1270 parent = pdev->bus->self;
1271 while (parent != tmp) {
1272 ret = domain_context_mapping_one(domain, parent->bus->number,
1273 parent->devfn);
1274 if (ret)
1275 return ret;
1276 parent = parent->bus->self;
1277 }
1278 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1279 return domain_context_mapping_one(domain,
1280 tmp->subordinate->number, 0);
1281 else /* this is a legacy PCI bridge */
1282 return domain_context_mapping_one(domain,
1283 tmp->bus->number, tmp->devfn);
1284}
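/*
 * Added annotation (illustrative, not in the original source): for a
 * conventional PCI device sitting behind a PCIe-to-PCI bridge, the walk
 * above programs a context entry for the device itself, for every bridge
 * on the path to the PCIe root, and finally for (secondary bus, devfn 0)
 * of the PCIe bridge, since DMA from behind that bridge is tagged with the
 * bridge's secondary interface as its source-id.
 */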
1285
1286static int domain_context_mapped(struct dmar_domain *domain,
1287 struct pci_dev *pdev)
1288{
1289 int ret;
1290 struct pci_dev *tmp, *parent;
1291
1292 ret = device_context_mapped(domain->iommu,
1293 pdev->bus->number, pdev->devfn);
1294 if (!ret)
1295 return ret;
1296 /* dependent device mapping */
1297 tmp = pci_find_upstream_pcie_bridge(pdev);
1298 if (!tmp)
1299 return ret;
1300 /* Secondary interface's bus number and devfn 0 */
1301 parent = pdev->bus->self;
1302 while (parent != tmp) {
1303 ret = device_context_mapped(domain->iommu, parent->bus->number,
1304 parent->devfn);
1305 if (!ret)
1306 return ret;
1307 parent = parent->bus->self;
1308 }
1309 if (tmp->is_pcie)
1310 return device_context_mapped(domain->iommu,
1311 tmp->subordinate->number, 0);
1312 else
1313 return device_context_mapped(domain->iommu,
1314 tmp->bus->number, tmp->devfn);
1315}
1316
1317static int
1318domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1319 u64 hpa, size_t size, int prot)
1320{
1321 u64 start_pfn, end_pfn;
1322 struct dma_pte *pte;
1323 int index;
1324
1325 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1326 return -EINVAL;
1327 iova &= PAGE_MASK_4K;
1328 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1329 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1330 index = 0;
1331 while (start_pfn < end_pfn) {
1332 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1333 if (!pte)
1334 return -ENOMEM;
1335 /* We don't need lock here, nobody else
1336 * touches the iova range
1337 */
1338 BUG_ON(dma_pte_addr(*pte));
1339 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1340 dma_set_pte_prot(*pte, prot);
1341 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1342 start_pfn++;
1343 index++;
1344 }
1345 return 0;
1346}
1347
1348static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1349{
1350 clear_context_table(domain->iommu, bus, devfn);
1351 iommu_flush_context_global(domain->iommu, 0);
1352 iommu_flush_iotlb_global(domain->iommu, 0);
1353}
1354
1355static void domain_remove_dev_info(struct dmar_domain *domain)
1356{
1357 struct device_domain_info *info;
1358 unsigned long flags;
1359
1360 spin_lock_irqsave(&device_domain_lock, flags);
1361 while (!list_empty(&domain->devices)) {
1362 info = list_entry(domain->devices.next,
1363 struct device_domain_info, link);
1364 list_del(&info->link);
1365 list_del(&info->global);
1366 if (info->dev)
1367 info->dev->dev.archdata.iommu = NULL;
1368 spin_unlock_irqrestore(&device_domain_lock, flags);
1369
1370 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1371 free_devinfo_mem(info);
1372
1373 spin_lock_irqsave(&device_domain_lock, flags);
1374 }
1375 spin_unlock_irqrestore(&device_domain_lock, flags);
1376}
1377
1378/*
1379 * find_domain
1380 * Note: struct pci_dev->dev.archdata.iommu stores the device_domain_info
1381 */
1382struct dmar_domain *
1383find_domain(struct pci_dev *pdev)
1384{
1385 struct device_domain_info *info;
1386
1387 /* No lock here, assumes no domain exit in normal case */
1388 info = pdev->dev.archdata.iommu;
1389 if (info)
1390 return info->domain;
1391 return NULL;
1392}
1393
1394static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1395 struct pci_dev *dev)
1396{
1397 int index;
1398
1399 while (dev) {
1400 for (index = 0; index < cnt; index ++)
1401 if (dev == devices[index])
1402 return 1;
1403
1404 /* Check our parent */
1405 dev = dev->bus->self;
1406 }
1407
1408 return 0;
1409}
1410
1411static struct dmar_drhd_unit *
1412dmar_find_matched_drhd_unit(struct pci_dev *dev)
1413{
1414 struct dmar_drhd_unit *drhd = NULL;
1415
1416 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1417 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1418 drhd->devices_cnt, dev))
1419 return drhd;
1420 }
1421
1422 return NULL;
1423}
1424
1425/* domain is initialized */
1426static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1427{
1428 struct dmar_domain *domain, *found = NULL;
1429 struct intel_iommu *iommu;
1430 struct dmar_drhd_unit *drhd;
1431 struct device_domain_info *info, *tmp;
1432 struct pci_dev *dev_tmp;
1433 unsigned long flags;
1434 int bus = 0, devfn = 0;
1435
1436 domain = find_domain(pdev);
1437 if (domain)
1438 return domain;
1439
1440 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1441 if (dev_tmp) {
1442 if (dev_tmp->is_pcie) {
1443 bus = dev_tmp->subordinate->number;
1444 devfn = 0;
1445 } else {
1446 bus = dev_tmp->bus->number;
1447 devfn = dev_tmp->devfn;
1448 }
1449 spin_lock_irqsave(&device_domain_lock, flags);
1450 list_for_each_entry(info, &device_domain_list, global) {
1451 if (info->bus == bus && info->devfn == devfn) {
1452 found = info->domain;
1453 break;
1454 }
1455 }
1456 spin_unlock_irqrestore(&device_domain_lock, flags);
1457 /* pcie-to-pci bridge already has a domain, use it */
1458 if (found) {
1459 domain = found;
1460 goto found_domain;
1461 }
1462 }
1463
1464 /* Allocate new domain for the device */
1465 drhd = dmar_find_matched_drhd_unit(pdev);
1466 if (!drhd) {
1467 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1468 pci_name(pdev));
1469 return NULL;
1470 }
1471 iommu = drhd->iommu;
1472
1473 domain = iommu_alloc_domain(iommu);
1474 if (!domain)
1475 goto error;
1476
1477 if (domain_init(domain, gaw)) {
1478 domain_exit(domain);
1479 goto error;
1480 }
1481
1482 /* register pcie-to-pci device */
1483 if (dev_tmp) {
1484 info = alloc_devinfo_mem();
1485 if (!info) {
1486 domain_exit(domain);
1487 goto error;
1488 }
1489 info->bus = bus;
1490 info->devfn = devfn;
1491 info->dev = NULL;
1492 info->domain = domain;
1493 /* This domain is shared by devices under p2p bridge */
1494 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1495
1496 /* pcie-to-pci bridge already has a domain, use it */
1497 found = NULL;
1498 spin_lock_irqsave(&device_domain_lock, flags);
1499 list_for_each_entry(tmp, &device_domain_list, global) {
1500 if (tmp->bus == bus && tmp->devfn == devfn) {
1501 found = tmp->domain;
1502 break;
1503 }
1504 }
1505 if (found) {
1506 free_devinfo_mem(info);
1507 domain_exit(domain);
1508 domain = found;
1509 } else {
1510 list_add(&info->link, &domain->devices);
1511 list_add(&info->global, &device_domain_list);
1512 }
1513 spin_unlock_irqrestore(&device_domain_lock, flags);
1514 }
1515
1516found_domain:
1517 info = alloc_devinfo_mem();
1518 if (!info)
1519 goto error;
1520 info->bus = pdev->bus->number;
1521 info->devfn = pdev->devfn;
1522 info->dev = pdev;
1523 info->domain = domain;
1524 spin_lock_irqsave(&device_domain_lock, flags);
1525 /* somebody is fast */
1526 found = find_domain(pdev);
1527 if (found != NULL) {
1528 spin_unlock_irqrestore(&device_domain_lock, flags);
1529 if (found != domain) {
1530 domain_exit(domain);
1531 domain = found;
1532 }
1533 free_devinfo_mem(info);
1534 return domain;
1535 }
1536 list_add(&info->link, &domain->devices);
1537 list_add(&info->global, &device_domain_list);
1538 pdev->dev.archdata.iommu = info;
1539 spin_unlock_irqrestore(&device_domain_lock, flags);
1540 return domain;
1541error:
1542 /* recheck it here, maybe others set it */
1543 return find_domain(pdev);
1544}
1545
1546static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1547{
1548 struct dmar_domain *domain;
1549 unsigned long size;
1550 u64 base;
1551 int ret;
1552
1553 printk(KERN_INFO
1554 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1555 pci_name(pdev), start, end);
1556 /* page table init */
1557 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1558 if (!domain)
1559 return -ENOMEM;
1560
1561 /* The address might not be aligned */
1562 base = start & PAGE_MASK_4K;
1563 size = end - base;
1564 size = PAGE_ALIGN_4K(size);
1565 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1566 IOVA_PFN(base + size) - 1)) {
1567 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1568 ret = -ENOMEM;
1569 goto error;
1570 }
1571
1572 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1573 size, base, pci_name(pdev));
1574 /*
1575 * RMRR range might have overlap with physical memory range,
1576 * clear it first
1577 */
1578 dma_pte_clear_range(domain, base, base + size);
1579
1580 ret = domain_page_mapping(domain, base, base, size,
1581 DMA_PTE_READ|DMA_PTE_WRITE);
1582 if (ret)
1583 goto error;
1584
1585 /* context entry init */
1586 ret = domain_context_mapping(domain, pdev);
1587 if (!ret)
1588 return 0;
1589error:
1590 domain_exit(domain);
1591 return ret;
1592
1593}
1594
1595static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1596 struct pci_dev *pdev)
1597{
1598 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1599 return 0;
1600 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1601 rmrr->end_address + 1);
1602}
1603
1604#ifdef CONFIG_DMAR_GFX_WA
1605extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
1606static void __init iommu_prepare_gfx_mapping(void)
1607{
1608 struct pci_dev *pdev = NULL;
1609 u64 base, size;
1610 int slot;
1611 int ret;
1612
1613 for_each_pci_dev(pdev) {
1614 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1615 !IS_GFX_DEVICE(pdev))
1616 continue;
1617 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1618 pci_name(pdev));
1619 slot = arch_get_ram_range(0, &base, &size);
1620 while (slot >= 0) {
1621 ret = iommu_prepare_identity_map(pdev,
1622 base, base + size);
1623 if (ret)
1624 goto error;
1625 slot = arch_get_ram_range(slot, &base, &size);
1626 }
1627 continue;
1628error:
1629 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1630 }
1631}
1632#endif
1633
1634#ifdef CONFIG_DMAR_FLOPPY_WA
1635static inline void iommu_prepare_isa(void)
1636{
1637 struct pci_dev *pdev;
1638 int ret;
1639
1640 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1641 if (!pdev)
1642 return;
1643
1644 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1645 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1646
1647 if (ret)
1648 printk("IOMMU: Failed to create 0-16M identity map, "
1649 "floppy might not work\n");
1650
1651}
1652#else
1653static inline void iommu_prepare_isa(void)
1654{
1655 return;
1656}
1657#endif /* !CONFIG_DMAR_FLOPPY_WA */
1658
1659int __init init_dmars(void)
1660{
1661 struct dmar_drhd_unit *drhd;
1662 struct dmar_rmrr_unit *rmrr;
1663 struct pci_dev *pdev;
1664 struct intel_iommu *iommu;
1665 int ret, unit = 0;
1666
1667 /*
1668 * for each drhd
1669 * allocate root
1670 * initialize and program root entry to not present
1671 * endfor
1672 */
1673 for_each_drhd_unit(drhd) {
1674 if (drhd->ignored)
1675 continue;
1676 iommu = alloc_iommu(drhd);
1677 if (!iommu) {
1678 ret = -ENOMEM;
1679 goto error;
1680 }
1681
1682 /*
1683 * TBD:
1684 * we could share the same root & context tables
1685 * among all IOMMUs. Need to split it later.
1686 */
1687 ret = iommu_alloc_root_entry(iommu);
1688 if (ret) {
1689 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1690 goto error;
1691 }
1692 }
1693
1694 /*
1695 * For each rmrr
1696 * for each dev attached to rmrr
1697 * do
1698 * locate drhd for dev, alloc domain for dev
1699 * allocate free domain
1700 * allocate page table entries for rmrr
1701 * if context not allocated for bus
1702 * allocate and init context
1703 * set present in root table for this bus
1704 * init context with domain, translation etc
1705 * endfor
1706 * endfor
1707 */
1708 for_each_rmrr_units(rmrr) {
1709 int i;
1710 for (i = 0; i < rmrr->devices_cnt; i++) {
1711 pdev = rmrr->devices[i];
1712 /* some BIOSes list non-existent devices in the DMAR table */
1713 if (!pdev)
1714 continue;
1715 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1716 if (ret)
1717 printk(KERN_ERR
1718 "IOMMU: mapping reserved region failed\n");
1719 }
1720 }
1721
1722 iommu_prepare_gfx_mapping();
1723
1724 iommu_prepare_isa();
1725
1726 /*
1727 * for each drhd
1728 * enable fault log
1729 * global invalidate context cache
1730 * global invalidate iotlb
1731 * enable translation
1732 */
1733 for_each_drhd_unit(drhd) {
1734 if (drhd->ignored)
1735 continue;
1736 iommu = drhd->iommu;
1737 sprintf (iommu->name, "dmar%d", unit++);
1738
1739 iommu_flush_write_buffer(iommu);
1740
1741 ret = dmar_set_interrupt(iommu);
1742 if (ret)
1743 goto error;
1744
1745 iommu_set_root_entry(iommu);
1746
1747 iommu_flush_context_global(iommu, 0);
1748 iommu_flush_iotlb_global(iommu, 0);
1749
1750 iommu_disable_protect_mem_regions(iommu);
1751
1752 ret = iommu_enable_translation(iommu);
1753 if (ret)
1754 goto error;
1755 }
1756
1757 return 0;
1758error:
1759 for_each_drhd_unit(drhd) {
1760 if (drhd->ignored)
1761 continue;
1762 iommu = drhd->iommu;
1763 free_iommu(iommu);
1764 }
1765 return ret;
1766}
1767
1768static inline u64 aligned_size(u64 host_addr, size_t size)
1769{
1770 u64 addr;
1771 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1772 return PAGE_ALIGN_4K(addr);
1773}
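/*
 * Worked example (added annotation, not in the original source): the helper
 * above rounds a mapping out to whole 4K pages, counted from the start of
 * the page containing host_addr.  For host_addr = 0x12345abc and
 * size = 0x1000, the in-page offset is 0xabc, 0xabc + 0x1000 = 0x1abc, and
 * PAGE_ALIGN_4K(0x1abc) = 0x2000: two pages are mapped even though the
 * buffer itself is only one page long, because it straddles a page boundary.
 */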
1774
1775struct iova *
f76aec76 1776iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 1777{
ba395927
KA
1778 struct iova *piova;
1779
1780 /* Make sure it's in range */
1781 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1782 if (!size || (IOVA_START_ADDR + size > end))
1783 return NULL;
1784
1785 piova = alloc_iova(&domain->iovad,
1786 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1787 return piova;
1788}
1789
1790static struct iova *
1791__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1792 size_t size)
ba395927 1793{
ba395927 1794 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 1795 struct iova *iova = NULL;
ba395927 1796
7d3b03ce 1797 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
f76aec76 1798 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
ba395927
KA
1799 } else {
1800 /*
1801 * First try to allocate an io virtual address in
1802 * DMA_32BIT_MASK and if that fails then try allocating
1803 * from a higher range
1804 */
1805 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1806 if (!iova)
1807 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1808 }
1809
1810 if (!iova) {
1811 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1812 return NULL;
1813 }
1814
1815 return iova;
1816}
1817
1818static struct dmar_domain *
1819get_valid_domain_for_dev(struct pci_dev *pdev)
1820{
1821 struct dmar_domain *domain;
1822 int ret;
1823
1824 domain = get_domain_for_dev(pdev,
1825 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1826 if (!domain) {
1827 printk(KERN_ERR
1828 "Allocating domain for %s failed", pci_name(pdev));
1829 return NULL;
1830 }
1831
1832 /* make sure context mapping is ok */
1833 if (unlikely(!domain_context_mapped(domain, pdev))) {
1834 ret = domain_context_mapping(domain, pdev);
1835 if (ret) {
1836 printk(KERN_ERR
1837 "Domain context map for %s failed",
1838 pci_name(pdev));
1839 return NULL;
1840 }
1841 }
1842
1843 return domain;
1844}
1845
1846static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1847 size_t size, int dir)
1848{
1849 struct pci_dev *pdev = to_pci_dev(hwdev);
1850 int ret;
1851 struct dmar_domain *domain;
1852 unsigned long start_addr;
1853 struct iova *iova;
1854 int prot = 0;
1855
1856 BUG_ON(dir == DMA_NONE);
1857 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1858 return virt_to_bus(addr);
1859
1860 domain = get_valid_domain_for_dev(pdev);
1861 if (!domain)
1862 return 0;
1863
1864 addr = (void *)virt_to_phys(addr);
1865 size = aligned_size((u64)addr, size);
1866
1867 iova = __intel_alloc_iova(hwdev, domain, size);
1868 if (!iova)
1869 goto error;
1870
1871 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1872
1873 /*
1874 * Check if DMAR supports zero-length reads on write only
1875 * mappings..
1876 */
1877 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1878 !cap_zlr(domain->iommu->cap))
1879 prot |= DMA_PTE_READ;
1880 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1881 prot |= DMA_PTE_WRITE;
1882 /*
1883 * addr - (addr + size) might be a partial page, we should map the whole
1884 * page. Note: if two parts of one page are separately mapped, we
1885 * might have two guest_addr mappings to the same host addr, but this
1886 * is not a big problem
1887 */
1888 ret = domain_page_mapping(domain, start_addr,
1889 ((u64)addr) & PAGE_MASK_4K, size, prot);
1890 if (ret)
1891 goto error;
1892
1893 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1894 pci_name(pdev), size, (u64)addr,
1895 size, (u64)start_addr, dir);
1896
1897 /* it's a non-present to present mapping */
1898 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1899 start_addr, size >> PAGE_SHIFT_4K, 1);
1900 if (ret)
1901 iommu_flush_write_buffer(domain->iommu);
1902
1903 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1904
1905error:
1906 if (iova)
1907 __free_iova(&domain->iovad, iova);
1908 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1909 pci_name(pdev), size, (u64)addr, dir);
1910 return 0;
1911}
1912
1913static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1914 size_t size, int dir)
1915{
1916 struct pci_dev *pdev = to_pci_dev(dev);
1917 struct dmar_domain *domain;
1918 unsigned long start_addr;
1919 struct iova *iova;
1920
1921 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1922 return;
1923 domain = find_domain(pdev);
1924 BUG_ON(!domain);
1925
1926 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1927 if (!iova)
1928 return;
1929
1930 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1931 size = aligned_size((u64)dev_addr, size);
1932
1933 pr_debug("Device %s unmapping: %lx@%llx\n",
1934 pci_name(pdev), size, (u64)start_addr);
1935
1936 /* clear the whole page */
1937 dma_pte_clear_range(domain, start_addr, start_addr + size);
1938 /* free page tables */
1939 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1940
1941 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1942 size >> PAGE_SHIFT_4K, 0))
1943 iommu_flush_write_buffer(domain->iommu);
1944
1945 /* free iova */
1946 __free_iova(&domain->iovad, iova);
1947}
1948
1949static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1950 dma_addr_t *dma_handle, gfp_t flags)
1951{
1952 void *vaddr;
1953 int order;
1954
1955 size = PAGE_ALIGN_4K(size);
1956 order = get_order(size);
1957 flags &= ~(GFP_DMA | GFP_DMA32);
1958
1959 vaddr = (void *)__get_free_pages(flags, order);
1960 if (!vaddr)
1961 return NULL;
1962 memset(vaddr, 0, size);
1963
1964 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1965 if (*dma_handle)
1966 return vaddr;
1967 free_pages((unsigned long)vaddr, order);
1968 return NULL;
1969}
1970
1971static void intel_free_coherent(struct device *hwdev, size_t size,
1972 void *vaddr, dma_addr_t dma_handle)
1973{
1974 int order;
1975
1976 size = PAGE_ALIGN_4K(size);
1977 order = get_order(size);
1978
1979 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1980 free_pages((unsigned long)vaddr, order);
1981}
1982
1983#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
1984static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1985 int nelems, int dir)
1986{
1987 int i;
1988 struct pci_dev *pdev = to_pci_dev(hwdev);
1989 struct dmar_domain *domain;
f76aec76
KA
1990 unsigned long start_addr;
1991 struct iova *iova;
1992 size_t size = 0;
1993 void *addr;
c03ab37c 1994 struct scatterlist *sg;
ba395927 1995
358dd8ac 1996 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1997 return;
1998
1999 domain = find_domain(pdev);
ba395927 2000
c03ab37c 2001 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2002 if (!iova)
2003 return;
c03ab37c 2004 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2005 addr = SG_ENT_VIRT_ADDRESS(sg);
2006 size += aligned_size((u64)addr, sg->length);
2007 }
2008
2009 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2010
 2011	/* clear the whole mapped range */
2012 dma_pte_clear_range(domain, start_addr, start_addr + size);
2013 /* free page tables */
2014 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2015
2016 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2017 size >> PAGE_SHIFT_4K, 0))
ba395927 2018 iommu_flush_write_buffer(domain->iommu);
f76aec76
KA
2019
2020 /* free iova */
2021 __free_iova(&domain->iovad, iova);
ba395927
KA
2022}
2023
ba395927 2024static int intel_nontranslate_map_sg(struct device *hwdev,
c03ab37c 2025 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2026{
2027 int i;
c03ab37c 2028 struct scatterlist *sg;
ba395927 2029
c03ab37c 2030 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2031 BUG_ON(!sg_page(sg));
c03ab37c
FT
2032 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2033 sg->dma_length = sg->length;
ba395927
KA
2034 }
2035 return nelems;
2036}
2037
c03ab37c
FT
2038static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2039 int nelems, int dir)
ba395927
KA
2040{
2041 void *addr;
2042 int i;
ba395927
KA
2043 struct pci_dev *pdev = to_pci_dev(hwdev);
2044 struct dmar_domain *domain;
f76aec76
KA
2045 size_t size = 0;
2046 int prot = 0;
2047 size_t offset = 0;
2048 struct iova *iova = NULL;
2049 int ret;
c03ab37c 2050 struct scatterlist *sg;
f76aec76 2051 unsigned long start_addr;
ba395927
KA
2052
2053 BUG_ON(dir == DMA_NONE);
358dd8ac 2054 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2055 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2056
f76aec76
KA
2057 domain = get_valid_domain_for_dev(pdev);
2058 if (!domain)
2059 return 0;
2060
c03ab37c 2061 for_each_sg(sglist, sg, nelems, i) {
ba395927 2062 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2063 addr = (void *)virt_to_phys(addr);
2064 size += aligned_size((u64)addr, sg->length);
2065 }
2066
2067 iova = __intel_alloc_iova(hwdev, domain, size);
2068 if (!iova) {
c03ab37c 2069 sglist->dma_length = 0;
f76aec76
KA
2070 return 0;
2071 }
2072
2073 /*
 2074	 * Check whether the DMAR hardware supports zero-length reads on
 2075	 * write-only mappings.
2076 */
 2077	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
2078 !cap_zlr(domain->iommu->cap))
2079 prot |= DMA_PTE_READ;
2080 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2081 prot |= DMA_PTE_WRITE;
2082
2083 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2084 offset = 0;
c03ab37c 2085 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2086 addr = SG_ENT_VIRT_ADDRESS(sg);
2087 addr = (void *)virt_to_phys(addr);
2088 size = aligned_size((u64)addr, sg->length);
2089 ret = domain_page_mapping(domain, start_addr + offset,
2090 ((u64)addr) & PAGE_MASK_4K,
2091 size, prot);
2092 if (ret) {
 2093			/* clear the mapped range */
2094 dma_pte_clear_range(domain, start_addr,
2095 start_addr + offset);
2096 /* free page tables */
2097 dma_pte_free_pagetable(domain, start_addr,
2098 start_addr + offset);
2099 /* free iova */
2100 __free_iova(&domain->iovad, iova);
ba395927
KA
2101 return 0;
2102 }
f76aec76
KA
2103 sg->dma_address = start_addr + offset +
2104 ((u64)addr & (~PAGE_MASK_4K));
ba395927 2105 sg->dma_length = sg->length;
f76aec76 2106 offset += size;
ba395927
KA
2107 }
2108
ba395927 2109 /* it's a non-present to present mapping */
f76aec76
KA
2110 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2111 start_addr, offset >> PAGE_SHIFT_4K, 1))
ba395927
KA
2112 iommu_flush_write_buffer(domain->iommu);
2113 return nelems;
2114}
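
A hedged usage sketch of the scatter-gather path (the helper name example_map_pages() and its arguments are hypothetical): dma_map_sg()/dma_unmap_sg() dispatch to intel_map_sg() above and intel_unmap_sg() earlier in this file, which place all entries in one contiguous IOVA range and flush the IOTLB once.

#include <linux/scatterlist.h>
#include <linux/dma-mapping.h>

/* Hypothetical fragment: map an array of pages for a device read. */
static int example_map_pages(struct device *dev, struct scatterlist *sgl,
			     struct page **pages, int npages)
{
	int i, nents;

	sg_init_table(sgl, npages);
	for (i = 0; i < npages; i++)
		sg_set_page(&sgl[i], pages[i], PAGE_SIZE, 0);

	/* intel_map_sg() returns 0 on failure, nelems on success. */
	nents = dma_map_sg(dev, sgl, npages, DMA_FROM_DEVICE);
	if (!nents)
		return -ENOMEM;

	/* ... program the device with sgl[0..nents) ... */

	dma_unmap_sg(dev, sgl, npages, DMA_FROM_DEVICE);
	return 0;
}
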
2115
2116static struct dma_mapping_ops intel_dma_ops = {
2117 .alloc_coherent = intel_alloc_coherent,
2118 .free_coherent = intel_free_coherent,
2119 .map_single = intel_map_single,
2120 .unmap_single = intel_unmap_single,
2121 .map_sg = intel_map_sg,
2122 .unmap_sg = intel_unmap_sg,
2123};
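
A minimal sketch of the dispatch shape only (simplified, not the actual asm-x86_64 header): the generic DMA helpers of this era call through the global dma_ops table, so once intel_iommu_init() sets dma_ops = &intel_dma_ops, every driver's mapping call lands in the functions registered above.

/* Simplified sketch of the generic-to-ops dispatch; illustration only. */
static inline void sketch_dma_unmap_single(struct device *dev,
					   dma_addr_t dev_addr,
					   size_t size, int dir)
{
	dma_ops->unmap_single(dev, dev_addr, size, dir);
}
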
2124
2125static inline int iommu_domain_cache_init(void)
2126{
2127 int ret = 0;
2128
2129 iommu_domain_cache = kmem_cache_create("iommu_domain",
2130 sizeof(struct dmar_domain),
2131 0,
2132 SLAB_HWCACHE_ALIGN,
2133
2134 NULL);
2135 if (!iommu_domain_cache) {
2136 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2137 ret = -ENOMEM;
2138 }
2139
2140 return ret;
2141}
2142
2143static inline int iommu_devinfo_cache_init(void)
2144{
2145 int ret = 0;
2146
2147 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2148 sizeof(struct device_domain_info),
2149 0,
2150 SLAB_HWCACHE_ALIGN,
2151
2152 NULL);
2153 if (!iommu_devinfo_cache) {
2154 printk(KERN_ERR "Couldn't create devinfo cache\n");
2155 ret = -ENOMEM;
2156 }
2157
2158 return ret;
2159}
2160
2161static inline int iommu_iova_cache_init(void)
2162{
2163 int ret = 0;
2164
2165 iommu_iova_cache = kmem_cache_create("iommu_iova",
2166 sizeof(struct iova),
2167 0,
2168 SLAB_HWCACHE_ALIGN,
2169
2170 NULL);
2171 if (!iommu_iova_cache) {
2172 printk(KERN_ERR "Couldn't create iova cache\n");
2173 ret = -ENOMEM;
2174 }
2175
2176 return ret;
2177}
2178
2179static int __init iommu_init_mempool(void)
2180{
2181 int ret;
2182 ret = iommu_iova_cache_init();
2183 if (ret)
2184 return ret;
2185
2186 ret = iommu_domain_cache_init();
2187 if (ret)
2188 goto domain_error;
2189
2190 ret = iommu_devinfo_cache_init();
2191 if (!ret)
2192 return ret;
2193
2194 kmem_cache_destroy(iommu_domain_cache);
2195domain_error:
2196 kmem_cache_destroy(iommu_iova_cache);
2197
2198 return -ENOMEM;
2199}
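
Once iommu_init_mempool() has created these slab caches, allocations elsewhere in the driver draw from them. A tiny illustrative sketch with hypothetical helper names, assuming the iommu_iova_cache created above (the real allocation path in this file differs):

/* Illustration only: allocate/free a struct iova from the slab cache. */
static struct iova *example_iova_alloc(void)
{
	return kmem_cache_zalloc(iommu_iova_cache, GFP_ATOMIC);
}

static void example_iova_free(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}
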
2200
2201static void __init iommu_exit_mempool(void)
2202{
2203 kmem_cache_destroy(iommu_devinfo_cache);
2204 kmem_cache_destroy(iommu_domain_cache);
2205 kmem_cache_destroy(iommu_iova_cache);
2206
2207}
2208
2209void __init detect_intel_iommu(void)
2210{
2211 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2212 return;
2213 if (early_dmar_detect()) {
2214 iommu_detected = 1;
2215 }
2216}
2217
2218static void __init init_no_remapping_devices(void)
2219{
2220 struct dmar_drhd_unit *drhd;
2221
2222 for_each_drhd_unit(drhd) {
2223 if (!drhd->include_all) {
2224 int i;
2225 for (i = 0; i < drhd->devices_cnt; i++)
2226 if (drhd->devices[i] != NULL)
2227 break;
2228 /* ignore DMAR unit if no pci devices exist */
2229 if (i == drhd->devices_cnt)
2230 drhd->ignored = 1;
2231 }
2232 }
2233
2234 if (dmar_map_gfx)
2235 return;
2236
2237 for_each_drhd_unit(drhd) {
2238 int i;
2239 if (drhd->ignored || drhd->include_all)
2240 continue;
2241
2242 for (i = 0; i < drhd->devices_cnt; i++)
2243 if (drhd->devices[i] &&
2244 !IS_GFX_DEVICE(drhd->devices[i]))
2245 break;
2246
2247 if (i < drhd->devices_cnt)
2248 continue;
2249
2250 /* bypass IOMMU if it is just for gfx devices */
2251 drhd->ignored = 1;
2252 for (i = 0; i < drhd->devices_cnt; i++) {
2253 if (!drhd->devices[i])
2254 continue;
358dd8ac 2255 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2256 }
2257 }
2258}
2259
2260int __init intel_iommu_init(void)
2261{
2262 int ret = 0;
2263
2264 if (no_iommu || swiotlb || dmar_disabled)
2265 return -ENODEV;
2266
2267 if (dmar_table_init())
2268 return -ENODEV;
2269
2270 iommu_init_mempool();
2271 dmar_init_reserved_ranges();
2272
2273 init_no_remapping_devices();
2274
2275 ret = init_dmars();
2276 if (ret) {
2277 printk(KERN_ERR "IOMMU: dmar init failed\n");
2278 put_iova_domain(&reserved_iova_list);
2279 iommu_exit_mempool();
2280 return ret;
2281 }
2282 printk(KERN_INFO
2283 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2284
2285 force_iommu = 1;
2286 dma_ops = &intel_dma_ops;
2287 return 0;
2288}
e820482c 2289
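
With translation active after intel_iommu_init(), a driver needs no IOMMU-specific calls. A hedged probe-time fragment (hypothetical function name) showing the usual DMA-mask negotiation, after which every dma_map_*() call is served by the mapping paths above:

#include <linux/pci.h>
#include <linux/dma-mapping.h>

/* Hypothetical probe fragment; illustration only. */
static int example_probe(struct pci_dev *pdev)
{
	/* Prefer 64-bit DMA addressing, fall back to 32-bit. */
	if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) &&
	    pci_set_dma_mask(pdev, DMA_32BIT_MASK))
		return -EIO;

	/* All further dma_map_*() calls go through intel_dma_ops. */
	return 0;
}
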