[deliverable/linux.git] / drivers / pci / intel-iommu.c
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
ba395927 39#include <asm/cacheflush.h>
46a7fa27 40#include <asm/iommu.h>
ba395927
KA
41#include "pci.h"
42
5b6985ce
FY
43#define ROOT_SIZE VTD_PAGE_SIZE
44#define CONTEXT_SIZE VTD_PAGE_SIZE
45
ba395927
KA
46#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
47#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
48
49#define IOAPIC_RANGE_START (0xfee00000)
50#define IOAPIC_RANGE_END (0xfeefffff)
51#define IOVA_START_ADDR (0x1000)
52
53#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
54
ba395927
KA
55#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
56
f27be03b
MM
57#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
58#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
59#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
5e0d2a6f 60
d9630fe9
WH
61/* global iommu list, set NULL for ignored DMAR units */
62static struct intel_iommu **g_iommus;
63
46b08e1a
MM
64/*
65 * 0: Present
66 * 1-11: Reserved
67 * 12-63: Context Ptr (12 - (haw-1))
68 * 64-127: Reserved
69 */
70struct root_entry {
71 u64 val;
72 u64 rsvd1;
73};
74#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
75static inline bool root_present(struct root_entry *root)
76{
77 return (root->val & 1);
78}
79static inline void set_root_present(struct root_entry *root)
80{
81 root->val |= 1;
82}
83static inline void set_root_value(struct root_entry *root, unsigned long value)
84{
85 root->val |= value & VTD_PAGE_MASK;
86}
87
88static inline struct context_entry *
89get_context_addr_from_root(struct root_entry *root)
90{
91 return (struct context_entry *)
92 (root_present(root)?phys_to_virt(
93 root->val & VTD_PAGE_MASK) :
94 NULL);
95}
96
7a8fc25e
MM
97/*
98 * low 64 bits:
99 * 0: present
100 * 1: fault processing disable
101 * 2-3: translation type
102 * 12-63: address space root
103 * high 64 bits:
104 * 0-2: address width
105 * 3-6: aval
106 * 8-23: domain id
107 */
108struct context_entry {
109 u64 lo;
110 u64 hi;
111};
c07e7d21
MM
112
113static inline bool context_present(struct context_entry *context)
114{
115 return (context->lo & 1);
116}
117static inline void context_set_present(struct context_entry *context)
118{
119 context->lo |= 1;
120}
121
122static inline void context_set_fault_enable(struct context_entry *context)
123{
124 context->lo &= (((u64)-1) << 2) | 1;
125}
126
7a8fc25e 127#define CONTEXT_TT_MULTI_LEVEL 0
c07e7d21
MM
128
129static inline void context_set_translation_type(struct context_entry *context,
130 unsigned long value)
131{
132 context->lo &= (((u64)-1) << 4) | 3;
133 context->lo |= (value & 3) << 2;
134}
135
136static inline void context_set_address_root(struct context_entry *context,
137 unsigned long value)
138{
139 context->lo |= value & VTD_PAGE_MASK;
140}
141
142static inline void context_set_address_width(struct context_entry *context,
143 unsigned long value)
144{
145 context->hi |= value & 7;
146}
147
148static inline void context_set_domain_id(struct context_entry *context,
149 unsigned long value)
150{
151 context->hi |= (value & ((1 << 16) - 1)) << 8;
152}
153
154static inline void context_clear_entry(struct context_entry *context)
155{
156 context->lo = 0;
157 context->hi = 0;
158}
7a8fc25e 159
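/*
 * Illustrative use of the helpers above (see domain_context_mapping_one()
 * later in this file): a context entry for a device is built roughly as
 *
 *	context_set_domain_id(context, id);
 *	context_set_address_width(context, iommu->agaw);
 *	context_set_address_root(context, virt_to_phys(pgd));
 *	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
 *	context_set_fault_enable(context);
 *	context_set_present(context);
 *
 * i.e. the low word carries the page-table root, translation type and
 * present bit, the high word the address width and domain id.
 */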
622ba12a
MM
160/*
161 * 0: readable
162 * 1: writable
163 * 2-6: reserved
164 * 7: super page
165 * 8-11: available
166 * 12-63: Host physical address
167 */
168struct dma_pte {
169 u64 val;
170};
622ba12a 171
19c239ce
MM
172static inline void dma_clear_pte(struct dma_pte *pte)
173{
174 pte->val = 0;
175}
176
177static inline void dma_set_pte_readable(struct dma_pte *pte)
178{
179 pte->val |= DMA_PTE_READ;
180}
181
182static inline void dma_set_pte_writable(struct dma_pte *pte)
183{
184 pte->val |= DMA_PTE_WRITE;
185}
186
187static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
188{
189 pte->val = (pte->val & ~3) | (prot & 3);
190}
191
192static inline u64 dma_pte_addr(struct dma_pte *pte)
193{
194 return (pte->val & VTD_PAGE_MASK);
195}
196
197static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
198{
199 pte->val |= (addr & VTD_PAGE_MASK);
200}
201
202static inline bool dma_pte_present(struct dma_pte *pte)
203{
204 return (pte->val & 3) != 0;
205}
622ba12a 206
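/*
 * Example (illustrative): a leaf PTE mapping host physical page 0x1234000
 * read/write ends up as (0x1234000 & VTD_PAGE_MASK) | DMA_PTE_READ |
 * DMA_PTE_WRITE, i.e. bits 0 and 1 set as documented above;
 * dma_pte_present() only checks that at least one of those two bits is set.
 */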
3b5410e7 207/* devices under the same p2p bridge are owned in one domain */
cdc7b837 208#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 209
1ce28feb
WH
210/* domain represents a virtual machine; more than one device
211 * across iommus may be owned by one domain, e.g. a kvm guest.
212 */
213#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
214
99126f7c
MM
215struct dmar_domain {
216 int id; /* domain id */
8c11e798 217 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
218
219 struct list_head devices; /* all devices' list */
220 struct iova_domain iovad; /* iova's that belong to this domain */
221
222 struct dma_pte *pgd; /* virtual address */
223 spinlock_t mapping_lock; /* page table lock */
224 int gaw; /* max guest address width */
225
226 /* adjusted guest address width, 0 is level 2 30-bit */
227 int agaw;
228
3b5410e7 229 int flags; /* flags to find out type of domain */
8e604097
WH
230
231 int iommu_coherency;/* indicate coherency of iommu access */
c7151a8d
WH
232 int iommu_count; /* reference count of iommu */
233 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 234 u64 max_addr; /* maximum mapped address */
99126f7c
MM
235};
236
a647dacb
MM
237/* PCI domain-device relationship */
238struct device_domain_info {
239 struct list_head link; /* link to domain siblings */
240 struct list_head global; /* link to global list */
241 u8 bus; /* PCI bus number */
242 u8 devfn; /* PCI devfn number */
243 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
244 struct dmar_domain *domain; /* pointer to domain */
245};
246
5e0d2a6f 247static void flush_unmaps_timeout(unsigned long data);
248
249DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
250
80b20dd8 251#define HIGH_WATER_MARK 250
252struct deferred_flush_tables {
253 int next;
254 struct iova *iova[HIGH_WATER_MARK];
255 struct dmar_domain *domain[HIGH_WATER_MARK];
256};
257
258static struct deferred_flush_tables *deferred_flush;
259
5e0d2a6f 260/* bitmap for indexing intel_iommus */
5e0d2a6f 261static int g_num_of_iommus;
262
263static DEFINE_SPINLOCK(async_umap_flush_lock);
264static LIST_HEAD(unmaps_to_do);
265
266static int timer_on;
267static long list_size;
5e0d2a6f 268
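/*
 * The objects above implement batched unmapping: freed IOVAs are parked in
 * the per-IOMMU deferred_flush tables and released from
 * flush_unmaps_timeout() (or earlier, once the tables approach
 * HIGH_WATER_MARK entries) after a single IOTLB flush, instead of flushing
 * on every unmap.  Booting with intel_iommu=strict disables this batching.
 */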
ba395927
KA
269static void domain_remove_dev_info(struct dmar_domain *domain);
270
0cd5c3c8
KM
271#ifdef CONFIG_DMAR_DEFAULT_ON
272int dmar_disabled = 0;
273#else
274int dmar_disabled = 1;
275#endif /*CONFIG_DMAR_DEFAULT_ON*/
276
ba395927 277static int __initdata dmar_map_gfx = 1;
7d3b03ce 278static int dmar_forcedac;
5e0d2a6f 279static int intel_iommu_strict;
ba395927
KA
280
281#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
282static DEFINE_SPINLOCK(device_domain_lock);
283static LIST_HEAD(device_domain_list);
284
a8bcbb0d
JR
285static struct iommu_ops intel_iommu_ops;
286
ba395927
KA
287static int __init intel_iommu_setup(char *str)
288{
289 if (!str)
290 return -EINVAL;
291 while (*str) {
0cd5c3c8
KM
292 if (!strncmp(str, "on", 2)) {
293 dmar_disabled = 0;
294 printk(KERN_INFO "Intel-IOMMU: enabled\n");
295 } else if (!strncmp(str, "off", 3)) {
ba395927 296 dmar_disabled = 1;
0cd5c3c8 297 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
298 } else if (!strncmp(str, "igfx_off", 8)) {
299 dmar_map_gfx = 0;
300 printk(KERN_INFO
301 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 302 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 303 printk(KERN_INFO
7d3b03ce
KA
304 "Intel-IOMMU: Forcing DAC for PCI devices\n");
305 dmar_forcedac = 1;
5e0d2a6f 306 } else if (!strncmp(str, "strict", 6)) {
307 printk(KERN_INFO
308 "Intel-IOMMU: disable batched IOTLB flush\n");
309 intel_iommu_strict = 1;
ba395927
KA
310 }
311
312 str += strcspn(str, ",");
313 while (*str == ',')
314 str++;
315 }
316 return 0;
317}
318__setup("intel_iommu=", intel_iommu_setup);
319
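/*
 * Example (illustrative): the parser above takes a comma-separated option
 * list on the kernel command line, e.g.
 *
 *	intel_iommu=on,strict		enabled, unbatched IOTLB flushing
 *	intel_iommu=igfx_off,forcedac	no GFX mapping, force DAC for PCI devices
 *
 * Unrecognized tokens are skipped.  Without CONFIG_DMAR_DEFAULT_ON, the
 * IOMMU stays disabled unless "on" is passed.
 */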
320static struct kmem_cache *iommu_domain_cache;
321static struct kmem_cache *iommu_devinfo_cache;
322static struct kmem_cache *iommu_iova_cache;
323
eb3fa7cb
KA
324static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
325{
326 unsigned int flags;
327 void *vaddr;
328
329 /* trying to avoid low memory issues */
330 flags = current->flags & PF_MEMALLOC;
331 current->flags |= PF_MEMALLOC;
332 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
333 current->flags &= (~PF_MEMALLOC | flags);
334 return vaddr;
335}
336
337
ba395927
KA
338static inline void *alloc_pgtable_page(void)
339{
eb3fa7cb
KA
340 unsigned int flags;
341 void *vaddr;
342
343 /* trying to avoid low memory issues */
344 flags = current->flags & PF_MEMALLOC;
345 current->flags |= PF_MEMALLOC;
346 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
347 current->flags &= (~PF_MEMALLOC | flags);
348 return vaddr;
ba395927
KA
349}
350
351static inline void free_pgtable_page(void *vaddr)
352{
353 free_page((unsigned long)vaddr);
354}
355
356static inline void *alloc_domain_mem(void)
357{
eb3fa7cb 358 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
359}
360
38717946 361static void free_domain_mem(void *vaddr)
ba395927
KA
362{
363 kmem_cache_free(iommu_domain_cache, vaddr);
364}
365
366static inline void * alloc_devinfo_mem(void)
367{
eb3fa7cb 368 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
369}
370
371static inline void free_devinfo_mem(void *vaddr)
372{
373 kmem_cache_free(iommu_devinfo_cache, vaddr);
374}
375
376struct iova *alloc_iova_mem(void)
377{
eb3fa7cb 378 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
379}
380
381void free_iova_mem(struct iova *iova)
382{
383 kmem_cache_free(iommu_iova_cache, iova);
384}
385
1b573683
WH
386
387static inline int width_to_agaw(int width);
388
389/* calculate agaw for each iommu.
390 * "SAGAW" may be different across iommus, use a default agaw, and
391 * get a supported less agaw for iommus that don't support the default agaw.
392 */
393int iommu_calculate_agaw(struct intel_iommu *iommu)
394{
395 unsigned long sagaw;
396 int agaw = -1;
397
398 sagaw = cap_sagaw(iommu->cap);
399 for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
400 agaw >= 0; agaw--) {
401 if (test_bit(agaw, &sagaw))
402 break;
403 }
404
405 return agaw;
406}
407
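/*
 * Worked example (illustrative): with DEFAULT_DOMAIN_ADDRESS_WIDTH = 48,
 * width_to_agaw(48) = (48 - 30) / 9 = 2, i.e. a 4-level page table.  If the
 * unit's SAGAW field does not advertise that agaw, the loop above steps
 * down (agaw 1 = 39-bit/3-level, agaw 0 = 30-bit/2-level) until it finds a
 * supported one, or returns -1 if none is.
 */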
8c11e798
WH
408/* in native case, each domain is related to only one iommu */
409static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
410{
411 int iommu_id;
412
1ce28feb
WH
413 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
414
8c11e798
WH
415 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
416 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
417 return NULL;
418
419 return g_iommus[iommu_id];
420}
421
8e604097
WH
422/* "Coherency" capability may be different across iommus */
423static void domain_update_iommu_coherency(struct dmar_domain *domain)
424{
425 int i;
426
427 domain->iommu_coherency = 1;
428
429 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
430 for (; i < g_num_of_iommus; ) {
431 if (!ecap_coherent(g_iommus[i]->ecap)) {
432 domain->iommu_coherency = 0;
433 break;
434 }
435 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
436 }
437}
438
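/*
 * Put differently: a domain is only considered coherent if every IOMMU it
 * spans reports a coherent page-walk (ecap_coherent); otherwise
 * domain_flush_cache() below must clflush page-table and context-entry
 * updates before the hardware can observe them.
 */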
c7151a8d
WH
439static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
440{
441 struct dmar_drhd_unit *drhd = NULL;
442 int i;
443
444 for_each_drhd_unit(drhd) {
445 if (drhd->ignored)
446 continue;
447
448 for (i = 0; i < drhd->devices_cnt; i++)
288e4877
DH
449 if (drhd->devices[i] &&
450 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
451 drhd->devices[i]->devfn == devfn)
452 return drhd->iommu;
453
454 if (drhd->include_all)
455 return drhd->iommu;
456 }
457
458 return NULL;
459}
460
5331fe6f
WH
461static void domain_flush_cache(struct dmar_domain *domain,
462 void *addr, int size)
463{
464 if (!domain->iommu_coherency)
465 clflush_cache_range(addr, size);
466}
467
ba395927
KA
468/* Gets context entry for a given bus and devfn */
469static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
470 u8 bus, u8 devfn)
471{
472 struct root_entry *root;
473 struct context_entry *context;
474 unsigned long phy_addr;
475 unsigned long flags;
476
477 spin_lock_irqsave(&iommu->lock, flags);
478 root = &iommu->root_entry[bus];
479 context = get_context_addr_from_root(root);
480 if (!context) {
481 context = (struct context_entry *)alloc_pgtable_page();
482 if (!context) {
483 spin_unlock_irqrestore(&iommu->lock, flags);
484 return NULL;
485 }
5b6985ce 486 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
487 phy_addr = virt_to_phys((void *)context);
488 set_root_value(root, phy_addr);
489 set_root_present(root);
490 __iommu_flush_cache(iommu, root, sizeof(*root));
491 }
492 spin_unlock_irqrestore(&iommu->lock, flags);
493 return &context[devfn];
494}
495
496static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
497{
498 struct root_entry *root;
499 struct context_entry *context;
500 int ret;
501 unsigned long flags;
502
503 spin_lock_irqsave(&iommu->lock, flags);
504 root = &iommu->root_entry[bus];
505 context = get_context_addr_from_root(root);
506 if (!context) {
507 ret = 0;
508 goto out;
509 }
c07e7d21 510 ret = context_present(&context[devfn]);
ba395927
KA
511out:
512 spin_unlock_irqrestore(&iommu->lock, flags);
513 return ret;
514}
515
516static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
517{
518 struct root_entry *root;
519 struct context_entry *context;
520 unsigned long flags;
521
522 spin_lock_irqsave(&iommu->lock, flags);
523 root = &iommu->root_entry[bus];
524 context = get_context_addr_from_root(root);
525 if (context) {
c07e7d21 526 context_clear_entry(&context[devfn]);
ba395927
KA
527 __iommu_flush_cache(iommu, &context[devfn],
528 sizeof(*context));
529 }
530 spin_unlock_irqrestore(&iommu->lock, flags);
531}
532
533static void free_context_table(struct intel_iommu *iommu)
534{
535 struct root_entry *root;
536 int i;
537 unsigned long flags;
538 struct context_entry *context;
539
540 spin_lock_irqsave(&iommu->lock, flags);
541 if (!iommu->root_entry) {
542 goto out;
543 }
544 for (i = 0; i < ROOT_ENTRY_NR; i++) {
545 root = &iommu->root_entry[i];
546 context = get_context_addr_from_root(root);
547 if (context)
548 free_pgtable_page(context);
549 }
550 free_pgtable_page(iommu->root_entry);
551 iommu->root_entry = NULL;
552out:
553 spin_unlock_irqrestore(&iommu->lock, flags);
554}
555
556/* page table handling */
557#define LEVEL_STRIDE (9)
558#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
559
560static inline int agaw_to_level(int agaw)
561{
562 return agaw + 2;
563}
564
565static inline int agaw_to_width(int agaw)
566{
567 return 30 + agaw * LEVEL_STRIDE;
568
569}
570
571static inline int width_to_agaw(int width)
572{
573 return (width - 30) / LEVEL_STRIDE;
574}
575
576static inline unsigned int level_to_offset_bits(int level)
577{
578 return (12 + (level - 1) * LEVEL_STRIDE);
579}
580
581static inline int address_level_offset(u64 addr, int level)
582{
583 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
584}
585
586static inline u64 level_mask(int level)
587{
588 return ((u64)-1 << level_to_offset_bits(level));
589}
590
591static inline u64 level_size(int level)
592{
593 return ((u64)1 << level_to_offset_bits(level));
594}
595
596static inline u64 align_to_level(u64 addr, int level)
597{
598 return ((addr + level_size(level) - 1) & level_mask(level));
599}
600
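/*
 * Worked example (illustrative): for agaw 2 (48-bit, 4 levels),
 * level_to_offset_bits() yields 12/21/30/39 for levels 1-4, so
 * address_level_offset() picks out successive 9-bit index fields of the
 * address, and level_size() is 4KiB, 2MiB, 1GiB and 512GiB respectively.
 */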
601static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
602{
603 int addr_width = agaw_to_width(domain->agaw);
604 struct dma_pte *parent, *pte = NULL;
605 int level = agaw_to_level(domain->agaw);
606 int offset;
607 unsigned long flags;
608
609 BUG_ON(!domain->pgd);
610
611 addr &= (((u64)1) << addr_width) - 1;
612 parent = domain->pgd;
613
614 spin_lock_irqsave(&domain->mapping_lock, flags);
615 while (level > 0) {
616 void *tmp_page;
617
618 offset = address_level_offset(addr, level);
619 pte = &parent[offset];
620 if (level == 1)
621 break;
622
19c239ce 623 if (!dma_pte_present(pte)) {
ba395927
KA
624 tmp_page = alloc_pgtable_page();
625
626 if (!tmp_page) {
627 spin_unlock_irqrestore(&domain->mapping_lock,
628 flags);
629 return NULL;
630 }
5331fe6f 631 domain_flush_cache(domain, tmp_page, PAGE_SIZE);
19c239ce 632 dma_set_pte_addr(pte, virt_to_phys(tmp_page));
ba395927
KA
633 /*
634 * high level table always sets r/w, last level page
635 * table controls read/write
636 */
19c239ce
MM
637 dma_set_pte_readable(pte);
638 dma_set_pte_writable(pte);
5331fe6f 639 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 640 }
19c239ce 641 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
642 level--;
643 }
644
645 spin_unlock_irqrestore(&domain->mapping_lock, flags);
646 return pte;
647}
648
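/*
 * addr_to_dma_pte() walks from domain->pgd down to the level-1 (4KiB)
 * entry for @addr, allocating any missing intermediate tables on the way;
 * intermediate entries are always installed read/write, and the returned
 * leaf entry is left for the caller to fill in.
 */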
649/* return address's pte at specific level */
650static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
651 int level)
652{
653 struct dma_pte *parent, *pte = NULL;
654 int total = agaw_to_level(domain->agaw);
655 int offset;
656
657 parent = domain->pgd;
658 while (level <= total) {
659 offset = address_level_offset(addr, total);
660 pte = &parent[offset];
661 if (level == total)
662 return pte;
663
19c239ce 664 if (!dma_pte_present(pte))
ba395927 665 break;
19c239ce 666 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
667 total--;
668 }
669 return NULL;
670}
671
672/* clear one page's page table */
673static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
674{
675 struct dma_pte *pte = NULL;
676
677 /* get last level pte */
678 pte = dma_addr_level_pte(domain, addr, 1);
679
680 if (pte) {
19c239ce 681 dma_clear_pte(pte);
5331fe6f 682 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927
KA
683 }
684}
685
686/* clear last level pte, a tlb flush should be followed */
687static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
688{
689 int addr_width = agaw_to_width(domain->agaw);
690
691 start &= (((u64)1) << addr_width) - 1;
692 end &= (((u64)1) << addr_width) - 1;
693 /* in case it's partial page */
5b6985ce
FY
694 start = PAGE_ALIGN(start);
695 end &= PAGE_MASK;
ba395927
KA
696
697 /* we don't need lock here, nobody else touches the iova range */
698 while (start < end) {
699 dma_pte_clear_one(domain, start);
5b6985ce 700 start += VTD_PAGE_SIZE;
ba395927
KA
701 }
702}
703
704/* free page table pages. last level pte should already be cleared */
705static void dma_pte_free_pagetable(struct dmar_domain *domain,
706 u64 start, u64 end)
707{
708 int addr_width = agaw_to_width(domain->agaw);
709 struct dma_pte *pte;
710 int total = agaw_to_level(domain->agaw);
711 int level;
712 u64 tmp;
713
714 start &= (((u64)1) << addr_width) - 1;
715 end &= (((u64)1) << addr_width) - 1;
716
717 /* we don't need lock here, nobody else touches the iova range */
718 level = 2;
719 while (level <= total) {
720 tmp = align_to_level(start, level);
721 if (tmp >= end || (tmp + level_size(level) > end))
722 return;
723
724 while (tmp < end) {
725 pte = dma_addr_level_pte(domain, tmp, level);
726 if (pte) {
727 free_pgtable_page(
19c239ce
MM
728 phys_to_virt(dma_pte_addr(pte)));
729 dma_clear_pte(pte);
5331fe6f 730 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927
KA
731 }
732 tmp += level_size(level);
733 }
734 level++;
735 }
736 /* free pgd */
737 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
738 free_pgtable_page(domain->pgd);
739 domain->pgd = NULL;
740 }
741}
742
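/*
 * Note the split above: dma_pte_clear_range() only zeroes leaf entries
 * (and must be followed by an IOTLB flush), while dma_pte_free_pagetable()
 * walks levels 2 and up and releases intermediate table pages whose whole
 * range has already been cleared, freeing the pgd itself only when the
 * entire address space is torn down.
 */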
743/* iommu handling */
744static int iommu_alloc_root_entry(struct intel_iommu *iommu)
745{
746 struct root_entry *root;
747 unsigned long flags;
748
749 root = (struct root_entry *)alloc_pgtable_page();
750 if (!root)
751 return -ENOMEM;
752
5b6985ce 753 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
754
755 spin_lock_irqsave(&iommu->lock, flags);
756 iommu->root_entry = root;
757 spin_unlock_irqrestore(&iommu->lock, flags);
758
759 return 0;
760}
761
ba395927
KA
762static void iommu_set_root_entry(struct intel_iommu *iommu)
763{
764 void *addr;
765 u32 cmd, sts;
766 unsigned long flag;
767
768 addr = iommu->root_entry;
769
770 spin_lock_irqsave(&iommu->register_lock, flag);
771 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
772
773 cmd = iommu->gcmd | DMA_GCMD_SRTP;
774 writel(cmd, iommu->reg + DMAR_GCMD_REG);
775
776 /* Make sure hardware completes it */
777 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
778 readl, (sts & DMA_GSTS_RTPS), sts);
779
780 spin_unlock_irqrestore(&iommu->register_lock, flag);
781}
782
783static void iommu_flush_write_buffer(struct intel_iommu *iommu)
784{
785 u32 val;
786 unsigned long flag;
787
788 if (!cap_rwbf(iommu->cap))
789 return;
790 val = iommu->gcmd | DMA_GCMD_WBF;
791
792 spin_lock_irqsave(&iommu->register_lock, flag);
793 writel(val, iommu->reg + DMAR_GCMD_REG);
794
795 /* Make sure hardware completes it */
796 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
797 readl, (!(val & DMA_GSTS_WBFS)), val);
798
799 spin_unlock_irqrestore(&iommu->register_lock, flag);
800}
801
802/* return value determines whether we need a write buffer flush */
803static int __iommu_flush_context(struct intel_iommu *iommu,
804 u16 did, u16 source_id, u8 function_mask, u64 type,
805 int non_present_entry_flush)
806{
807 u64 val = 0;
808 unsigned long flag;
809
810 /*
811 * In the non-present entry flush case, if hardware doesn't cache
812 * non-present entry we do nothing and if hardware cache non-present
813 * entry, we flush entries of domain 0 (the domain id is used to cache
814 * any non-present entries)
815 */
816 if (non_present_entry_flush) {
817 if (!cap_caching_mode(iommu->cap))
818 return 1;
819 else
820 did = 0;
821 }
822
823 switch (type) {
824 case DMA_CCMD_GLOBAL_INVL:
825 val = DMA_CCMD_GLOBAL_INVL;
826 break;
827 case DMA_CCMD_DOMAIN_INVL:
828 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
829 break;
830 case DMA_CCMD_DEVICE_INVL:
831 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
832 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
833 break;
834 default:
835 BUG();
836 }
837 val |= DMA_CCMD_ICC;
838
839 spin_lock_irqsave(&iommu->register_lock, flag);
840 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
841
842 /* Make sure hardware completes it */
843 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
844 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
845
846 spin_unlock_irqrestore(&iommu->register_lock, flag);
847
4d235ba6 848 /* flush context entry will implicitly flush write buffer */
ba395927
KA
849 return 0;
850}
851
ba395927
KA
852/* return value determines whether we need a write buffer flush */
853static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
854 u64 addr, unsigned int size_order, u64 type,
855 int non_present_entry_flush)
856{
857 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
858 u64 val = 0, val_iva = 0;
859 unsigned long flag;
860
861 /*
862 * In the non-present entry flush case, if hardware doesn't cache
863 * non-present entry we do nothing and if hardware cache non-present
864 * entry, we flush entries of domain 0 (the domain id is used to cache
865 * any non-present entries)
866 */
867 if (non_present_entry_flush) {
868 if (!cap_caching_mode(iommu->cap))
869 return 1;
870 else
871 did = 0;
872 }
873
874 switch (type) {
875 case DMA_TLB_GLOBAL_FLUSH:
876 /* global flush doesn't need to set IVA_REG */
877 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
878 break;
879 case DMA_TLB_DSI_FLUSH:
880 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
881 break;
882 case DMA_TLB_PSI_FLUSH:
883 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
884 /* Note: always flush non-leaf currently */
885 val_iva = size_order | addr;
886 break;
887 default:
888 BUG();
889 }
890 /* Note: set drain read/write */
891#if 0
892 /*
893 * This is probably only here to be extra safe; it looks like we
894 * can ignore it without any impact.
895 */
896 if (cap_read_drain(iommu->cap))
897 val |= DMA_TLB_READ_DRAIN;
898#endif
899 if (cap_write_drain(iommu->cap))
900 val |= DMA_TLB_WRITE_DRAIN;
901
902 spin_lock_irqsave(&iommu->register_lock, flag);
903 /* Note: Only uses first TLB reg currently */
904 if (val_iva)
905 dmar_writeq(iommu->reg + tlb_offset, val_iva);
906 dmar_writeq(iommu->reg + tlb_offset + 8, val);
907
908 /* Make sure hardware completes it */
909 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
910 dmar_readq, (!(val & DMA_TLB_IVT)), val);
911
912 spin_unlock_irqrestore(&iommu->register_lock, flag);
913
914 /* check IOTLB invalidation granularity */
915 if (DMA_TLB_IAIG(val) == 0)
916 printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
917 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
918 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
919 (unsigned long long)DMA_TLB_IIRG(type),
920 (unsigned long long)DMA_TLB_IAIG(val));
4d235ba6 921 /* flush iotlb entry will implicitly flush write buffer */
ba395927
KA
922 return 0;
923}
924
ba395927
KA
925static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
926 u64 addr, unsigned int pages, int non_present_entry_flush)
927{
f76aec76 928 unsigned int mask;
ba395927 929
5b6985ce 930 BUG_ON(addr & (~VTD_PAGE_MASK));
ba395927
KA
931 BUG_ON(pages == 0);
932
933 /* Fallback to domain selective flush if no PSI support */
934 if (!cap_pgsel_inv(iommu->cap))
a77b67d4
YS
935 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
936 DMA_TLB_DSI_FLUSH,
937 non_present_entry_flush);
ba395927
KA
938
939 /*
940 * PSI requires page size to be 2 ^ x, and the base address is naturally
941 * aligned to the size
942 */
f76aec76 943 mask = ilog2(__roundup_pow_of_two(pages));
ba395927 944 /* Fallback to domain selective flush if size is too big */
f76aec76 945 if (mask > cap_max_amask_val(iommu->cap))
a77b67d4
YS
946 return iommu->flush.flush_iotlb(iommu, did, 0, 0,
947 DMA_TLB_DSI_FLUSH, non_present_entry_flush);
ba395927 948
a77b67d4
YS
949 return iommu->flush.flush_iotlb(iommu, did, addr, mask,
950 DMA_TLB_PSI_FLUSH,
951 non_present_entry_flush);
ba395927
KA
952}
953
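/*
 * Worked example (illustrative): flushing 3 pages rounds up to 4, so
 * mask = ilog2(4) = 2 and the base address must be aligned to 4 pages;
 * if the mask exceeds cap_max_amask_val(iommu->cap), or page-selective
 * invalidation is not supported at all, the code above falls back to a
 * domain-selective flush.
 */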
f8bab735 954static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
955{
956 u32 pmen;
957 unsigned long flags;
958
959 spin_lock_irqsave(&iommu->register_lock, flags);
960 pmen = readl(iommu->reg + DMAR_PMEN_REG);
961 pmen &= ~DMA_PMEN_EPM;
962 writel(pmen, iommu->reg + DMAR_PMEN_REG);
963
964 /* wait for the protected region status bit to clear */
965 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
966 readl, !(pmen & DMA_PMEN_PRS), pmen);
967
968 spin_unlock_irqrestore(&iommu->register_lock, flags);
969}
970
ba395927
KA
971static int iommu_enable_translation(struct intel_iommu *iommu)
972{
973 u32 sts;
974 unsigned long flags;
975
976 spin_lock_irqsave(&iommu->register_lock, flags);
977 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
978
979 /* Make sure hardware completes it */
980 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
981 readl, (sts & DMA_GSTS_TES), sts);
982
983 iommu->gcmd |= DMA_GCMD_TE;
984 spin_unlock_irqrestore(&iommu->register_lock, flags);
985 return 0;
986}
987
988static int iommu_disable_translation(struct intel_iommu *iommu)
989{
990 u32 sts;
991 unsigned long flag;
992
993 spin_lock_irqsave(&iommu->register_lock, flag);
994 iommu->gcmd &= ~DMA_GCMD_TE;
995 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
996
997 /* Make sure hardware completes it */
998 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
999 readl, (!(sts & DMA_GSTS_TES)), sts);
1000
1001 spin_unlock_irqrestore(&iommu->register_lock, flag);
1002 return 0;
1003}
1004
3460a6d9
KA
1005/* iommu interrupt handling. Most stuff are MSI-like. */
1006
d94afc6c 1007static const char *fault_reason_strings[] =
3460a6d9
KA
1008{
1009 "Software",
1010 "Present bit in root entry is clear",
1011 "Present bit in context entry is clear",
1012 "Invalid context entry",
1013 "Access beyond MGAW",
1014 "PTE Write access is not set",
1015 "PTE Read access is not set",
1016 "Next page table ptr is invalid",
1017 "Root table address invalid",
1018 "Context table ptr is invalid",
1019 "non-zero reserved fields in RTP",
1020 "non-zero reserved fields in CTP",
1021 "non-zero reserved fields in PTE",
3460a6d9 1022};
f8bab735 1023#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
3460a6d9 1024
d94afc6c 1025const char *dmar_get_fault_reason(u8 fault_reason)
3460a6d9 1026{
d94afc6c 1027 if (fault_reason > MAX_FAULT_REASON_IDX)
1028 return "Unknown";
3460a6d9
KA
1029 else
1030 return fault_reason_strings[fault_reason];
1031}
1032
1033void dmar_msi_unmask(unsigned int irq)
1034{
1035 struct intel_iommu *iommu = get_irq_data(irq);
1036 unsigned long flag;
1037
1038 /* unmask it */
1039 spin_lock_irqsave(&iommu->register_lock, flag);
1040 writel(0, iommu->reg + DMAR_FECTL_REG);
1041 /* Read a reg to force flush the post write */
1042 readl(iommu->reg + DMAR_FECTL_REG);
1043 spin_unlock_irqrestore(&iommu->register_lock, flag);
1044}
1045
1046void dmar_msi_mask(unsigned int irq)
1047{
1048 unsigned long flag;
1049 struct intel_iommu *iommu = get_irq_data(irq);
1050
1051 /* mask it */
1052 spin_lock_irqsave(&iommu->register_lock, flag);
1053 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1054 /* Read a reg to force flush the post write */
1055 readl(iommu->reg + DMAR_FECTL_REG);
1056 spin_unlock_irqrestore(&iommu->register_lock, flag);
1057}
1058
1059void dmar_msi_write(int irq, struct msi_msg *msg)
1060{
1061 struct intel_iommu *iommu = get_irq_data(irq);
1062 unsigned long flag;
1063
1064 spin_lock_irqsave(&iommu->register_lock, flag);
1065 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1066 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1067 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1068 spin_unlock_irqrestore(&iommu->register_lock, flag);
1069}
1070
1071void dmar_msi_read(int irq, struct msi_msg *msg)
1072{
1073 struct intel_iommu *iommu = get_irq_data(irq);
1074 unsigned long flag;
1075
1076 spin_lock_irqsave(&iommu->register_lock, flag);
1077 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1078 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1079 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1080 spin_unlock_irqrestore(&iommu->register_lock, flag);
1081}
1082
1083static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
5b6985ce 1084 u8 fault_reason, u16 source_id, unsigned long long addr)
3460a6d9 1085{
d94afc6c 1086 const char *reason;
3460a6d9
KA
1087
1088 reason = dmar_get_fault_reason(fault_reason);
1089
1090 printk(KERN_ERR
1091 "DMAR:[%s] Request device [%02x:%02x.%d] "
1092 "fault addr %llx \n"
1093 "DMAR:[fault reason %02d] %s\n",
1094 (type ? "DMA Read" : "DMA Write"),
1095 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1096 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1097 return 0;
1098}
1099
1100#define PRIMARY_FAULT_REG_LEN (16)
1101static irqreturn_t iommu_page_fault(int irq, void *dev_id)
1102{
1103 struct intel_iommu *iommu = dev_id;
1104 int reg, fault_index;
1105 u32 fault_status;
1106 unsigned long flag;
1107
1108 spin_lock_irqsave(&iommu->register_lock, flag);
1109 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1110
1111 /* TBD: ignore advanced fault log currently */
1112 if (!(fault_status & DMA_FSTS_PPF))
1113 goto clear_overflow;
1114
1115 fault_index = dma_fsts_fault_record_index(fault_status);
1116 reg = cap_fault_reg_offset(iommu->cap);
1117 while (1) {
1118 u8 fault_reason;
1119 u16 source_id;
1120 u64 guest_addr;
1121 int type;
1122 u32 data;
1123
1124 /* highest 32 bits */
1125 data = readl(iommu->reg + reg +
1126 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1127 if (!(data & DMA_FRCD_F))
1128 break;
1129
1130 fault_reason = dma_frcd_fault_reason(data);
1131 type = dma_frcd_type(data);
1132
1133 data = readl(iommu->reg + reg +
1134 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1135 source_id = dma_frcd_source_id(data);
1136
1137 guest_addr = dmar_readq(iommu->reg + reg +
1138 fault_index * PRIMARY_FAULT_REG_LEN);
1139 guest_addr = dma_frcd_page_addr(guest_addr);
1140 /* clear the fault */
1141 writel(DMA_FRCD_F, iommu->reg + reg +
1142 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1143
1144 spin_unlock_irqrestore(&iommu->register_lock, flag);
1145
1146 iommu_page_fault_do_one(iommu, type, fault_reason,
1147 source_id, guest_addr);
1148
1149 fault_index++;
1150 if (fault_index > cap_num_fault_regs(iommu->cap))
1151 fault_index = 0;
1152 spin_lock_irqsave(&iommu->register_lock, flag);
1153 }
1154clear_overflow:
1155 /* clear primary fault overflow */
1156 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1157 if (fault_status & DMA_FSTS_PFO)
1158 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
1159
1160 spin_unlock_irqrestore(&iommu->register_lock, flag);
1161 return IRQ_HANDLED;
1162}
1163
1164int dmar_set_interrupt(struct intel_iommu *iommu)
1165{
1166 int irq, ret;
1167
1168 irq = create_irq();
1169 if (!irq) {
1170 printk(KERN_ERR "IOMMU: no free vectors\n");
1171 return -EINVAL;
1172 }
1173
1174 set_irq_data(irq, iommu);
1175 iommu->irq = irq;
1176
1177 ret = arch_setup_dmar_msi(irq);
1178 if (ret) {
1179 set_irq_data(irq, NULL);
1180 iommu->irq = 0;
1181 destroy_irq(irq);
1182 return 0;
1183 }
1184
1185 /* Force the fault registers to be cleared */
1186 iommu_page_fault(irq, iommu);
1187
1188 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
1189 if (ret)
1190 printk(KERN_ERR "IOMMU: can't request irq\n");
1191 return ret;
1192}
1193
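/*
 * Fault reporting is MSI based: dmar_set_interrupt() allocates a vector
 * and installs iommu_page_fault(), which walks the 16-byte primary fault
 * records starting at cap_fault_reg_offset(), logs each one through
 * iommu_page_fault_do_one(), clears its F bit, and finally acknowledges a
 * primary fault overflow (PFO) in DMAR_FSTS_REG.
 */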
ba395927
KA
1194static int iommu_init_domains(struct intel_iommu *iommu)
1195{
1196 unsigned long ndomains;
1197 unsigned long nlongs;
1198
1199 ndomains = cap_ndoms(iommu->cap);
1200 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1201 nlongs = BITS_TO_LONGS(ndomains);
1202
1203 /* TBD: there might be 64K domains,
1204 * consider other allocation for future chip
1205 */
1206 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1207 if (!iommu->domain_ids) {
1208 printk(KERN_ERR "Allocating domain id array failed\n");
1209 return -ENOMEM;
1210 }
1211 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1212 GFP_KERNEL);
1213 if (!iommu->domains) {
1214 printk(KERN_ERR "Allocating domain array failed\n");
1215 kfree(iommu->domain_ids);
1216 return -ENOMEM;
1217 }
1218
e61d98d8
SS
1219 spin_lock_init(&iommu->lock);
1220
ba395927
KA
1221 /*
1222 * if Caching mode is set, then invalid translations are tagged
1223 * with domainid 0. Hence we need to pre-allocate it.
1224 */
1225 if (cap_caching_mode(iommu->cap))
1226 set_bit(0, iommu->domain_ids);
1227 return 0;
1228}
ba395927 1229
ba395927
KA
1230
1231static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1232static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1233
1234void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1235{
1236 struct dmar_domain *domain;
1237 int i;
c7151a8d 1238 unsigned long flags;
ba395927 1239
ba395927
KA
1240 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1241 for (; i < cap_ndoms(iommu->cap); ) {
1242 domain = iommu->domains[i];
1243 clear_bit(i, iommu->domain_ids);
c7151a8d
WH
1244
1245 spin_lock_irqsave(&domain->iommu_lock, flags);
5e98c4b1
WH
1246 if (--domain->iommu_count == 0) {
1247 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1248 vm_domain_exit(domain);
1249 else
1250 domain_exit(domain);
1251 }
c7151a8d
WH
1252 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1253
ba395927
KA
1254 i = find_next_bit(iommu->domain_ids,
1255 cap_ndoms(iommu->cap), i+1);
1256 }
1257
1258 if (iommu->gcmd & DMA_GCMD_TE)
1259 iommu_disable_translation(iommu);
1260
1261 if (iommu->irq) {
1262 set_irq_data(iommu->irq, NULL);
1263 /* This will mask the irq */
1264 free_irq(iommu->irq, iommu);
1265 destroy_irq(iommu->irq);
1266 }
1267
1268 kfree(iommu->domains);
1269 kfree(iommu->domain_ids);
1270
d9630fe9
WH
1271 g_iommus[iommu->seq_id] = NULL;
1272
1273 /* if all iommus are freed, free g_iommus */
1274 for (i = 0; i < g_num_of_iommus; i++) {
1275 if (g_iommus[i])
1276 break;
1277 }
1278
1279 if (i == g_num_of_iommus)
1280 kfree(g_iommus);
1281
ba395927
KA
1282 /* free context mapping */
1283 free_context_table(iommu);
ba395927
KA
1284}
1285
1286static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1287{
1288 unsigned long num;
1289 unsigned long ndomains;
1290 struct dmar_domain *domain;
1291 unsigned long flags;
1292
1293 domain = alloc_domain_mem();
1294 if (!domain)
1295 return NULL;
1296
1297 ndomains = cap_ndoms(iommu->cap);
1298
1299 spin_lock_irqsave(&iommu->lock, flags);
1300 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1301 if (num >= ndomains) {
1302 spin_unlock_irqrestore(&iommu->lock, flags);
1303 free_domain_mem(domain);
1304 printk(KERN_ERR "IOMMU: no free domain ids\n");
1305 return NULL;
1306 }
1307
1308 set_bit(num, iommu->domain_ids);
1309 domain->id = num;
8c11e798
WH
1310 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1311 set_bit(iommu->seq_id, &domain->iommu_bmp);
d71a2f33 1312 domain->flags = 0;
ba395927
KA
1313 iommu->domains[num] = domain;
1314 spin_unlock_irqrestore(&iommu->lock, flags);
1315
1316 return domain;
1317}
1318
1319static void iommu_free_domain(struct dmar_domain *domain)
1320{
1321 unsigned long flags;
8c11e798
WH
1322 struct intel_iommu *iommu;
1323
1324 iommu = domain_get_iommu(domain);
ba395927 1325
8c11e798
WH
1326 spin_lock_irqsave(&iommu->lock, flags);
1327 clear_bit(domain->id, iommu->domain_ids);
1328 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1329}
1330
1331static struct iova_domain reserved_iova_list;
8a443df4
MG
1332static struct lock_class_key reserved_alloc_key;
1333static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1334
1335static void dmar_init_reserved_ranges(void)
1336{
1337 struct pci_dev *pdev = NULL;
1338 struct iova *iova;
1339 int i;
1340 u64 addr, size;
1341
f661197e 1342 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1343
8a443df4
MG
1344 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1345 &reserved_alloc_key);
1346 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1347 &reserved_rbtree_key);
1348
ba395927
KA
1349 /* IOAPIC ranges shouldn't be accessed by DMA */
1350 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1351 IOVA_PFN(IOAPIC_RANGE_END));
1352 if (!iova)
1353 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1354
1355 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1356 for_each_pci_dev(pdev) {
1357 struct resource *r;
1358
1359 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1360 r = &pdev->resource[i];
1361 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1362 continue;
1363 addr = r->start;
5b6985ce 1364 addr &= PAGE_MASK;
ba395927 1365 size = r->end - addr;
5b6985ce 1366 size = PAGE_ALIGN(size);
ba395927
KA
1367 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1368 IOVA_PFN(size + addr) - 1);
1369 if (!iova)
1370 printk(KERN_ERR "Reserve iova failed\n");
1371 }
1372 }
1373
1374}
1375
1376static void domain_reserve_special_ranges(struct dmar_domain *domain)
1377{
1378 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1379}
1380
1381static inline int guestwidth_to_adjustwidth(int gaw)
1382{
1383 int agaw;
1384 int r = (gaw - 12) % 9;
1385
1386 if (r == 0)
1387 agaw = gaw;
1388 else
1389 agaw = gaw + 9 - r;
1390 if (agaw > 64)
1391 agaw = 64;
1392 return agaw;
1393}
1394
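/*
 * Worked example (illustrative): guestwidth_to_adjustwidth(39) returns 39
 * since (39 - 12) % 9 == 0, while 40 is rounded up to 48 so that the width
 * always lands on a 12 + 9*n boundary the page-table levels can express;
 * anything above 64 is clamped to 64.
 */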
1395static int domain_init(struct dmar_domain *domain, int guest_width)
1396{
1397 struct intel_iommu *iommu;
1398 int adjust_width, agaw;
1399 unsigned long sagaw;
1400
f661197e 1401 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927 1402 spin_lock_init(&domain->mapping_lock);
c7151a8d 1403 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1404
1405 domain_reserve_special_ranges(domain);
1406
1407 /* calculate AGAW */
8c11e798 1408 iommu = domain_get_iommu(domain);
ba395927
KA
1409 if (guest_width > cap_mgaw(iommu->cap))
1410 guest_width = cap_mgaw(iommu->cap);
1411 domain->gaw = guest_width;
1412 adjust_width = guestwidth_to_adjustwidth(guest_width);
1413 agaw = width_to_agaw(adjust_width);
1414 sagaw = cap_sagaw(iommu->cap);
1415 if (!test_bit(agaw, &sagaw)) {
1416 /* hardware doesn't support it, choose a bigger one */
1417 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1418 agaw = find_next_bit(&sagaw, 5, agaw);
1419 if (agaw >= 5)
1420 return -ENODEV;
1421 }
1422 domain->agaw = agaw;
1423 INIT_LIST_HEAD(&domain->devices);
1424
8e604097
WH
1425 if (ecap_coherent(iommu->ecap))
1426 domain->iommu_coherency = 1;
1427 else
1428 domain->iommu_coherency = 0;
1429
c7151a8d
WH
1430 domain->iommu_count = 1;
1431
ba395927
KA
1432 /* always allocate the top pgd */
1433 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1434 if (!domain->pgd)
1435 return -ENOMEM;
5b6985ce 1436 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1437 return 0;
1438}
1439
1440static void domain_exit(struct dmar_domain *domain)
1441{
1442 u64 end;
1443
1444 /* Domain 0 is reserved, so don't process it */
1445 if (!domain)
1446 return;
1447
1448 domain_remove_dev_info(domain);
1449 /* destroy iovas */
1450 put_iova_domain(&domain->iovad);
1451 end = DOMAIN_MAX_ADDR(domain->gaw);
5b6985ce 1452 end = end & (~PAGE_MASK);
ba395927
KA
1453
1454 /* clear ptes */
1455 dma_pte_clear_range(domain, 0, end);
1456
1457 /* free page tables */
1458 dma_pte_free_pagetable(domain, 0, end);
1459
1460 iommu_free_domain(domain);
1461 free_domain_mem(domain);
1462}
1463
1464static int domain_context_mapping_one(struct dmar_domain *domain,
1465 u8 bus, u8 devfn)
1466{
1467 struct context_entry *context;
ba395927 1468 unsigned long flags;
5331fe6f 1469 struct intel_iommu *iommu;
ea6606b0
WH
1470 struct dma_pte *pgd;
1471 unsigned long num;
1472 unsigned long ndomains;
1473 int id;
1474 int agaw;
ba395927
KA
1475
1476 pr_debug("Set context mapping for %02x:%02x.%d\n",
1477 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1478 BUG_ON(!domain->pgd);
5331fe6f
WH
1479
1480 iommu = device_to_iommu(bus, devfn);
1481 if (!iommu)
1482 return -ENODEV;
1483
ba395927
KA
1484 context = device_to_context_entry(iommu, bus, devfn);
1485 if (!context)
1486 return -ENOMEM;
1487 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1488 if (context_present(context)) {
ba395927
KA
1489 spin_unlock_irqrestore(&iommu->lock, flags);
1490 return 0;
1491 }
1492
ea6606b0
WH
1493 id = domain->id;
1494 pgd = domain->pgd;
1495
1496 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
1497 int found = 0;
1498
1499 /* find an available domain id for this device in iommu */
1500 ndomains = cap_ndoms(iommu->cap);
1501 num = find_first_bit(iommu->domain_ids, ndomains);
1502 for (; num < ndomains; ) {
1503 if (iommu->domains[num] == domain) {
1504 id = num;
1505 found = 1;
1506 break;
1507 }
1508 num = find_next_bit(iommu->domain_ids,
1509 cap_ndoms(iommu->cap), num+1);
1510 }
1511
1512 if (found == 0) {
1513 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1514 if (num >= ndomains) {
1515 spin_unlock_irqrestore(&iommu->lock, flags);
1516 printk(KERN_ERR "IOMMU: no free domain ids\n");
1517 return -EFAULT;
1518 }
1519
1520 set_bit(num, iommu->domain_ids);
1521 iommu->domains[num] = domain;
1522 id = num;
1523 }
1524
1525 /* Skip top levels of page tables for
1526 * iommu which has less agaw than default.
1527 */
1528 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1529 pgd = phys_to_virt(dma_pte_addr(pgd));
1530 if (!dma_pte_present(pgd)) {
1531 spin_unlock_irqrestore(&iommu->lock, flags);
1532 return -ENOMEM;
1533 }
1534 }
1535 }
1536
1537 context_set_domain_id(context, id);
1538 context_set_address_width(context, iommu->agaw);
1539 context_set_address_root(context, virt_to_phys(pgd));
c07e7d21
MM
1540 context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
1541 context_set_fault_enable(context);
1542 context_set_present(context);
5331fe6f 1543 domain_flush_cache(domain, context, sizeof(*context));
ba395927
KA
1544
1545 /* it's a non-present to present mapping */
a77b67d4
YS
1546 if (iommu->flush.flush_context(iommu, domain->id,
1547 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
1548 DMA_CCMD_DEVICE_INVL, 1))
ba395927
KA
1549 iommu_flush_write_buffer(iommu);
1550 else
a77b67d4
YS
1551 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);
1552
ba395927 1553 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1554
1555 spin_lock_irqsave(&domain->iommu_lock, flags);
1556 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1557 domain->iommu_count++;
1558 domain_update_iommu_coherency(domain);
1559 }
1560 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1561 return 0;
1562}
1563
1564static int
1565domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1566{
1567 int ret;
1568 struct pci_dev *tmp, *parent;
1569
1570 ret = domain_context_mapping_one(domain, pdev->bus->number,
1571 pdev->devfn);
1572 if (ret)
1573 return ret;
1574
1575 /* dependent device mapping */
1576 tmp = pci_find_upstream_pcie_bridge(pdev);
1577 if (!tmp)
1578 return 0;
1579 /* Secondary interface's bus number and devfn 0 */
1580 parent = pdev->bus->self;
1581 while (parent != tmp) {
1582 ret = domain_context_mapping_one(domain, parent->bus->number,
1583 parent->devfn);
1584 if (ret)
1585 return ret;
1586 parent = parent->bus->self;
1587 }
1588 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1589 return domain_context_mapping_one(domain,
1590 tmp->subordinate->number, 0);
1591 else /* this is a legacy PCI bridge */
1592 return domain_context_mapping_one(domain,
1593 tmp->bus->number, tmp->devfn);
1594}
1595
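/*
 * For a device behind a PCIe-to-PCI bridge, the routine above also
 * programs a context entry for every bridge on the path up to that PCIe
 * bridge, and finally one for the bridge's secondary bus with devfn 0
 * (a legacy PCI bridge uses its own bus/devfn instead), because DMA from
 * behind such a bridge is tagged with the bridge's source-id rather than
 * the device's own.
 */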
5331fe6f 1596static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1597{
1598 int ret;
1599 struct pci_dev *tmp, *parent;
5331fe6f
WH
1600 struct intel_iommu *iommu;
1601
1602 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
1603 if (!iommu)
1604 return -ENODEV;
ba395927 1605
8c11e798 1606 ret = device_context_mapped(iommu,
ba395927
KA
1607 pdev->bus->number, pdev->devfn);
1608 if (!ret)
1609 return ret;
1610 /* dependent device mapping */
1611 tmp = pci_find_upstream_pcie_bridge(pdev);
1612 if (!tmp)
1613 return ret;
1614 /* Secondary interface's bus number and devfn 0 */
1615 parent = pdev->bus->self;
1616 while (parent != tmp) {
8c11e798 1617 ret = device_context_mapped(iommu, parent->bus->number,
ba395927
KA
1618 parent->devfn);
1619 if (!ret)
1620 return ret;
1621 parent = parent->bus->self;
1622 }
1623 if (tmp->is_pcie)
8c11e798 1624 return device_context_mapped(iommu,
ba395927
KA
1625 tmp->subordinate->number, 0);
1626 else
8c11e798 1627 return device_context_mapped(iommu,
ba395927
KA
1628 tmp->bus->number, tmp->devfn);
1629}
1630
1631static int
1632domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1633 u64 hpa, size_t size, int prot)
1634{
1635 u64 start_pfn, end_pfn;
1636 struct dma_pte *pte;
1637 int index;
5b6985ce
FY
1638 int addr_width = agaw_to_width(domain->agaw);
1639
1640 hpa &= (((u64)1) << addr_width) - 1;
ba395927
KA
1641
1642 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1643 return -EINVAL;
5b6985ce
FY
1644 iova &= PAGE_MASK;
1645 start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
1646 end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
ba395927
KA
1647 index = 0;
1648 while (start_pfn < end_pfn) {
5b6985ce 1649 pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
ba395927
KA
1650 if (!pte)
1651 return -ENOMEM;
1652 /* We don't need lock here, nobody else
1653 * touches the iova range
1654 */
19c239ce
MM
1655 BUG_ON(dma_pte_addr(pte));
1656 dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
1657 dma_set_pte_prot(pte, prot);
5331fe6f 1658 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927
KA
1659 start_pfn++;
1660 index++;
1661 }
1662 return 0;
1663}
1664
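/*
 * Example (illustrative): domain_page_mapping(domain, iova, hpa, 8192,
 * DMA_PTE_READ | DMA_PTE_WRITE) installs two consecutive 4KiB leaf PTEs;
 * a prot value with neither read nor write set is rejected with -EINVAL,
 * and the IOTLB (or write-buffer) flush is left to the caller.
 */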
c7151a8d 1665static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1666{
c7151a8d
WH
1667 if (!iommu)
1668 return;
8c11e798
WH
1669
1670 clear_context_table(iommu, bus, devfn);
1671 iommu->flush.flush_context(iommu, 0, 0, 0,
a77b67d4 1672 DMA_CCMD_GLOBAL_INVL, 0);
8c11e798 1673 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
a77b67d4 1674 DMA_TLB_GLOBAL_FLUSH, 0);
ba395927
KA
1675}
1676
1677static void domain_remove_dev_info(struct dmar_domain *domain)
1678{
1679 struct device_domain_info *info;
1680 unsigned long flags;
c7151a8d 1681 struct intel_iommu *iommu;
ba395927
KA
1682
1683 spin_lock_irqsave(&device_domain_lock, flags);
1684 while (!list_empty(&domain->devices)) {
1685 info = list_entry(domain->devices.next,
1686 struct device_domain_info, link);
1687 list_del(&info->link);
1688 list_del(&info->global);
1689 if (info->dev)
358dd8ac 1690 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1691 spin_unlock_irqrestore(&device_domain_lock, flags);
1692
c7151a8d
WH
1693 iommu = device_to_iommu(info->bus, info->devfn);
1694 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1695 free_devinfo_mem(info);
1696
1697 spin_lock_irqsave(&device_domain_lock, flags);
1698 }
1699 spin_unlock_irqrestore(&device_domain_lock, flags);
1700}
1701
1702/*
1703 * find_domain
358dd8ac 1704 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1705 */
38717946 1706static struct dmar_domain *
ba395927
KA
1707find_domain(struct pci_dev *pdev)
1708{
1709 struct device_domain_info *info;
1710
1711 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1712 info = pdev->dev.archdata.iommu;
ba395927
KA
1713 if (info)
1714 return info->domain;
1715 return NULL;
1716}
1717
ba395927
KA
1718/* domain is initialized */
1719static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1720{
1721 struct dmar_domain *domain, *found = NULL;
1722 struct intel_iommu *iommu;
1723 struct dmar_drhd_unit *drhd;
1724 struct device_domain_info *info, *tmp;
1725 struct pci_dev *dev_tmp;
1726 unsigned long flags;
1727 int bus = 0, devfn = 0;
1728
1729 domain = find_domain(pdev);
1730 if (domain)
1731 return domain;
1732
1733 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1734 if (dev_tmp) {
1735 if (dev_tmp->is_pcie) {
1736 bus = dev_tmp->subordinate->number;
1737 devfn = 0;
1738 } else {
1739 bus = dev_tmp->bus->number;
1740 devfn = dev_tmp->devfn;
1741 }
1742 spin_lock_irqsave(&device_domain_lock, flags);
1743 list_for_each_entry(info, &device_domain_list, global) {
1744 if (info->bus == bus && info->devfn == devfn) {
1745 found = info->domain;
1746 break;
1747 }
1748 }
1749 spin_unlock_irqrestore(&device_domain_lock, flags);
1750 /* pcie-pci bridge already has a domain, use it */
1751 if (found) {
1752 domain = found;
1753 goto found_domain;
1754 }
1755 }
1756
1757 /* Allocate new domain for the device */
1758 drhd = dmar_find_matched_drhd_unit(pdev);
1759 if (!drhd) {
1760 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1761 pci_name(pdev));
1762 return NULL;
1763 }
1764 iommu = drhd->iommu;
1765
1766 domain = iommu_alloc_domain(iommu);
1767 if (!domain)
1768 goto error;
1769
1770 if (domain_init(domain, gaw)) {
1771 domain_exit(domain);
1772 goto error;
1773 }
1774
1775 /* register pcie-to-pci device */
1776 if (dev_tmp) {
1777 info = alloc_devinfo_mem();
1778 if (!info) {
1779 domain_exit(domain);
1780 goto error;
1781 }
1782 info->bus = bus;
1783 info->devfn = devfn;
1784 info->dev = NULL;
1785 info->domain = domain;
1786 /* This domain is shared by devices under p2p bridge */
3b5410e7 1787 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1788
1789 /* pcie-to-pci bridge already has a domain, use it */
1790 found = NULL;
1791 spin_lock_irqsave(&device_domain_lock, flags);
1792 list_for_each_entry(tmp, &device_domain_list, global) {
1793 if (tmp->bus == bus && tmp->devfn == devfn) {
1794 found = tmp->domain;
1795 break;
1796 }
1797 }
1798 if (found) {
1799 free_devinfo_mem(info);
1800 domain_exit(domain);
1801 domain = found;
1802 } else {
1803 list_add(&info->link, &domain->devices);
1804 list_add(&info->global, &device_domain_list);
1805 }
1806 spin_unlock_irqrestore(&device_domain_lock, flags);
1807 }
1808
1809found_domain:
1810 info = alloc_devinfo_mem();
1811 if (!info)
1812 goto error;
1813 info->bus = pdev->bus->number;
1814 info->devfn = pdev->devfn;
1815 info->dev = pdev;
1816 info->domain = domain;
1817 spin_lock_irqsave(&device_domain_lock, flags);
1818 /* somebody is fast */
1819 found = find_domain(pdev);
1820 if (found != NULL) {
1821 spin_unlock_irqrestore(&device_domain_lock, flags);
1822 if (found != domain) {
1823 domain_exit(domain);
1824 domain = found;
1825 }
1826 free_devinfo_mem(info);
1827 return domain;
1828 }
1829 list_add(&info->link, &domain->devices);
1830 list_add(&info->global, &device_domain_list);
358dd8ac 1831 pdev->dev.archdata.iommu = info;
ba395927
KA
1832 spin_unlock_irqrestore(&device_domain_lock, flags);
1833 return domain;
1834error:
1835 /* recheck it here, maybe others set it */
1836 return find_domain(pdev);
1837}
1838
5b6985ce
FY
1839static int iommu_prepare_identity_map(struct pci_dev *pdev,
1840 unsigned long long start,
1841 unsigned long long end)
ba395927
KA
1842{
1843 struct dmar_domain *domain;
1844 unsigned long size;
5b6985ce 1845 unsigned long long base;
ba395927
KA
1846 int ret;
1847
1848 printk(KERN_INFO
1849 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1850 pci_name(pdev), start, end);
1851 /* page table init */
1852 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1853 if (!domain)
1854 return -ENOMEM;
1855
1856 /* The address might not be aligned */
5b6985ce 1857 base = start & PAGE_MASK;
ba395927 1858 size = end - base;
5b6985ce 1859 size = PAGE_ALIGN(size);
ba395927
KA
1860 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1861 IOVA_PFN(base + size) - 1)) {
1862 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1863 ret = -ENOMEM;
1864 goto error;
1865 }
1866
1867 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1868 size, base, pci_name(pdev));
1869 /*
1870 * RMRR range might have overlap with physical memory range,
1871 * clear it first
1872 */
1873 dma_pte_clear_range(domain, base, base + size);
1874
1875 ret = domain_page_mapping(domain, base, base, size,
1876 DMA_PTE_READ|DMA_PTE_WRITE);
1877 if (ret)
1878 goto error;
1879
1880 /* context entry init */
1881 ret = domain_context_mapping(domain, pdev);
1882 if (!ret)
1883 return 0;
1884error:
1885 domain_exit(domain);
1886 return ret;
1887
1888}
1889
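/*
 * The identity ("1:1") maps built above serve the RMRR regions and the
 * graphics and ISA/floppy work-arounds below: the range is page aligned,
 * reserved in the domain's IOVA allocator so the DMA API never hands it
 * out, and then mapped with iova == hpa.
 */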
1890static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1891 struct pci_dev *pdev)
1892{
358dd8ac 1893 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
1894 return 0;
1895 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1896 rmrr->end_address + 1);
1897}
1898
e820482c 1899#ifdef CONFIG_DMAR_GFX_WA
d52d53b8
YL
1900struct iommu_prepare_data {
1901 struct pci_dev *pdev;
1902 int ret;
1903};
1904
1905static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1906 unsigned long end_pfn, void *datax)
1907{
1908 struct iommu_prepare_data *data;
1909
1910 data = (struct iommu_prepare_data *)datax;
1911
1912 data->ret = iommu_prepare_identity_map(data->pdev,
1913 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1914 return data->ret;
1915
1916}
1917
1918static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1919{
1920 int nid;
1921 struct iommu_prepare_data data;
1922
1923 data.pdev = pdev;
1924 data.ret = 0;
1925
1926 for_each_online_node(nid) {
1927 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1928 if (data.ret)
1929 return data.ret;
1930 }
1931 return data.ret;
1932}
1933
e820482c
KA
1934static void __init iommu_prepare_gfx_mapping(void)
1935{
1936 struct pci_dev *pdev = NULL;
e820482c
KA
1937 int ret;
1938
1939 for_each_pci_dev(pdev) {
358dd8ac 1940 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
e820482c
KA
1941 !IS_GFX_DEVICE(pdev))
1942 continue;
1943 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1944 pci_name(pdev));
d52d53b8
YL
1945 ret = iommu_prepare_with_active_regions(pdev);
1946 if (ret)
1947 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
e820482c
KA
1948 }
1949}
2abd7e16
MM
1950#else /* !CONFIG_DMAR_GFX_WA */
1951static inline void iommu_prepare_gfx_mapping(void)
1952{
1953 return;
1954}
e820482c
KA
1955#endif
1956
49a0429e
KA
1957#ifdef CONFIG_DMAR_FLOPPY_WA
1958static inline void iommu_prepare_isa(void)
1959{
1960 struct pci_dev *pdev;
1961 int ret;
1962
1963 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1964 if (!pdev)
1965 return;
1966
1967 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1968 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1969
1970 if (ret)
1971 printk("IOMMU: Failed to create 0-64M identity map, "
1972 "floppy might not work\n");
1973
1974}
1975#else
1976static inline void iommu_prepare_isa(void)
1977{
1978 return;
1979}
 1980#endif /* !CONFIG_DMAR_FLOPPY_WA */
1981
519a0549 1982static int __init init_dmars(void)
ba395927
KA
1983{
1984 struct dmar_drhd_unit *drhd;
1985 struct dmar_rmrr_unit *rmrr;
1986 struct pci_dev *pdev;
1987 struct intel_iommu *iommu;
80b20dd8 1988 int i, ret, unit = 0;
ba395927
KA
1989
1990 /*
1991 * for each drhd
1992 * allocate root
1993 * initialize and program root entry to not present
1994 * endfor
1995 */
1996 for_each_drhd_unit(drhd) {
5e0d2a6f 1997 g_num_of_iommus++;
1998 /*
 1999 * lock not needed as this is only incremented in the
 2000 * single-threaded kernel __init code path; all other
 2001 * accesses are read-only
2002 */
2003 }
2004
d9630fe9
WH
2005 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2006 GFP_KERNEL);
2007 if (!g_iommus) {
2008 printk(KERN_ERR "Allocating global iommu array failed\n");
2009 ret = -ENOMEM;
2010 goto error;
2011 }
2012
80b20dd8 2013 deferred_flush = kzalloc(g_num_of_iommus *
2014 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2015 if (!deferred_flush) {
d9630fe9 2016 kfree(g_iommus);
5e0d2a6f 2017 ret = -ENOMEM;
2018 goto error;
2019 }
2020
5e0d2a6f 2021 for_each_drhd_unit(drhd) {
2022 if (drhd->ignored)
2023 continue;
1886e8a9
SS
2024
2025 iommu = drhd->iommu;
d9630fe9 2026 g_iommus[iommu->seq_id] = iommu;
ba395927 2027
e61d98d8
SS
2028 ret = iommu_init_domains(iommu);
2029 if (ret)
2030 goto error;
2031
ba395927
KA
2032 /*
2033 * TBD:
2034 * we could share the same root & context tables
2035 * amoung all IOMMU's. Need to Split it later.
2036 */
2037 ret = iommu_alloc_root_entry(iommu);
2038 if (ret) {
2039 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2040 goto error;
2041 }
2042 }
2043
a77b67d4
YS
2044 for_each_drhd_unit(drhd) {
2045 if (drhd->ignored)
2046 continue;
2047
2048 iommu = drhd->iommu;
2049 if (dmar_enable_qi(iommu)) {
2050 /*
2051 * Queued Invalidate not enabled, use Register Based
2052 * Invalidate
2053 */
2054 iommu->flush.flush_context = __iommu_flush_context;
2055 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2056 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2057 "invalidation\n",
2058 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2059 } else {
2060 iommu->flush.flush_context = qi_flush_context;
2061 iommu->flush.flush_iotlb = qi_flush_iotlb;
2062 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2063 "invalidation\n",
2064 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2065 }
2066 }
2067
ba395927
KA
2068 /*
2069 * For each rmrr
2070 * for each dev attached to rmrr
2071 * do
2072 * locate drhd for dev, alloc domain for dev
2073 * allocate free domain
2074 * allocate page table entries for rmrr
2075 * if context not allocated for bus
2076 * allocate and init context
2077 * set present in root table for this bus
2078 * init context with domain, translation etc
2079 * endfor
2080 * endfor
2081 */
2082 for_each_rmrr_units(rmrr) {
ba395927
KA
2083 for (i = 0; i < rmrr->devices_cnt; i++) {
2084 pdev = rmrr->devices[i];
 2085 /* some BIOSes list non-existent devices in the DMAR table */
2086 if (!pdev)
2087 continue;
2088 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2089 if (ret)
2090 printk(KERN_ERR
2091 "IOMMU: mapping reserved region failed\n");
2092 }
2093 }
2094
e820482c
KA
2095 iommu_prepare_gfx_mapping();
2096
49a0429e
KA
2097 iommu_prepare_isa();
2098
ba395927
KA
2099 /*
2100 * for each drhd
2101 * enable fault log
2102 * global invalidate context cache
2103 * global invalidate iotlb
2104 * enable translation
2105 */
2106 for_each_drhd_unit(drhd) {
2107 if (drhd->ignored)
2108 continue;
2109 iommu = drhd->iommu;
 2110 sprintf(iommu->name, "dmar%d", unit++);
2111
2112 iommu_flush_write_buffer(iommu);
2113
3460a6d9
KA
2114 ret = dmar_set_interrupt(iommu);
2115 if (ret)
2116 goto error;
2117
ba395927
KA
2118 iommu_set_root_entry(iommu);
2119
a77b67d4
YS
2120 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2121 0);
2122 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2123 0);
f8bab735 2124 iommu_disable_protect_mem_regions(iommu);
2125
ba395927
KA
2126 ret = iommu_enable_translation(iommu);
2127 if (ret)
2128 goto error;
2129 }
2130
2131 return 0;
2132error:
2133 for_each_drhd_unit(drhd) {
2134 if (drhd->ignored)
2135 continue;
2136 iommu = drhd->iommu;
2137 free_iommu(iommu);
2138 }
d9630fe9 2139 kfree(g_iommus);
ba395927
KA
2140 return ret;
2141}
2142
2143static inline u64 aligned_size(u64 host_addr, size_t size)
2144{
2145 u64 addr;
5b6985ce
FY
2146 addr = (host_addr & (~PAGE_MASK)) + size;
2147 return PAGE_ALIGN(addr);
ba395927
KA
2148}
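/*
 * Worked example for aligned_size() above (illustrative only, assuming the
 * usual 4 KiB page size):
 *   host_addr = 0x12345, size = 0x300
 *     offset within page = 0x12345 & ~PAGE_MASK = 0x345
 *     offset + size      = 0x345 + 0x300        = 0x645
 *     PAGE_ALIGN(0x645)  = 0x1000  ->  one page worth of mapping
 *   host_addr = 0x12f00 with the same size straddles a page boundary:
 *     0xf00 + 0x300 = 0x1200 -> PAGE_ALIGN -> 0x2000 (two pages).
 */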
2149
2150struct iova *
f76aec76 2151iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
ba395927 2152{
ba395927
KA
2153 struct iova *piova;
2154
2155 /* Make sure it's in range */
ba395927 2156 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
f76aec76 2157 if (!size || (IOVA_START_ADDR + size > end))
ba395927
KA
2158 return NULL;
2159
2160 piova = alloc_iova(&domain->iovad,
5b6985ce 2161 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
ba395927
KA
2162 return piova;
2163}
2164
f76aec76
KA
2165static struct iova *
2166__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
bb9e6d65 2167 size_t size, u64 dma_mask)
ba395927 2168{
ba395927 2169 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2170 struct iova *iova = NULL;
ba395927 2171
bb9e6d65
FT
2172 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2173 iova = iommu_alloc_iova(domain, size, dma_mask);
2174 else {
ba395927
KA
2175 /*
2176 * First try to allocate an io virtual address in
2177 * DMA_32BIT_MASK and if that fails then try allocating
3609801e 2178 * from a higher range
ba395927 2179 */
f76aec76 2180 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
ba395927 2181 if (!iova)
bb9e6d65 2182 iova = iommu_alloc_iova(domain, size, dma_mask);
ba395927
KA
2183 }
2184
2185 if (!iova) {
2186 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
f76aec76
KA
2187 return NULL;
2188 }
2189
2190 return iova;
2191}
2192
2193static struct dmar_domain *
2194get_valid_domain_for_dev(struct pci_dev *pdev)
2195{
2196 struct dmar_domain *domain;
2197 int ret;
2198
2199 domain = get_domain_for_dev(pdev,
2200 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2201 if (!domain) {
2202 printk(KERN_ERR
2203 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2204 return NULL;
ba395927
KA
2205 }
2206
2207 /* make sure context mapping is ok */
5331fe6f 2208 if (unlikely(!domain_context_mapped(pdev))) {
ba395927 2209 ret = domain_context_mapping(domain, pdev);
f76aec76
KA
2210 if (ret) {
2211 printk(KERN_ERR
2212 "Domain context map for %s failed",
2213 pci_name(pdev));
4fe05bbc 2214 return NULL;
f76aec76 2215 }
ba395927
KA
2216 }
2217
f76aec76
KA
2218 return domain;
2219}
2220
bb9e6d65
FT
2221static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2222 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2223{
2224 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2225 struct dmar_domain *domain;
5b6985ce 2226 phys_addr_t start_paddr;
f76aec76
KA
2227 struct iova *iova;
2228 int prot = 0;
6865f0d1 2229 int ret;
8c11e798 2230 struct intel_iommu *iommu;
f76aec76
KA
2231
2232 BUG_ON(dir == DMA_NONE);
358dd8ac 2233 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
6865f0d1 2234 return paddr;
f76aec76
KA
2235
2236 domain = get_valid_domain_for_dev(pdev);
2237 if (!domain)
2238 return 0;
2239
8c11e798 2240 iommu = domain_get_iommu(domain);
6865f0d1 2241 size = aligned_size((u64)paddr, size);
f76aec76 2242
bb9e6d65 2243 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76
KA
2244 if (!iova)
2245 goto error;
2246
5b6985ce 2247 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
f76aec76 2248
ba395927
KA
2249 /*
2250 * Check if DMAR supports zero-length reads on write only
2251 * mappings..
2252 */
2253 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2254 !cap_zlr(iommu->cap))
ba395927
KA
2255 prot |= DMA_PTE_READ;
2256 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2257 prot |= DMA_PTE_WRITE;
2258 /*
6865f0d1 2259 * paddr - (paddr + size) might be partial page, we should map the whole
ba395927 2260 * page. Note: if two part of one page are separately mapped, we
6865f0d1 2261 * might have two guest_addr mapping to the same host paddr, but this
ba395927
KA
2262 * is not a big problem
2263 */
6865f0d1 2264 ret = domain_page_mapping(domain, start_paddr,
5b6985ce 2265 ((u64)paddr) & PAGE_MASK, size, prot);
ba395927
KA
2266 if (ret)
2267 goto error;
2268
f76aec76 2269 /* it's a non-present to present mapping */
8c11e798 2270 ret = iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2271 start_paddr, size >> VTD_PAGE_SHIFT, 1);
f76aec76 2272 if (ret)
8c11e798 2273 iommu_flush_write_buffer(iommu);
f76aec76 2274
5b6985ce 2275 return start_paddr + ((u64)paddr & (~PAGE_MASK));
ba395927 2276
ba395927 2277error:
f76aec76
KA
2278 if (iova)
2279 __free_iova(&domain->iovad, iova);
ba395927 2280 printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
5b6985ce 2281 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2282 return 0;
2283}
2284
bb9e6d65
FT
2285dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2286 size_t size, int dir)
2287{
2288 return __intel_map_single(hwdev, paddr, size, dir,
2289 to_pci_dev(hwdev)->dma_mask);
2290}
2291
5e0d2a6f 2292static void flush_unmaps(void)
2293{
80b20dd8 2294 int i, j;
5e0d2a6f 2295
5e0d2a6f 2296 timer_on = 0;
2297
2298 /* just flush them all */
2299 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2300 struct intel_iommu *iommu = g_iommus[i];
2301 if (!iommu)
2302 continue;
c42d9f32 2303
a2bb8459 2304 if (deferred_flush[i].next) {
a77b67d4
YS
2305 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2306 DMA_TLB_GLOBAL_FLUSH, 0);
80b20dd8 2307 for (j = 0; j < deferred_flush[i].next; j++) {
2308 __free_iova(&deferred_flush[i].domain[j]->iovad,
2309 deferred_flush[i].iova[j]);
2310 }
2311 deferred_flush[i].next = 0;
2312 }
5e0d2a6f 2313 }
2314
5e0d2a6f 2315 list_size = 0;
5e0d2a6f 2316}
2317
2318static void flush_unmaps_timeout(unsigned long data)
2319{
80b20dd8 2320 unsigned long flags;
2321
2322 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2323 flush_unmaps();
80b20dd8 2324 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2325}
2326
2327static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2328{
2329 unsigned long flags;
80b20dd8 2330 int next, iommu_id;
8c11e798 2331 struct intel_iommu *iommu;
5e0d2a6f 2332
2333 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2334 if (list_size == HIGH_WATER_MARK)
2335 flush_unmaps();
2336
8c11e798
WH
2337 iommu = domain_get_iommu(dom);
2338 iommu_id = iommu->seq_id;
c42d9f32 2339
80b20dd8 2340 next = deferred_flush[iommu_id].next;
2341 deferred_flush[iommu_id].domain[next] = dom;
2342 deferred_flush[iommu_id].iova[next] = iova;
2343 deferred_flush[iommu_id].next++;
5e0d2a6f 2344
2345 if (!timer_on) {
2346 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2347 timer_on = 1;
2348 }
2349 list_size++;
2350 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2351}
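/*
 * Summary of the deferred-flush path above: unmapped IOVAs are queued
 * per IOMMU in deferred_flush[] rather than flushed and freed at once.
 * flush_unmaps() drains the queues either when list_size reaches
 * HIGH_WATER_MARK or when the 10 ms unmap_timer fires; draining issues
 * one global IOTLB flush per IOMMU and then frees the queued IOVAs.
 * When intel_iommu_strict is set, intel_unmap_single() below flushes
 * and frees immediately instead of queueing.
 */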
2352
5b6985ce
FY
2353void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2354 int dir)
ba395927 2355{
ba395927 2356 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76
KA
2357 struct dmar_domain *domain;
2358 unsigned long start_addr;
ba395927 2359 struct iova *iova;
8c11e798 2360 struct intel_iommu *iommu;
ba395927 2361
358dd8ac 2362 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
f76aec76 2363 return;
ba395927
KA
2364 domain = find_domain(pdev);
2365 BUG_ON(!domain);
2366
8c11e798
WH
2367 iommu = domain_get_iommu(domain);
2368
ba395927 2369 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
f76aec76 2370 if (!iova)
ba395927 2371 return;
ba395927 2372
5b6985ce 2373 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2374 size = aligned_size((u64)dev_addr, size);
ba395927 2375
f76aec76 2376 pr_debug("Device %s unmapping: %lx@%llx\n",
5b6985ce 2377 pci_name(pdev), size, (unsigned long long)start_addr);
ba395927 2378
f76aec76
KA
2379 /* clear the whole page */
2380 dma_pte_clear_range(domain, start_addr, start_addr + size);
2381 /* free page tables */
2382 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
5e0d2a6f 2383 if (intel_iommu_strict) {
8c11e798 2384 if (iommu_flush_iotlb_psi(iommu,
5b6985ce 2385 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
8c11e798 2386 iommu_flush_write_buffer(iommu);
5e0d2a6f 2387 /* free iova */
2388 __free_iova(&domain->iovad, iova);
2389 } else {
2390 add_unmap(domain, iova);
2391 /*
 2392 * queue up the release of the unmap to save roughly 1/6th of
 2393 * the CPU time used up by the iotlb flush operation...
2394 */
5e0d2a6f 2395 }
ba395927
KA
2396}
2397
5b6985ce
FY
2398void *intel_alloc_coherent(struct device *hwdev, size_t size,
2399 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2400{
2401 void *vaddr;
2402 int order;
2403
5b6985ce 2404 size = PAGE_ALIGN(size);
ba395927
KA
2405 order = get_order(size);
2406 flags &= ~(GFP_DMA | GFP_DMA32);
2407
2408 vaddr = (void *)__get_free_pages(flags, order);
2409 if (!vaddr)
2410 return NULL;
2411 memset(vaddr, 0, size);
2412
bb9e6d65
FT
2413 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2414 DMA_BIDIRECTIONAL,
2415 hwdev->coherent_dma_mask);
ba395927
KA
2416 if (*dma_handle)
2417 return vaddr;
2418 free_pages((unsigned long)vaddr, order);
2419 return NULL;
2420}
2421
5b6985ce
FY
2422void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2423 dma_addr_t dma_handle)
ba395927
KA
2424{
2425 int order;
2426
5b6985ce 2427 size = PAGE_ALIGN(size);
ba395927
KA
2428 order = get_order(size);
2429
2430 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2431 free_pages((unsigned long)vaddr, order);
2432}
2433
12d4d40e 2434#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
5b6985ce
FY
2435
2436void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2437 int nelems, int dir)
ba395927
KA
2438{
2439 int i;
2440 struct pci_dev *pdev = to_pci_dev(hwdev);
2441 struct dmar_domain *domain;
f76aec76
KA
2442 unsigned long start_addr;
2443 struct iova *iova;
2444 size_t size = 0;
2445 void *addr;
c03ab37c 2446 struct scatterlist *sg;
8c11e798 2447 struct intel_iommu *iommu;
ba395927 2448
358dd8ac 2449 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2450 return;
2451
2452 domain = find_domain(pdev);
8c11e798
WH
2453 BUG_ON(!domain);
2454
2455 iommu = domain_get_iommu(domain);
ba395927 2456
c03ab37c 2457 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
f76aec76
KA
2458 if (!iova)
2459 return;
c03ab37c 2460 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2461 addr = SG_ENT_VIRT_ADDRESS(sg);
2462 size += aligned_size((u64)addr, sg->length);
2463 }
2464
5b6985ce 2465 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76
KA
2466
2467 /* clear the whole page */
2468 dma_pte_clear_range(domain, start_addr, start_addr + size);
2469 /* free page tables */
2470 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2471
8c11e798 2472 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
5b6985ce 2473 size >> VTD_PAGE_SHIFT, 0))
8c11e798 2474 iommu_flush_write_buffer(iommu);
f76aec76
KA
2475
2476 /* free iova */
2477 __free_iova(&domain->iovad, iova);
ba395927
KA
2478}
2479
ba395927 2480static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2481 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2482{
2483 int i;
c03ab37c 2484 struct scatterlist *sg;
ba395927 2485
c03ab37c 2486 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2487 BUG_ON(!sg_page(sg));
c03ab37c
FT
2488 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2489 sg->dma_length = sg->length;
ba395927
KA
2490 }
2491 return nelems;
2492}
2493
5b6985ce
FY
2494int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2495 int dir)
ba395927
KA
2496{
2497 void *addr;
2498 int i;
ba395927
KA
2499 struct pci_dev *pdev = to_pci_dev(hwdev);
2500 struct dmar_domain *domain;
f76aec76
KA
2501 size_t size = 0;
2502 int prot = 0;
2503 size_t offset = 0;
2504 struct iova *iova = NULL;
2505 int ret;
c03ab37c 2506 struct scatterlist *sg;
f76aec76 2507 unsigned long start_addr;
8c11e798 2508 struct intel_iommu *iommu;
ba395927
KA
2509
2510 BUG_ON(dir == DMA_NONE);
358dd8ac 2511 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2512 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2513
f76aec76
KA
2514 domain = get_valid_domain_for_dev(pdev);
2515 if (!domain)
2516 return 0;
2517
8c11e798
WH
2518 iommu = domain_get_iommu(domain);
2519
c03ab37c 2520 for_each_sg(sglist, sg, nelems, i) {
ba395927 2521 addr = SG_ENT_VIRT_ADDRESS(sg);
f76aec76
KA
2522 addr = (void *)virt_to_phys(addr);
2523 size += aligned_size((u64)addr, sg->length);
2524 }
2525
bb9e6d65 2526 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
f76aec76 2527 if (!iova) {
c03ab37c 2528 sglist->dma_length = 0;
f76aec76
KA
2529 return 0;
2530 }
2531
2532 /*
2533 * Check if DMAR supports zero-length reads on write only
2534 * mappings..
2535 */
2536 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2537 !cap_zlr(iommu->cap))
f76aec76
KA
2538 prot |= DMA_PTE_READ;
2539 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2540 prot |= DMA_PTE_WRITE;
2541
5b6985ce 2542 start_addr = iova->pfn_lo << PAGE_SHIFT;
f76aec76 2543 offset = 0;
c03ab37c 2544 for_each_sg(sglist, sg, nelems, i) {
f76aec76
KA
2545 addr = SG_ENT_VIRT_ADDRESS(sg);
2546 addr = (void *)virt_to_phys(addr);
2547 size = aligned_size((u64)addr, sg->length);
2548 ret = domain_page_mapping(domain, start_addr + offset,
5b6985ce 2549 ((u64)addr) & PAGE_MASK,
f76aec76
KA
2550 size, prot);
2551 if (ret) {
2552 /* clear the page */
2553 dma_pte_clear_range(domain, start_addr,
2554 start_addr + offset);
2555 /* free page tables */
2556 dma_pte_free_pagetable(domain, start_addr,
2557 start_addr + offset);
2558 /* free iova */
2559 __free_iova(&domain->iovad, iova);
ba395927
KA
2560 return 0;
2561 }
f76aec76 2562 sg->dma_address = start_addr + offset +
5b6985ce 2563 ((u64)addr & (~PAGE_MASK));
ba395927 2564 sg->dma_length = sg->length;
f76aec76 2565 offset += size;
ba395927
KA
2566 }
2567
ba395927 2568 /* it's a non-present to present mapping */
8c11e798 2569 if (iommu_flush_iotlb_psi(iommu, domain->id,
5b6985ce 2570 start_addr, offset >> VTD_PAGE_SHIFT, 1))
8c11e798 2571 iommu_flush_write_buffer(iommu);
ba395927
KA
2572 return nelems;
2573}
2574
2575static struct dma_mapping_ops intel_dma_ops = {
2576 .alloc_coherent = intel_alloc_coherent,
2577 .free_coherent = intel_free_coherent,
2578 .map_single = intel_map_single,
2579 .unmap_single = intel_unmap_single,
2580 .map_sg = intel_map_sg,
2581 .unmap_sg = intel_unmap_sg,
2582};
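/*
 * Hedged usage sketch, not part of this file: a driver never calls
 * intel_map_single() directly.  It uses the generic DMA API, which x86
 * dispatches through the dma_ops pointer that intel_iommu_init() points
 * at intel_dma_ops above.  The function name and buffer are hypothetical.
 */
static int example_dma_usage(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* routed to intel_map_single() when VT-d translation is active */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, handle))
		return -EIO;

	/* ... program the device with 'handle' and wait for completion ... */

	/* routed to intel_unmap_single() */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}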
2583
2584static inline int iommu_domain_cache_init(void)
2585{
2586 int ret = 0;
2587
2588 iommu_domain_cache = kmem_cache_create("iommu_domain",
2589 sizeof(struct dmar_domain),
2590 0,
2591 SLAB_HWCACHE_ALIGN,
2592
2593 NULL);
2594 if (!iommu_domain_cache) {
2595 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2596 ret = -ENOMEM;
2597 }
2598
2599 return ret;
2600}
2601
2602static inline int iommu_devinfo_cache_init(void)
2603{
2604 int ret = 0;
2605
2606 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2607 sizeof(struct device_domain_info),
2608 0,
2609 SLAB_HWCACHE_ALIGN,
ba395927
KA
2610 NULL);
2611 if (!iommu_devinfo_cache) {
2612 printk(KERN_ERR "Couldn't create devinfo cache\n");
2613 ret = -ENOMEM;
2614 }
2615
2616 return ret;
2617}
2618
2619static inline int iommu_iova_cache_init(void)
2620{
2621 int ret = 0;
2622
2623 iommu_iova_cache = kmem_cache_create("iommu_iova",
2624 sizeof(struct iova),
2625 0,
2626 SLAB_HWCACHE_ALIGN,
ba395927
KA
2627 NULL);
2628 if (!iommu_iova_cache) {
2629 printk(KERN_ERR "Couldn't create iova cache\n");
2630 ret = -ENOMEM;
2631 }
2632
2633 return ret;
2634}
2635
2636static int __init iommu_init_mempool(void)
2637{
2638 int ret;
2639 ret = iommu_iova_cache_init();
2640 if (ret)
2641 return ret;
2642
2643 ret = iommu_domain_cache_init();
2644 if (ret)
2645 goto domain_error;
2646
2647 ret = iommu_devinfo_cache_init();
2648 if (!ret)
2649 return ret;
2650
2651 kmem_cache_destroy(iommu_domain_cache);
2652domain_error:
2653 kmem_cache_destroy(iommu_iova_cache);
2654
2655 return -ENOMEM;
2656}
2657
2658static void __init iommu_exit_mempool(void)
2659{
2660 kmem_cache_destroy(iommu_devinfo_cache);
2661 kmem_cache_destroy(iommu_domain_cache);
2662 kmem_cache_destroy(iommu_iova_cache);
2663
2664}
2665
ba395927
KA
2666static void __init init_no_remapping_devices(void)
2667{
2668 struct dmar_drhd_unit *drhd;
2669
2670 for_each_drhd_unit(drhd) {
2671 if (!drhd->include_all) {
2672 int i;
2673 for (i = 0; i < drhd->devices_cnt; i++)
2674 if (drhd->devices[i] != NULL)
2675 break;
2676 /* ignore DMAR unit if no pci devices exist */
2677 if (i == drhd->devices_cnt)
2678 drhd->ignored = 1;
2679 }
2680 }
2681
2682 if (dmar_map_gfx)
2683 return;
2684
2685 for_each_drhd_unit(drhd) {
2686 int i;
2687 if (drhd->ignored || drhd->include_all)
2688 continue;
2689
2690 for (i = 0; i < drhd->devices_cnt; i++)
2691 if (drhd->devices[i] &&
2692 !IS_GFX_DEVICE(drhd->devices[i]))
2693 break;
2694
2695 if (i < drhd->devices_cnt)
2696 continue;
2697
2698 /* bypass IOMMU if it is just for gfx devices */
2699 drhd->ignored = 1;
2700 for (i = 0; i < drhd->devices_cnt; i++) {
2701 if (!drhd->devices[i])
2702 continue;
358dd8ac 2703 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
2704 }
2705 }
2706}
2707
2708int __init intel_iommu_init(void)
2709{
2710 int ret = 0;
2711
ba395927
KA
2712 if (dmar_table_init())
2713 return -ENODEV;
2714
1886e8a9
SS
2715 if (dmar_dev_scope_init())
2716 return -ENODEV;
2717
2ae21010
SS
2718 /*
2719 * Check the need for DMA-remapping initialization now.
2720 * Above initialization will also be used by Interrupt-remapping.
2721 */
2722 if (no_iommu || swiotlb || dmar_disabled)
2723 return -ENODEV;
2724
ba395927
KA
2725 iommu_init_mempool();
2726 dmar_init_reserved_ranges();
2727
2728 init_no_remapping_devices();
2729
2730 ret = init_dmars();
2731 if (ret) {
2732 printk(KERN_ERR "IOMMU: dmar init failed\n");
2733 put_iova_domain(&reserved_iova_list);
2734 iommu_exit_mempool();
2735 return ret;
2736 }
2737 printk(KERN_INFO
2738 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2739
5e0d2a6f 2740 init_timer(&unmap_timer);
ba395927
KA
2741 force_iommu = 1;
2742 dma_ops = &intel_dma_ops;
a8bcbb0d
JR
2743
2744 register_iommu(&intel_iommu_ops);
2745
ba395927
KA
2746 return 0;
2747}
e820482c 2748
c7151a8d
WH
2749static int vm_domain_add_dev_info(struct dmar_domain *domain,
2750 struct pci_dev *pdev)
2751{
2752 struct device_domain_info *info;
2753 unsigned long flags;
2754
2755 info = alloc_devinfo_mem();
2756 if (!info)
2757 return -ENOMEM;
2758
2759 info->bus = pdev->bus->number;
2760 info->devfn = pdev->devfn;
2761 info->dev = pdev;
2762 info->domain = domain;
2763
2764 spin_lock_irqsave(&device_domain_lock, flags);
2765 list_add(&info->link, &domain->devices);
2766 list_add(&info->global, &device_domain_list);
2767 pdev->dev.archdata.iommu = info;
2768 spin_unlock_irqrestore(&device_domain_lock, flags);
2769
2770 return 0;
2771}
2772
2773static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2774 struct pci_dev *pdev)
2775{
2776 struct device_domain_info *info;
2777 struct intel_iommu *iommu;
2778 unsigned long flags;
2779 int found = 0;
2780 struct list_head *entry, *tmp;
2781
2782 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2783 if (!iommu)
2784 return;
2785
2786 spin_lock_irqsave(&device_domain_lock, flags);
2787 list_for_each_safe(entry, tmp, &domain->devices) {
2788 info = list_entry(entry, struct device_domain_info, link);
2789 if (info->bus == pdev->bus->number &&
2790 info->devfn == pdev->devfn) {
2791 list_del(&info->link);
2792 list_del(&info->global);
2793 if (info->dev)
2794 info->dev->dev.archdata.iommu = NULL;
2795 spin_unlock_irqrestore(&device_domain_lock, flags);
2796
2797 iommu_detach_dev(iommu, info->bus, info->devfn);
2798 free_devinfo_mem(info);
2799
2800 spin_lock_irqsave(&device_domain_lock, flags);
2801
2802 if (found)
2803 break;
2804 else
2805 continue;
2806 }
2807
 2808 /* if there are no other devices under the same iommu
 2809 * owned by this domain, clear this iommu in iommu_bmp,
 2810 * and update the iommu count and coherency
2811 */
2812 if (device_to_iommu(info->bus, info->devfn) == iommu)
2813 found = 1;
2814 }
2815
2816 if (found == 0) {
2817 unsigned long tmp_flags;
2818 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2819 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2820 domain->iommu_count--;
2821 domain_update_iommu_coherency(domain);
2822 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2823 }
2824
2825 spin_unlock_irqrestore(&device_domain_lock, flags);
2826}
2827
2828static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2829{
2830 struct device_domain_info *info;
2831 struct intel_iommu *iommu;
2832 unsigned long flags1, flags2;
2833
2834 spin_lock_irqsave(&device_domain_lock, flags1);
2835 while (!list_empty(&domain->devices)) {
2836 info = list_entry(domain->devices.next,
2837 struct device_domain_info, link);
2838 list_del(&info->link);
2839 list_del(&info->global);
2840 if (info->dev)
2841 info->dev->dev.archdata.iommu = NULL;
2842
2843 spin_unlock_irqrestore(&device_domain_lock, flags1);
2844
2845 iommu = device_to_iommu(info->bus, info->devfn);
2846 iommu_detach_dev(iommu, info->bus, info->devfn);
2847
2848 /* clear this iommu in iommu_bmp, update iommu count
2849 * and coherency
2850 */
2851 spin_lock_irqsave(&domain->iommu_lock, flags2);
2852 if (test_and_clear_bit(iommu->seq_id,
2853 &domain->iommu_bmp)) {
2854 domain->iommu_count--;
2855 domain_update_iommu_coherency(domain);
2856 }
2857 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2858
2859 free_devinfo_mem(info);
2860 spin_lock_irqsave(&device_domain_lock, flags1);
2861 }
2862 spin_unlock_irqrestore(&device_domain_lock, flags1);
2863}
2864
5e98c4b1
WH
2865/* domain id for a virtual machine; it won't be set in the context entry */
2866static unsigned long vm_domid;
2867
fe40f1e0
WH
2868static int vm_domain_min_agaw(struct dmar_domain *domain)
2869{
2870 int i;
2871 int min_agaw = domain->agaw;
2872
2873 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
2874 for (; i < g_num_of_iommus; ) {
2875 if (min_agaw > g_iommus[i]->agaw)
2876 min_agaw = g_iommus[i]->agaw;
2877
2878 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
2879 }
2880
2881 return min_agaw;
2882}
2883
5e98c4b1
WH
2884static struct dmar_domain *iommu_alloc_vm_domain(void)
2885{
2886 struct dmar_domain *domain;
2887
2888 domain = alloc_domain_mem();
2889 if (!domain)
2890 return NULL;
2891
2892 domain->id = vm_domid++;
2893 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2894 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2895
2896 return domain;
2897}
2898
2899static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2900{
2901 int adjust_width;
2902
2903 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2904 spin_lock_init(&domain->mapping_lock);
2905 spin_lock_init(&domain->iommu_lock);
2906
2907 domain_reserve_special_ranges(domain);
2908
2909 /* calculate AGAW */
2910 domain->gaw = guest_width;
2911 adjust_width = guestwidth_to_adjustwidth(guest_width);
2912 domain->agaw = width_to_agaw(adjust_width);
2913
2914 INIT_LIST_HEAD(&domain->devices);
2915
2916 domain->iommu_count = 0;
2917 domain->iommu_coherency = 0;
fe40f1e0 2918 domain->max_addr = 0;
5e98c4b1
WH
2919
2920 /* always allocate the top pgd */
2921 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2922 if (!domain->pgd)
2923 return -ENOMEM;
2924 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2925 return 0;
2926}
2927
2928static void iommu_free_vm_domain(struct dmar_domain *domain)
2929{
2930 unsigned long flags;
2931 struct dmar_drhd_unit *drhd;
2932 struct intel_iommu *iommu;
2933 unsigned long i;
2934 unsigned long ndomains;
2935
2936 for_each_drhd_unit(drhd) {
2937 if (drhd->ignored)
2938 continue;
2939 iommu = drhd->iommu;
2940
2941 ndomains = cap_ndoms(iommu->cap);
2942 i = find_first_bit(iommu->domain_ids, ndomains);
2943 for (; i < ndomains; ) {
2944 if (iommu->domains[i] == domain) {
2945 spin_lock_irqsave(&iommu->lock, flags);
2946 clear_bit(i, iommu->domain_ids);
2947 iommu->domains[i] = NULL;
2948 spin_unlock_irqrestore(&iommu->lock, flags);
2949 break;
2950 }
2951 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2952 }
2953 }
2954}
2955
2956static void vm_domain_exit(struct dmar_domain *domain)
2957{
2958 u64 end;
2959
 2960 /* Domain 0 is reserved, so don't process it */
2961 if (!domain)
2962 return;
2963
2964 vm_domain_remove_all_dev_info(domain);
2965 /* destroy iovas */
2966 put_iova_domain(&domain->iovad);
2967 end = DOMAIN_MAX_ADDR(domain->gaw);
2968 end = end & (~VTD_PAGE_MASK);
2969
2970 /* clear ptes */
2971 dma_pte_clear_range(domain, 0, end);
2972
2973 /* free page tables */
2974 dma_pte_free_pagetable(domain, 0, end);
2975
2976 iommu_free_vm_domain(domain);
2977 free_domain_mem(domain);
2978}
2979
5d450806 2980static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 2981{
5d450806 2982 struct dmar_domain *dmar_domain;
38717946 2983
5d450806
JR
2984 dmar_domain = iommu_alloc_vm_domain();
2985 if (!dmar_domain) {
38717946 2986 printk(KERN_ERR
5d450806
JR
2987 "intel_iommu_domain_init: dmar_domain == NULL\n");
2988 return -ENOMEM;
38717946 2989 }
5d450806 2990 if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 2991 printk(KERN_ERR
5d450806
JR
2992 "intel_iommu_domain_init() failed\n");
2993 vm_domain_exit(dmar_domain);
2994 return -ENOMEM;
38717946 2995 }
5d450806 2996 domain->priv = dmar_domain;
faa3d6f5 2997
5d450806 2998 return 0;
38717946 2999}
38717946 3000
5d450806 3001static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3002{
5d450806
JR
3003 struct dmar_domain *dmar_domain = domain->priv;
3004
3005 domain->priv = NULL;
3006 vm_domain_exit(dmar_domain);
38717946 3007}
38717946 3008
4c5478c9
JR
3009static int intel_iommu_attach_device(struct iommu_domain *domain,
3010 struct device *dev)
38717946 3011{
4c5478c9
JR
3012 struct dmar_domain *dmar_domain = domain->priv;
3013 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3014 struct intel_iommu *iommu;
3015 int addr_width;
3016 u64 end;
faa3d6f5
WH
3017 int ret;
3018
3019 /* normally pdev is not mapped */
3020 if (unlikely(domain_context_mapped(pdev))) {
3021 struct dmar_domain *old_domain;
3022
3023 old_domain = find_domain(pdev);
3024 if (old_domain) {
4c5478c9 3025 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
faa3d6f5
WH
3026 vm_domain_remove_one_dev_info(old_domain, pdev);
3027 else
3028 domain_remove_dev_info(old_domain);
3029 }
3030 }
3031
fe40f1e0
WH
3032 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
3033 if (!iommu)
3034 return -ENODEV;
3035
3036 /* check if this iommu agaw is sufficient for max mapped address */
3037 addr_width = agaw_to_width(iommu->agaw);
3038 end = DOMAIN_MAX_ADDR(addr_width);
3039 end = end & VTD_PAGE_MASK;
4c5478c9 3040 if (end < dmar_domain->max_addr) {
fe40f1e0
WH
3041 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3042 "sufficient for the mapped address (%llx)\n",
4c5478c9 3043 __func__, iommu->agaw, dmar_domain->max_addr);
fe40f1e0
WH
3044 return -EFAULT;
3045 }
3046
4c5478c9 3047 ret = domain_context_mapping(dmar_domain, pdev);
faa3d6f5
WH
3048 if (ret)
3049 return ret;
3050
4c5478c9 3051 ret = vm_domain_add_dev_info(dmar_domain, pdev);
faa3d6f5 3052 return ret;
38717946 3053}
38717946 3054
4c5478c9
JR
3055static void intel_iommu_detach_device(struct iommu_domain *domain,
3056 struct device *dev)
38717946 3057{
4c5478c9
JR
3058 struct dmar_domain *dmar_domain = domain->priv;
3059 struct pci_dev *pdev = to_pci_dev(dev);
3060
3061 vm_domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3062}
c7151a8d 3063
dde57a21
JR
3064static int intel_iommu_map_range(struct iommu_domain *domain,
3065 unsigned long iova, phys_addr_t hpa,
3066 size_t size, int iommu_prot)
faa3d6f5 3067{
dde57a21 3068 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
3069 u64 max_addr;
3070 int addr_width;
dde57a21 3071 int prot = 0;
faa3d6f5 3072 int ret;
fe40f1e0 3073
dde57a21
JR
3074 if (iommu_prot & IOMMU_READ)
3075 prot |= DMA_PTE_READ;
3076 if (iommu_prot & IOMMU_WRITE)
3077 prot |= DMA_PTE_WRITE;
3078
fe40f1e0 3079 max_addr = (iova & VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size);
dde57a21 3080 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3081 int min_agaw;
3082 u64 end;
3083
3084 /* check if minimum agaw is sufficient for mapped address */
dde57a21 3085 min_agaw = vm_domain_min_agaw(dmar_domain);
fe40f1e0
WH
3086 addr_width = agaw_to_width(min_agaw);
3087 end = DOMAIN_MAX_ADDR(addr_width);
3088 end = end & VTD_PAGE_MASK;
3089 if (end < max_addr) {
3090 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3091 "sufficient for the mapped address (%llx)\n",
3092 __func__, min_agaw, max_addr);
3093 return -EFAULT;
3094 }
dde57a21 3095 dmar_domain->max_addr = max_addr;
fe40f1e0
WH
3096 }
3097
dde57a21 3098 ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot);
faa3d6f5 3099 return ret;
38717946 3100}
38717946 3101
dde57a21
JR
3102static void intel_iommu_unmap_range(struct iommu_domain *domain,
3103 unsigned long iova, size_t size)
38717946 3104{
dde57a21 3105 struct dmar_domain *dmar_domain = domain->priv;
faa3d6f5
WH
3106 dma_addr_t base;
3107
3108 /* The address might not be aligned */
3109 base = iova & VTD_PAGE_MASK;
3110 size = VTD_PAGE_ALIGN(size);
dde57a21 3111 dma_pte_clear_range(dmar_domain, base, base + size);
fe40f1e0 3112
dde57a21
JR
3113 if (dmar_domain->max_addr == base + size)
3114 dmar_domain->max_addr = base;
38717946 3115}
38717946 3116
d14d6577
JR
3117static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3118 unsigned long iova)
38717946 3119{
d14d6577 3120 struct dmar_domain *dmar_domain = domain->priv;
38717946 3121 struct dma_pte *pte;
faa3d6f5 3122 u64 phys = 0;
38717946 3123
d14d6577 3124 pte = addr_to_dma_pte(dmar_domain, iova);
38717946 3125 if (pte)
faa3d6f5 3126 phys = dma_pte_addr(pte);
38717946 3127
faa3d6f5 3128 return phys;
38717946 3129}
a8bcbb0d
JR
3130
3131static struct iommu_ops intel_iommu_ops = {
3132 .domain_init = intel_iommu_domain_init,
3133 .domain_destroy = intel_iommu_domain_destroy,
3134 .attach_dev = intel_iommu_attach_device,
3135 .detach_dev = intel_iommu_detach_device,
3136 .map = intel_iommu_map_range,
3137 .unmap = intel_iommu_unmap_range,
3138 .iova_to_phys = intel_iommu_iova_to_phys,
3139};
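/*
 * Hedged sketch of a consumer of the ops registered above (for example
 * device assignment), assuming the generic wrappers of this kernel's era
 * in <linux/iommu.h> (iommu_domain_alloc(), iommu_attach_device(),
 * iommu_map_range(), ...), which forward to intel_iommu_ops.  The
 * function name and parameters below are illustrative, not part of
 * this file.
 */
static int example_assign_device(struct device *dev, unsigned long iova,
				 phys_addr_t paddr, size_t size)
{
	struct iommu_domain *dom;
	int ret;

	dom = iommu_domain_alloc();		/* -> intel_iommu_domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, dev);	/* -> intel_iommu_attach_device() */
	if (ret)
		goto out_free;

	/* -> intel_iommu_map_range(): map iova to host-physical paddr */
	ret = iommu_map_range(dom, iova, paddr, size, IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	return 0;

out_detach:
	iommu_detach_device(dom, dev);		/* -> intel_iommu_detach_device() */
out_free:
	iommu_domain_free(dom);			/* -> intel_iommu_domain_destroy() */
	return ret;
}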