intel-iommu: Use correct domain ID when caching mode is enabled
[deliverable/linux.git] / drivers / pci / intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
f59c7b69 39#include <linux/sysdev.h>
69575d38 40#include <linux/tboot.h>
adb2fe02 41#include <linux/dmi.h>
ba395927 42#include <asm/cacheflush.h>
46a7fa27 43#include <asm/iommu.h>
ba395927
KA
44#include "pci.h"
45
5b6985ce
FY
46#define ROOT_SIZE VTD_PAGE_SIZE
47#define CONTEXT_SIZE VTD_PAGE_SIZE
48
ba395927
KA
49#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 51#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
52
53#define IOAPIC_RANGE_START (0xfee00000)
54#define IOAPIC_RANGE_END (0xfeefffff)
55#define IOVA_START_ADDR (0x1000)
56
57#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
4ed0d3e6
FY
59#define MAX_AGAW_WIDTH 64
60
2ebe3151
DW
61#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
67 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
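/*
 * Worked example, assuming the default 48-bit guest address width:
 * __DOMAIN_MAX_PFN(48) = (1ULL << (48 - 12)) - 1 = 2^36 - 1.  On 64-bit
 * builds that fits in an unsigned long, so DOMAIN_MAX_PFN(48) == 2^36 - 1;
 * on 32-bit it is clamped to ULONG_MAX so PFN arithmetic never overflows.
 */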
ba395927 69
f27be03b 70#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 71#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 72#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 73
fd18de50 74
dd4e8319
DW
75/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
76 are never going to work. */
77static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
78{
79 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
80}
81
82static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
83{
84 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
85}
86static inline unsigned long page_to_dma_pfn(struct page *pg)
87{
88 return mm_to_dma_pfn(page_to_pfn(pg));
89}
90static inline unsigned long virt_to_dma_pfn(void *p)
91{
92 return page_to_dma_pfn(virt_to_page(p));
93}
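/*
 * For example, on x86 both PAGE_SHIFT and VTD_PAGE_SHIFT are 12, so these
 * conversions are identities.  On an architecture with 16KB MM pages
 * (PAGE_SHIFT == 14) one MM pfn spans four 4KB VT-d pfns, so
 * mm_to_dma_pfn(n) == n << 2 and dma_to_mm_pfn(n) == n >> 2.
 */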
94
d9630fe9
WH
95/* global iommu list, set NULL for ignored DMAR units */
96static struct intel_iommu **g_iommus;
97
e0fc7e0b 98static void __init check_tylersburg_isoch(void);
9af88143
DW
99static int rwbf_quirk;
100
46b08e1a
MM
101/*
102 * 0: Present
103 * 1-11: Reserved
104 * 12-63: Context Ptr (12 - (haw-1))
105 * 64-127: Reserved
106 */
107struct root_entry {
108 u64 val;
109 u64 rsvd1;
110};
111#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
112static inline bool root_present(struct root_entry *root)
113{
114 return (root->val & 1);
115}
116static inline void set_root_present(struct root_entry *root)
117{
118 root->val |= 1;
119}
120static inline void set_root_value(struct root_entry *root, unsigned long value)
121{
122 root->val |= value & VTD_PAGE_MASK;
123}
124
125static inline struct context_entry *
126get_context_addr_from_root(struct root_entry *root)
127{
128 return (struct context_entry *)
129 (root_present(root)?phys_to_virt(
130 root->val & VTD_PAGE_MASK) :
131 NULL);
132}
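/*
 * Sizing note: ROOT_ENTRY_NR is VTD_PAGE_SIZE / sizeof(struct root_entry)
 * = 4096 / 16 = 256, i.e. one root entry per PCI bus number.  Each root
 * entry points to a context table of 256 16-byte context entries indexed
 * by the 8-bit devfn, so &context[devfn] selects a single device's entry.
 */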
133
7a8fc25e
MM
134/*
135 * low 64 bits:
136 * 0: present
137 * 1: fault processing disable
138 * 2-3: translation type
139 * 12-63: address space root
140 * high 64 bits:
141 * 0-2: address width
142 * 3-6: aval
143 * 8-23: domain id
144 */
145struct context_entry {
146 u64 lo;
147 u64 hi;
148};
c07e7d21
MM
149
150static inline bool context_present(struct context_entry *context)
151{
152 return (context->lo & 1);
153}
154static inline void context_set_present(struct context_entry *context)
155{
156 context->lo |= 1;
157}
158
159static inline void context_set_fault_enable(struct context_entry *context)
160{
161 context->lo &= (((u64)-1) << 2) | 1;
162}
163
c07e7d21
MM
164static inline void context_set_translation_type(struct context_entry *context,
165 unsigned long value)
166{
167 context->lo &= (((u64)-1) << 4) | 3;
168 context->lo |= (value & 3) << 2;
169}
170
171static inline void context_set_address_root(struct context_entry *context,
172 unsigned long value)
173{
174 context->lo |= value & VTD_PAGE_MASK;
175}
176
177static inline void context_set_address_width(struct context_entry *context,
178 unsigned long value)
179{
180 context->hi |= value & 7;
181}
182
183static inline void context_set_domain_id(struct context_entry *context,
184 unsigned long value)
185{
186 context->hi |= (value & ((1 << 16) - 1)) << 8;
187}
188
189static inline void context_clear_entry(struct context_entry *context)
190{
191 context->lo = 0;
192 context->hi = 0;
193}
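/*
 * For example, context_set_domain_id(context, 5) ORs (5 << 8) into
 * context->hi, and context_set_address_width(context, 2) records agaw 2
 * (a 4-level page table) in bits 0-2; domain_context_mapping_one() below
 * combines these helpers to build a complete context entry.
 */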
7a8fc25e 194
622ba12a
MM
195/*
196 * 0: readable
197 * 1: writable
198 * 2-6: reserved
199 * 7: super page
200 * 8-10: available
201 * 11: snoop behavior
202 * 12-63: Host physical address
203 */
204struct dma_pte {
205 u64 val;
206};
622ba12a 207
19c239ce
MM
208static inline void dma_clear_pte(struct dma_pte *pte)
209{
210 pte->val = 0;
211}
212
213static inline void dma_set_pte_readable(struct dma_pte *pte)
214{
215 pte->val |= DMA_PTE_READ;
216}
217
218static inline void dma_set_pte_writable(struct dma_pte *pte)
219{
220 pte->val |= DMA_PTE_WRITE;
221}
222
9cf06697
SY
223static inline void dma_set_pte_snp(struct dma_pte *pte)
224{
225 pte->val |= DMA_PTE_SNP;
226}
227
19c239ce
MM
228static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
229{
230 pte->val = (pte->val & ~3) | (prot & 3);
231}
232
233static inline u64 dma_pte_addr(struct dma_pte *pte)
234{
235#ifdef CONFIG_64BIT
236 return pte->val & VTD_PAGE_MASK;
237#else
238 /* Must have a full atomic 64-bit read */
239 return __cmpxchg64(pte, 0ULL, 0ULL) & VTD_PAGE_MASK;
240#endif
241}
242
dd4e8319 243static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 244{
dd4e8319 245 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
246}
247
248static inline bool dma_pte_present(struct dma_pte *pte)
249{
250 return (pte->val & 3) != 0;
251}
622ba12a 252
75e6bf96
DW
253static inline int first_pte_in_page(struct dma_pte *pte)
254{
255 return !((unsigned long)pte & ~VTD_PAGE_MASK);
256}
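/*
 * Example: a readable, writable 4KB mapping of host physical page frame
 * 0x1234 is stored as pte->val == (0x1234ULL << VTD_PAGE_SHIFT) |
 * DMA_PTE_READ | DMA_PTE_WRITE, i.e. bits 12-63 hold the page frame and
 * bits 0-1 the permissions, matching the layout comment above.
 */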
257
2c2e2c38
FY
258/*
259 * This domain is a static identity mapping domain.
260 * 1. This domain creates a static 1:1 mapping to all usable memory.
261 * 2. It maps to each iommu if successful.
262 * 3. Each iommu maps to this domain if successful.
263 */
264static struct dmar_domain *si_domain;
265static int hw_pass_through = 1;
2c2e2c38 266
3b5410e7 267/* devices under the same p2p bridge are owned in one domain */
cdc7b837 268#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 269
270/* domain represents a virtual machine; more than one device
271 * across iommus may be owned by one domain, e.g. a kvm guest.
272 */
273#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
274
275/* si_domain contains multiple devices */
276#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
277
99126f7c
MM
278struct dmar_domain {
279 int id; /* domain id */
4c923d47 280 int nid; /* node id */
8c11e798 281 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
282
283 struct list_head devices; /* all devices' list */
284 struct iova_domain iovad; /* iova's that belong to this domain */
285
286 struct dma_pte *pgd; /* virtual address */
287 int gaw; /* max guest address width */
288
289 /* adjusted guest address width, 0 is level 2 30-bit */
290 int agaw;
291
3b5410e7 292 int flags; /* flags to find out type of domain */
293
294 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 295 int iommu_snooping; /* indicate snooping control feature*/
296 int iommu_count; /* reference count of iommu */
297 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 298 u64 max_addr; /* maximum mapped address */
299};
300
a647dacb
MM
301/* PCI domain-device relationship */
302struct device_domain_info {
303 struct list_head link; /* link to domain siblings */
304 struct list_head global; /* link to global list */
305 int segment; /* PCI domain */
306 u8 bus; /* PCI bus number */
a647dacb 307 u8 devfn; /* PCI devfn number */
45e829ea 308 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 309 struct intel_iommu *iommu; /* IOMMU used by this device */
310 struct dmar_domain *domain; /* pointer to domain */
311};
312
5e0d2a6f 313static void flush_unmaps_timeout(unsigned long data);
314
315DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
316
80b20dd8 317#define HIGH_WATER_MARK 250
318struct deferred_flush_tables {
319 int next;
320 struct iova *iova[HIGH_WATER_MARK];
321 struct dmar_domain *domain[HIGH_WATER_MARK];
322};
323
324static struct deferred_flush_tables *deferred_flush;
325
326/* number of IOMMUs in the system; bounds scans of per-domain iommu bitmaps */
5e0d2a6f 327static int g_num_of_iommus;
328
329static DEFINE_SPINLOCK(async_umap_flush_lock);
330static LIST_HEAD(unmaps_to_do);
331
332static int timer_on;
333static long list_size;
5e0d2a6f 334
ba395927
KA
335static void domain_remove_dev_info(struct dmar_domain *domain);
336
0cd5c3c8
KM
337#ifdef CONFIG_DMAR_DEFAULT_ON
338int dmar_disabled = 0;
339#else
340int dmar_disabled = 1;
341#endif /*CONFIG_DMAR_DEFAULT_ON*/
342
ba395927 343static int __initdata dmar_map_gfx = 1;
7d3b03ce 344static int dmar_forcedac;
5e0d2a6f 345static int intel_iommu_strict;
ba395927
KA
346
347#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
348static DEFINE_SPINLOCK(device_domain_lock);
349static LIST_HEAD(device_domain_list);
350
a8bcbb0d
JR
351static struct iommu_ops intel_iommu_ops;
352
ba395927
KA
353static int __init intel_iommu_setup(char *str)
354{
355 if (!str)
356 return -EINVAL;
357 while (*str) {
358 if (!strncmp(str, "on", 2)) {
359 dmar_disabled = 0;
360 printk(KERN_INFO "Intel-IOMMU: enabled\n");
361 } else if (!strncmp(str, "off", 3)) {
ba395927 362 dmar_disabled = 1;
0cd5c3c8 363 printk(KERN_INFO "Intel-IOMMU: disabled\n");
364 } else if (!strncmp(str, "igfx_off", 8)) {
365 dmar_map_gfx = 0;
366 printk(KERN_INFO
367 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 368 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 369 printk(KERN_INFO
370 "Intel-IOMMU: Forcing DAC for PCI devices\n");
371 dmar_forcedac = 1;
5e0d2a6f 372 } else if (!strncmp(str, "strict", 6)) {
373 printk(KERN_INFO
374 "Intel-IOMMU: disable batched IOTLB flush\n");
375 intel_iommu_strict = 1;
376 }
377
378 str += strcspn(str, ",");
379 while (*str == ',')
380 str++;
381 }
382 return 0;
383}
384__setup("intel_iommu=", intel_iommu_setup);
385
386static struct kmem_cache *iommu_domain_cache;
387static struct kmem_cache *iommu_devinfo_cache;
388static struct kmem_cache *iommu_iova_cache;
389
4c923d47 390static inline void *alloc_pgtable_page(int node)
eb3fa7cb 391{
4c923d47
SS
392 struct page *page;
393 void *vaddr = NULL;
eb3fa7cb 394
4c923d47
SS
395 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
396 if (page)
397 vaddr = page_address(page);
eb3fa7cb 398 return vaddr;
ba395927
KA
399}
400
401static inline void free_pgtable_page(void *vaddr)
402{
403 free_page((unsigned long)vaddr);
404}
405
406static inline void *alloc_domain_mem(void)
407{
354bb65e 408 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
409}
410
38717946 411static void free_domain_mem(void *vaddr)
ba395927
KA
412{
413 kmem_cache_free(iommu_domain_cache, vaddr);
414}
415
416static inline void * alloc_devinfo_mem(void)
417{
354bb65e 418 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
419}
420
421static inline void free_devinfo_mem(void *vaddr)
422{
423 kmem_cache_free(iommu_devinfo_cache, vaddr);
424}
425
426struct iova *alloc_iova_mem(void)
427{
354bb65e 428 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
429}
430
431void free_iova_mem(struct iova *iova)
432{
433 kmem_cache_free(iommu_iova_cache, iova);
434}
435
1b573683
WH
436
437static inline int width_to_agaw(int width);
438
4ed0d3e6 439static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
440{
441 unsigned long sagaw;
442 int agaw = -1;
443
444 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 445 for (agaw = width_to_agaw(max_gaw);
446 agaw >= 0; agaw--) {
447 if (test_bit(agaw, &sagaw))
448 break;
449 }
450
451 return agaw;
452}
453
4ed0d3e6
FY
454/*
455 * Calculate max SAGAW for each iommu.
456 */
457int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
458{
459 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
460}
461
462/*
463 * Calculate agaw for each iommu.
464 * "SAGAW" may be different across iommus; use a default agaw, and fall
465 * back to a smaller supported agaw for iommus that don't support it.
466 */
467int iommu_calculate_agaw(struct intel_iommu *iommu)
468{
469 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
470}
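/*
 * For example, with DEFAULT_DOMAIN_ADDRESS_WIDTH == 48,
 * width_to_agaw(48) == (48 - 30) / 9 == 2; if bit 2 is set in the iommu's
 * SAGAW capability field the domain uses agaw 2, which corresponds to a
 * 4-level page table (agaw_to_level(2) == 4).
 */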
471
472/* This function only returns a single iommu in a domain */
473static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
474{
475 int iommu_id;
476
2c2e2c38 477 /* si_domain and vm domain should not get here. */
1ce28feb 478 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 479 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 480
8c11e798
WH
481 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
482 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
483 return NULL;
484
485 return g_iommus[iommu_id];
486}
487
8e604097
WH
488static void domain_update_iommu_coherency(struct dmar_domain *domain)
489{
490 int i;
491
492 domain->iommu_coherency = 1;
493
a45946ab 494 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
8e604097
WH
495 if (!ecap_coherent(g_iommus[i]->ecap)) {
496 domain->iommu_coherency = 0;
497 break;
498 }
8e604097
WH
499 }
500}
501
58c610bd
SY
502static void domain_update_iommu_snooping(struct dmar_domain *domain)
503{
504 int i;
505
506 domain->iommu_snooping = 1;
507
a45946ab 508 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
509 if (!ecap_sc_support(g_iommus[i]->ecap)) {
510 domain->iommu_snooping = 0;
511 break;
512 }
58c610bd
SY
513 }
514}
515
516/* Some capabilities may be different across iommus */
517static void domain_update_iommu_cap(struct dmar_domain *domain)
518{
519 domain_update_iommu_coherency(domain);
520 domain_update_iommu_snooping(domain);
521}
522
276dbf99 523static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
524{
525 struct dmar_drhd_unit *drhd = NULL;
526 int i;
527
528 for_each_drhd_unit(drhd) {
529 if (drhd->ignored)
530 continue;
531 if (segment != drhd->segment)
532 continue;
c7151a8d 533
924b6231 534 for (i = 0; i < drhd->devices_cnt; i++) {
535 if (drhd->devices[i] &&
536 drhd->devices[i]->bus->number == bus &&
537 drhd->devices[i]->devfn == devfn)
538 return drhd->iommu;
539 if (drhd->devices[i] &&
540 drhd->devices[i]->subordinate &&
541 drhd->devices[i]->subordinate->number <= bus &&
542 drhd->devices[i]->subordinate->subordinate >= bus)
543 return drhd->iommu;
544 }
545
546 if (drhd->include_all)
547 return drhd->iommu;
548 }
549
550 return NULL;
551}
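/*
 * The subordinate checks above are what match devices sitting behind a
 * PCIe-to-PCI bridge listed in the DRHD scope: e.g. if the bridge's
 * secondary..subordinate bus range is 5..7, a lookup for bus 6 returns
 * that bridge's iommu even though bus 6 is not itself in the device list.
 */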
552
5331fe6f
WH
553static void domain_flush_cache(struct dmar_domain *domain,
554 void *addr, int size)
555{
556 if (!domain->iommu_coherency)
557 clflush_cache_range(addr, size);
558}
559
ba395927
KA
560/* Gets context entry for a given bus and devfn */
561static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
562 u8 bus, u8 devfn)
563{
564 struct root_entry *root;
565 struct context_entry *context;
566 unsigned long phy_addr;
567 unsigned long flags;
568
569 spin_lock_irqsave(&iommu->lock, flags);
570 root = &iommu->root_entry[bus];
571 context = get_context_addr_from_root(root);
572 if (!context) {
4c923d47
SS
573 context = (struct context_entry *)
574 alloc_pgtable_page(iommu->node);
ba395927
KA
575 if (!context) {
576 spin_unlock_irqrestore(&iommu->lock, flags);
577 return NULL;
578 }
5b6985ce 579 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
580 phy_addr = virt_to_phys((void *)context);
581 set_root_value(root, phy_addr);
582 set_root_present(root);
583 __iommu_flush_cache(iommu, root, sizeof(*root));
584 }
585 spin_unlock_irqrestore(&iommu->lock, flags);
586 return &context[devfn];
587}
588
589static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
590{
591 struct root_entry *root;
592 struct context_entry *context;
593 int ret;
594 unsigned long flags;
595
596 spin_lock_irqsave(&iommu->lock, flags);
597 root = &iommu->root_entry[bus];
598 context = get_context_addr_from_root(root);
599 if (!context) {
600 ret = 0;
601 goto out;
602 }
c07e7d21 603 ret = context_present(&context[devfn]);
ba395927
KA
604out:
605 spin_unlock_irqrestore(&iommu->lock, flags);
606 return ret;
607}
608
609static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
610{
611 struct root_entry *root;
612 struct context_entry *context;
613 unsigned long flags;
614
615 spin_lock_irqsave(&iommu->lock, flags);
616 root = &iommu->root_entry[bus];
617 context = get_context_addr_from_root(root);
618 if (context) {
c07e7d21 619 context_clear_entry(&context[devfn]);
ba395927
KA
620 __iommu_flush_cache(iommu, &context[devfn], \
621 sizeof(*context));
622 }
623 spin_unlock_irqrestore(&iommu->lock, flags);
624}
625
626static void free_context_table(struct intel_iommu *iommu)
627{
628 struct root_entry *root;
629 int i;
630 unsigned long flags;
631 struct context_entry *context;
632
633 spin_lock_irqsave(&iommu->lock, flags);
634 if (!iommu->root_entry) {
635 goto out;
636 }
637 for (i = 0; i < ROOT_ENTRY_NR; i++) {
638 root = &iommu->root_entry[i];
639 context = get_context_addr_from_root(root);
640 if (context)
641 free_pgtable_page(context);
642 }
643 free_pgtable_page(iommu->root_entry);
644 iommu->root_entry = NULL;
645out:
646 spin_unlock_irqrestore(&iommu->lock, flags);
647}
648
649/* page table handling */
650#define LEVEL_STRIDE (9)
651#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
652
653static inline int agaw_to_level(int agaw)
654{
655 return agaw + 2;
656}
657
658static inline int agaw_to_width(int agaw)
659{
660 return 30 + agaw * LEVEL_STRIDE;
661
662}
663
664static inline int width_to_agaw(int width)
665{
666 return (width - 30) / LEVEL_STRIDE;
667}
668
669static inline unsigned int level_to_offset_bits(int level)
670{
6660c63a 671 return (level - 1) * LEVEL_STRIDE;
ba395927
KA
672}
673
77dfa56c 674static inline int pfn_level_offset(unsigned long pfn, int level)
ba395927 675{
6660c63a 676 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
ba395927
KA
677}
678
6660c63a 679static inline unsigned long level_mask(int level)
ba395927 680{
6660c63a 681 return -1UL << level_to_offset_bits(level);
ba395927
KA
682}
683
6660c63a 684static inline unsigned long level_size(int level)
ba395927 685{
6660c63a 686 return 1UL << level_to_offset_bits(level);
ba395927
KA
687}
688
6660c63a 689static inline unsigned long align_to_level(unsigned long pfn, int level)
ba395927 690{
6660c63a 691 return (pfn + level_size(level) - 1) & level_mask(level);
ba395927
KA
692}
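/*
 * Worked example for the helpers above, at level 2:
 * level_to_offset_bits(2) == 9, so level_size(2) == 512 pages (2MB of
 * IOVA), pfn_level_offset(pfn, 2) == (pfn >> 9) & 511, and
 * align_to_level(pfn, 2) rounds pfn up to the next multiple of 512.
 */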
693
b026fd28
DW
694static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
695 unsigned long pfn)
ba395927 696{
b026fd28 697 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
698 struct dma_pte *parent, *pte = NULL;
699 int level = agaw_to_level(domain->agaw);
700 int offset;
ba395927
KA
701
702 BUG_ON(!domain->pgd);
b026fd28 703 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
704 parent = domain->pgd;
705
ba395927
KA
706 while (level > 0) {
707 void *tmp_page;
708
b026fd28 709 offset = pfn_level_offset(pfn, level);
ba395927
KA
710 pte = &parent[offset];
711 if (level == 1)
712 break;
713
19c239ce 714 if (!dma_pte_present(pte)) {
c85994e4
DW
715 uint64_t pteval;
716
4c923d47 717 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 718
206a73c1 719 if (!tmp_page)
ba395927 720 return NULL;
206a73c1 721
c85994e4 722 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 723 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
724 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
725 /* Someone else set it while we were thinking; use theirs. */
726 free_pgtable_page(tmp_page);
727 } else {
728 dma_pte_addr(pte);
729 domain_flush_cache(domain, pte, sizeof(*pte));
730 }
ba395927 731 }
19c239ce 732 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
733 level--;
734 }
735
ba395927
KA
736 return pte;
737}
738
739/* return address's pte at specific level */
90dcfb5e
DW
740static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
741 unsigned long pfn,
742 int level)
ba395927
KA
743{
744 struct dma_pte *parent, *pte = NULL;
745 int total = agaw_to_level(domain->agaw);
746 int offset;
747
748 parent = domain->pgd;
749 while (level <= total) {
90dcfb5e 750 offset = pfn_level_offset(pfn, total);
ba395927
KA
751 pte = &parent[offset];
752 if (level == total)
753 return pte;
754
19c239ce 755 if (!dma_pte_present(pte))
ba395927 756 break;
19c239ce 757 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
758 total--;
759 }
760 return NULL;
761}
762
ba395927 763/* clear last level pte, a tlb flush should be followed */
595badf5
DW
764static void dma_pte_clear_range(struct dmar_domain *domain,
765 unsigned long start_pfn,
766 unsigned long last_pfn)
ba395927 767{
04b18e65 768 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
310a5ab9 769 struct dma_pte *first_pte, *pte;
66eae846 770
04b18e65 771 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 772 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 773 BUG_ON(start_pfn > last_pfn);
ba395927 774
04b18e65 775 /* we don't need lock here; nobody else touches the iova range */
59c36286 776 do {
310a5ab9
DW
777 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
778 if (!pte) {
779 start_pfn = align_to_level(start_pfn + 1, 2);
780 continue;
781 }
75e6bf96 782 do {
310a5ab9
DW
783 dma_clear_pte(pte);
784 start_pfn++;
785 pte++;
786 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
787
310a5ab9
DW
788 domain_flush_cache(domain, first_pte,
789 (void *)pte - (void *)first_pte);
59c36286
DW
790
791 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
792}
793
794/* free page table pages. last level pte should already be cleared */
795static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
796 unsigned long start_pfn,
797 unsigned long last_pfn)
ba395927 798{
6660c63a 799 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 800 struct dma_pte *first_pte, *pte;
ba395927
KA
801 int total = agaw_to_level(domain->agaw);
802 int level;
6660c63a 803 unsigned long tmp;
ba395927 804
6660c63a
DW
805 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
806 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 807 BUG_ON(start_pfn > last_pfn);
ba395927 808
f3a0a52f 809 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
810 level = 2;
811 while (level <= total) {
6660c63a
DW
812 tmp = align_to_level(start_pfn, level);
813
f3a0a52f 814 /* If we can't even clear one PTE at this level, we're done */
6660c63a 815 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
816 return;
817
59c36286 818 do {
f3a0a52f
DW
819 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
820 if (!pte) {
821 tmp = align_to_level(tmp + 1, level + 1);
822 continue;
823 }
75e6bf96 824 do {
6a43e574
DW
825 if (dma_pte_present(pte)) {
826 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
827 dma_clear_pte(pte);
828 }
f3a0a52f
DW
829 pte++;
830 tmp += level_size(level);
75e6bf96
DW
831 } while (!first_pte_in_page(pte) &&
832 tmp + level_size(level) - 1 <= last_pfn);
833
f3a0a52f
DW
834 domain_flush_cache(domain, first_pte,
835 (void *)pte - (void *)first_pte);
836
59c36286 837 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
838 level++;
839 }
840 /* free pgd */
d794dc9b 841 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
842 free_pgtable_page(domain->pgd);
843 domain->pgd = NULL;
844 }
845}
846
847/* iommu handling */
848static int iommu_alloc_root_entry(struct intel_iommu *iommu)
849{
850 struct root_entry *root;
851 unsigned long flags;
852
4c923d47 853 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
854 if (!root)
855 return -ENOMEM;
856
5b6985ce 857 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
858
859 spin_lock_irqsave(&iommu->lock, flags);
860 iommu->root_entry = root;
861 spin_unlock_irqrestore(&iommu->lock, flags);
862
863 return 0;
864}
865
ba395927
KA
866static void iommu_set_root_entry(struct intel_iommu *iommu)
867{
868 void *addr;
c416daa9 869 u32 sts;
ba395927
KA
870 unsigned long flag;
871
872 addr = iommu->root_entry;
873
874 spin_lock_irqsave(&iommu->register_lock, flag);
875 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
876
c416daa9 877 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
878
879 /* Make sure hardware complete it */
880 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 881 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
882
883 spin_unlock_irqrestore(&iommu->register_lock, flag);
884}
885
886static void iommu_flush_write_buffer(struct intel_iommu *iommu)
887{
888 u32 val;
889 unsigned long flag;
890
9af88143 891 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 892 return;
ba395927
KA
893
894 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 895 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
896
897 /* Make sure hardware complete it */
898 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 899 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
900
901 spin_unlock_irqrestore(&iommu->register_lock, flag);
902}
903
904/* return value determines if we need a write buffer flush */
4c25a2c1
DW
905static void __iommu_flush_context(struct intel_iommu *iommu,
906 u16 did, u16 source_id, u8 function_mask,
907 u64 type)
ba395927
KA
908{
909 u64 val = 0;
910 unsigned long flag;
911
ba395927
KA
912 switch (type) {
913 case DMA_CCMD_GLOBAL_INVL:
914 val = DMA_CCMD_GLOBAL_INVL;
915 break;
916 case DMA_CCMD_DOMAIN_INVL:
917 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
918 break;
919 case DMA_CCMD_DEVICE_INVL:
920 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
921 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
922 break;
923 default:
924 BUG();
925 }
926 val |= DMA_CCMD_ICC;
927
928 spin_lock_irqsave(&iommu->register_lock, flag);
929 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
930
931 /* Make sure hardware complete it */
932 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
933 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
934
935 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
936}
937
938/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
939static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
940 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
941{
942 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
943 u64 val = 0, val_iva = 0;
944 unsigned long flag;
945
ba395927
KA
946 switch (type) {
947 case DMA_TLB_GLOBAL_FLUSH:
948 /* global flush doesn't need set IVA_REG */
949 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
950 break;
951 case DMA_TLB_DSI_FLUSH:
952 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
953 break;
954 case DMA_TLB_PSI_FLUSH:
955 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
956 /* Note: always flush non-leaf currently */
957 val_iva = size_order | addr;
958 break;
959 default:
960 BUG();
961 }
962 /* Note: set drain read/write */
963#if 0
964 /*
965 * This is probably to be super secure.. Looks like we can
966 * ignore it without any impact.
967 */
968 if (cap_read_drain(iommu->cap))
969 val |= DMA_TLB_READ_DRAIN;
970#endif
971 if (cap_write_drain(iommu->cap))
972 val |= DMA_TLB_WRITE_DRAIN;
973
974 spin_lock_irqsave(&iommu->register_lock, flag);
975 /* Note: Only uses first TLB reg currently */
976 if (val_iva)
977 dmar_writeq(iommu->reg + tlb_offset, val_iva);
978 dmar_writeq(iommu->reg + tlb_offset + 8, val);
979
980 /* Make sure hardware complete it */
981 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
982 dmar_readq, (!(val & DMA_TLB_IVT)), val);
983
984 spin_unlock_irqrestore(&iommu->register_lock, flag);
985
986 /* check IOTLB invalidation granularity */
987 if (DMA_TLB_IAIG(val) == 0)
988 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
989 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
990 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
991 (unsigned long long)DMA_TLB_IIRG(type),
992 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
993}
994
93a23a72
YZ
995static struct device_domain_info *iommu_support_dev_iotlb(
996 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
997{
998 int found = 0;
999 unsigned long flags;
1000 struct device_domain_info *info;
1001 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1002
1003 if (!ecap_dev_iotlb_support(iommu->ecap))
1004 return NULL;
1005
1006 if (!iommu->qi)
1007 return NULL;
1008
1009 spin_lock_irqsave(&device_domain_lock, flags);
1010 list_for_each_entry(info, &domain->devices, link)
1011 if (info->bus == bus && info->devfn == devfn) {
1012 found = 1;
1013 break;
1014 }
1015 spin_unlock_irqrestore(&device_domain_lock, flags);
1016
1017 if (!found || !info->dev)
1018 return NULL;
1019
1020 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1021 return NULL;
1022
1023 if (!dmar_find_matched_atsr_unit(info->dev))
1024 return NULL;
1025
1026 info->iommu = iommu;
1027
1028 return info;
1029}
1030
1031static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1032{
93a23a72
YZ
1033 if (!info)
1034 return;
1035
1036 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1037}
1038
1039static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1040{
1041 if (!info->dev || !pci_ats_enabled(info->dev))
1042 return;
1043
1044 pci_disable_ats(info->dev);
1045}
1046
1047static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1048 u64 addr, unsigned mask)
1049{
1050 u16 sid, qdep;
1051 unsigned long flags;
1052 struct device_domain_info *info;
1053
1054 spin_lock_irqsave(&device_domain_lock, flags);
1055 list_for_each_entry(info, &domain->devices, link) {
1056 if (!info->dev || !pci_ats_enabled(info->dev))
1057 continue;
1058
1059 sid = info->bus << 8 | info->devfn;
1060 qdep = pci_ats_queue_depth(info->dev);
1061 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1062 }
1063 spin_unlock_irqrestore(&device_domain_lock, flags);
1064}
1065
1f0ef2aa 1066static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1067 unsigned long pfn, unsigned int pages, int map)
ba395927 1068{
9dd2fe89 1069 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1070 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1071
ba395927
KA
1072 BUG_ON(pages == 0);
1073
ba395927 1074 /*
1075 * Fall back to domain-selective flush if there is no PSI support or the size is
1076 * too big.
1077 * PSI requires page size to be 2 ^ x, and the base address is naturally
1078 * aligned to the size
1079 */
9dd2fe89
YZ
1080 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1081 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1082 DMA_TLB_DSI_FLUSH);
1083 else
1084 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1085 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1086
1087 /*
1088 * In caching mode, changes of pages from non-present to present require
1089 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1090 */
82653633 1091 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1092 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1093}
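/*
 * Example: flushing 5 pages starting at pfn 0x1000 gives
 * mask = ilog2(roundup_pow_of_two(5)) = 3, so the PSI invalidation covers
 * 2^3 = 8 pages at addr = 0x1000 << VTD_PAGE_SHIFT (a base that is already
 * naturally aligned to that size).
 */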
1094
f8bab735 1095static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1096{
1097 u32 pmen;
1098 unsigned long flags;
1099
1100 spin_lock_irqsave(&iommu->register_lock, flags);
1101 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1102 pmen &= ~DMA_PMEN_EPM;
1103 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1104
1105 /* wait for the protected region status bit to clear */
1106 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1107 readl, !(pmen & DMA_PMEN_PRS), pmen);
1108
1109 spin_unlock_irqrestore(&iommu->register_lock, flags);
1110}
1111
ba395927
KA
1112static int iommu_enable_translation(struct intel_iommu *iommu)
1113{
1114 u32 sts;
1115 unsigned long flags;
1116
1117 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1118 iommu->gcmd |= DMA_GCMD_TE;
1119 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1120
1121 /* Make sure hardware complete it */
1122 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1123 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1124
ba395927
KA
1125 spin_unlock_irqrestore(&iommu->register_lock, flags);
1126 return 0;
1127}
1128
1129static int iommu_disable_translation(struct intel_iommu *iommu)
1130{
1131 u32 sts;
1132 unsigned long flag;
1133
1134 spin_lock_irqsave(&iommu->register_lock, flag);
1135 iommu->gcmd &= ~DMA_GCMD_TE;
1136 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1137
1138 /* Make sure hardware complete it */
1139 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1140 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1141
1142 spin_unlock_irqrestore(&iommu->register_lock, flag);
1143 return 0;
1144}
1145
3460a6d9 1146
ba395927
KA
1147static int iommu_init_domains(struct intel_iommu *iommu)
1148{
1149 unsigned long ndomains;
1150 unsigned long nlongs;
1151
1152 ndomains = cap_ndoms(iommu->cap);
1153 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1154 nlongs = BITS_TO_LONGS(ndomains);
1155
94a91b50
DD
1156 spin_lock_init(&iommu->lock);
1157
ba395927
KA
1158 /* TBD: there might be 64K domains,
1159 * consider other allocation for future chip
1160 */
1161 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1162 if (!iommu->domain_ids) {
1163 printk(KERN_ERR "Allocating domain id array failed\n");
1164 return -ENOMEM;
1165 }
1166 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1167 GFP_KERNEL);
1168 if (!iommu->domains) {
1169 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1170 return -ENOMEM;
1171 }
1172
1173 /*
1174 * If Caching mode is set, then invalid translations are tagged
1175 * with domain id 0. Hence we need to pre-allocate it.
1176 */
1177 if (cap_caching_mode(iommu->cap))
1178 set_bit(0, iommu->domain_ids);
1179 return 0;
1180}
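/*
 * For instance, an iommu whose capability register reports
 * cap_ndoms() == 256 gets a 256-bit domain_ids bitmap
 * (BITS_TO_LONGS(256) == 4 longs on 64-bit) and a 256-entry domains[]
 * array; in caching mode one of those ids, bit 0, is reserved for
 * tagging invalid translations as described above.
 */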
ba395927 1181
ba395927
KA
1182
1183static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1184static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1185
1186void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1187{
1188 struct dmar_domain *domain;
1189 int i;
c7151a8d 1190 unsigned long flags;
ba395927 1191
94a91b50 1192 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1193 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
94a91b50
DD
1194 domain = iommu->domains[i];
1195 clear_bit(i, iommu->domain_ids);
1196
1197 spin_lock_irqsave(&domain->iommu_lock, flags);
1198 if (--domain->iommu_count == 0) {
1199 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1200 vm_domain_exit(domain);
1201 else
1202 domain_exit(domain);
1203 }
1204 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1205 }
ba395927
KA
1206 }
1207
1208 if (iommu->gcmd & DMA_GCMD_TE)
1209 iommu_disable_translation(iommu);
1210
1211 if (iommu->irq) {
1212 set_irq_data(iommu->irq, NULL);
1213 /* This will mask the irq */
1214 free_irq(iommu->irq, iommu);
1215 destroy_irq(iommu->irq);
1216 }
1217
1218 kfree(iommu->domains);
1219 kfree(iommu->domain_ids);
1220
d9630fe9
WH
1221 g_iommus[iommu->seq_id] = NULL;
1222
1223 /* if all iommus are freed, free g_iommus */
1224 for (i = 0; i < g_num_of_iommus; i++) {
1225 if (g_iommus[i])
1226 break;
1227 }
1228
1229 if (i == g_num_of_iommus)
1230 kfree(g_iommus);
1231
ba395927
KA
1232 /* free context mapping */
1233 free_context_table(iommu);
ba395927
KA
1234}
1235
2c2e2c38 1236static struct dmar_domain *alloc_domain(void)
ba395927 1237{
ba395927 1238 struct dmar_domain *domain;
ba395927
KA
1239
1240 domain = alloc_domain_mem();
1241 if (!domain)
1242 return NULL;
1243
4c923d47 1244 domain->nid = -1;
2c2e2c38
FY
1245 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1246 domain->flags = 0;
1247
1248 return domain;
1249}
1250
1251static int iommu_attach_domain(struct dmar_domain *domain,
1252 struct intel_iommu *iommu)
1253{
1254 int num;
1255 unsigned long ndomains;
1256 unsigned long flags;
1257
ba395927
KA
1258 ndomains = cap_ndoms(iommu->cap);
1259
1260 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1261
ba395927
KA
1262 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1263 if (num >= ndomains) {
1264 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1265 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1266 return -ENOMEM;
ba395927
KA
1267 }
1268
ba395927 1269 domain->id = num;
2c2e2c38 1270 set_bit(num, iommu->domain_ids);
8c11e798 1271 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1272 iommu->domains[num] = domain;
1273 spin_unlock_irqrestore(&iommu->lock, flags);
1274
2c2e2c38 1275 return 0;
ba395927
KA
1276}
1277
2c2e2c38
FY
1278static void iommu_detach_domain(struct dmar_domain *domain,
1279 struct intel_iommu *iommu)
ba395927
KA
1280{
1281 unsigned long flags;
2c2e2c38
FY
1282 int num, ndomains;
1283 int found = 0;
ba395927 1284
8c11e798 1285 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1286 ndomains = cap_ndoms(iommu->cap);
a45946ab 1287 for_each_set_bit(num, iommu->domain_ids, ndomains) {
2c2e2c38
FY
1288 if (iommu->domains[num] == domain) {
1289 found = 1;
1290 break;
1291 }
2c2e2c38
FY
1292 }
1293
1294 if (found) {
1295 clear_bit(num, iommu->domain_ids);
1296 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1297 iommu->domains[num] = NULL;
1298 }
8c11e798 1299 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1300}
1301
1302static struct iova_domain reserved_iova_list;
8a443df4 1303static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1304
1305static void dmar_init_reserved_ranges(void)
1306{
1307 struct pci_dev *pdev = NULL;
1308 struct iova *iova;
1309 int i;
ba395927 1310
f661197e 1311 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1312
8a443df4
MG
1313 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1314 &reserved_rbtree_key);
1315
ba395927
KA
1316 /* IOAPIC ranges shouldn't be accessed by DMA */
1317 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1318 IOVA_PFN(IOAPIC_RANGE_END));
1319 if (!iova)
1320 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1321
1322 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1323 for_each_pci_dev(pdev) {
1324 struct resource *r;
1325
1326 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1327 r = &pdev->resource[i];
1328 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1329 continue;
1a4a4551
DW
1330 iova = reserve_iova(&reserved_iova_list,
1331 IOVA_PFN(r->start),
1332 IOVA_PFN(r->end));
ba395927
KA
1333 if (!iova)
1334 printk(KERN_ERR "Reserve iova failed\n");
1335 }
1336 }
1337
1338}
1339
1340static void domain_reserve_special_ranges(struct dmar_domain *domain)
1341{
1342 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1343}
1344
1345static inline int guestwidth_to_adjustwidth(int gaw)
1346{
1347 int agaw;
1348 int r = (gaw - 12) % 9;
1349
1350 if (r == 0)
1351 agaw = gaw;
1352 else
1353 agaw = gaw + 9 - r;
1354 if (agaw > 64)
1355 agaw = 64;
1356 return agaw;
1357}
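/*
 * e.g. guestwidth_to_adjustwidth(48) == 48 and
 * guestwidth_to_adjustwidth(39) == 39 (both already land on a 9-bit level
 * boundary above 12 bits), while guestwidth_to_adjustwidth(35) rounds up
 * to 39 so the width maps onto a whole number of page-table levels.
 */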
1358
1359static int domain_init(struct dmar_domain *domain, int guest_width)
1360{
1361 struct intel_iommu *iommu;
1362 int adjust_width, agaw;
1363 unsigned long sagaw;
1364
f661197e 1365 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1366 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1367
1368 domain_reserve_special_ranges(domain);
1369
1370 /* calculate AGAW */
8c11e798 1371 iommu = domain_get_iommu(domain);
ba395927
KA
1372 if (guest_width > cap_mgaw(iommu->cap))
1373 guest_width = cap_mgaw(iommu->cap);
1374 domain->gaw = guest_width;
1375 adjust_width = guestwidth_to_adjustwidth(guest_width);
1376 agaw = width_to_agaw(adjust_width);
1377 sagaw = cap_sagaw(iommu->cap);
1378 if (!test_bit(agaw, &sagaw)) {
1379 /* hardware doesn't support it, choose a bigger one */
1380 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1381 agaw = find_next_bit(&sagaw, 5, agaw);
1382 if (agaw >= 5)
1383 return -ENODEV;
1384 }
1385 domain->agaw = agaw;
1386 INIT_LIST_HEAD(&domain->devices);
1387
8e604097
WH
1388 if (ecap_coherent(iommu->ecap))
1389 domain->iommu_coherency = 1;
1390 else
1391 domain->iommu_coherency = 0;
1392
58c610bd
SY
1393 if (ecap_sc_support(iommu->ecap))
1394 domain->iommu_snooping = 1;
1395 else
1396 domain->iommu_snooping = 0;
1397
c7151a8d 1398 domain->iommu_count = 1;
4c923d47 1399 domain->nid = iommu->node;
c7151a8d 1400
ba395927 1401 /* always allocate the top pgd */
4c923d47 1402 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1403 if (!domain->pgd)
1404 return -ENOMEM;
5b6985ce 1405 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1406 return 0;
1407}
1408
1409static void domain_exit(struct dmar_domain *domain)
1410{
2c2e2c38
FY
1411 struct dmar_drhd_unit *drhd;
1412 struct intel_iommu *iommu;
ba395927
KA
1413
1414 /* Domain 0 is reserved, so don't process it */
1415 if (!domain)
1416 return;
1417
1418 domain_remove_dev_info(domain);
1419 /* destroy iovas */
1420 put_iova_domain(&domain->iovad);
ba395927
KA
1421
1422 /* clear ptes */
595badf5 1423 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1424
1425 /* free page tables */
d794dc9b 1426 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1427
2c2e2c38
FY
1428 for_each_active_iommu(iommu, drhd)
1429 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1430 iommu_detach_domain(domain, iommu);
1431
ba395927
KA
1432 free_domain_mem(domain);
1433}
1434
4ed0d3e6
FY
1435static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1436 u8 bus, u8 devfn, int translation)
ba395927
KA
1437{
1438 struct context_entry *context;
ba395927 1439 unsigned long flags;
5331fe6f 1440 struct intel_iommu *iommu;
ea6606b0
WH
1441 struct dma_pte *pgd;
1442 unsigned long num;
1443 unsigned long ndomains;
1444 int id;
1445 int agaw;
93a23a72 1446 struct device_domain_info *info = NULL;
ba395927
KA
1447
1448 pr_debug("Set context mapping for %02x:%02x.%d\n",
1449 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1450
ba395927 1451 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1452 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1453 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1454
276dbf99 1455 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1456 if (!iommu)
1457 return -ENODEV;
1458
ba395927
KA
1459 context = device_to_context_entry(iommu, bus, devfn);
1460 if (!context)
1461 return -ENOMEM;
1462 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1463 if (context_present(context)) {
ba395927
KA
1464 spin_unlock_irqrestore(&iommu->lock, flags);
1465 return 0;
1466 }
1467
ea6606b0
WH
1468 id = domain->id;
1469 pgd = domain->pgd;
1470
2c2e2c38
FY
1471 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1472 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1473 int found = 0;
1474
1475 /* find an available domain id for this device in iommu */
1476 ndomains = cap_ndoms(iommu->cap);
a45946ab 1477 for_each_set_bit(num, iommu->domain_ids, ndomains) {
ea6606b0
WH
1478 if (iommu->domains[num] == domain) {
1479 id = num;
1480 found = 1;
1481 break;
1482 }
ea6606b0
WH
1483 }
1484
1485 if (found == 0) {
1486 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1487 if (num >= ndomains) {
1488 spin_unlock_irqrestore(&iommu->lock, flags);
1489 printk(KERN_ERR "IOMMU: no free domain ids\n");
1490 return -EFAULT;
1491 }
1492
1493 set_bit(num, iommu->domain_ids);
1494 iommu->domains[num] = domain;
1495 id = num;
1496 }
1497
1498 /* Skip top levels of page tables for
1499 * iommus whose agaw is smaller than the default.
1672af11 1500 * Unnecessary for PT mode.
ea6606b0 1501 */
1672af11
CW
1502 if (translation != CONTEXT_TT_PASS_THROUGH) {
1503 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1504 pgd = phys_to_virt(dma_pte_addr(pgd));
1505 if (!dma_pte_present(pgd)) {
1506 spin_unlock_irqrestore(&iommu->lock, flags);
1507 return -ENOMEM;
1508 }
ea6606b0
WH
1509 }
1510 }
1511 }
1512
1513 context_set_domain_id(context, id);
4ed0d3e6 1514
93a23a72
YZ
1515 if (translation != CONTEXT_TT_PASS_THROUGH) {
1516 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1517 translation = info ? CONTEXT_TT_DEV_IOTLB :
1518 CONTEXT_TT_MULTI_LEVEL;
1519 }
4ed0d3e6
FY
1520 /*
1521 * In pass through mode, AW must be programmed to indicate the largest
1522 * AGAW value supported by hardware. And ASR is ignored by hardware.
1523 */
93a23a72 1524 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1525 context_set_address_width(context, iommu->msagaw);
1526 else {
1527 context_set_address_root(context, virt_to_phys(pgd));
1528 context_set_address_width(context, iommu->agaw);
1529 }
4ed0d3e6
FY
1530
1531 context_set_translation_type(context, translation);
c07e7d21
MM
1532 context_set_fault_enable(context);
1533 context_set_present(context);
5331fe6f 1534 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1535
4c25a2c1
DW
1536 /*
1537 * It's a non-present to present mapping. If hardware doesn't cache
1538 * non-present entries, we only need to flush the write-buffer. If it
1539 * _does_ cache non-present entries, then it does so in the special
1540 * domain #0, which we have to flush:
1541 */
1542 if (cap_caching_mode(iommu->cap)) {
1543 iommu->flush.flush_context(iommu, 0,
1544 (((u16)bus) << 8) | devfn,
1545 DMA_CCMD_MASK_NOBIT,
1546 DMA_CCMD_DEVICE_INVL);
82653633 1547 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1548 } else {
ba395927 1549 iommu_flush_write_buffer(iommu);
4c25a2c1 1550 }
93a23a72 1551 iommu_enable_dev_iotlb(info);
ba395927 1552 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1553
1554 spin_lock_irqsave(&domain->iommu_lock, flags);
1555 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1556 domain->iommu_count++;
4c923d47
SS
1557 if (domain->iommu_count == 1)
1558 domain->nid = iommu->node;
58c610bd 1559 domain_update_iommu_cap(domain);
c7151a8d
WH
1560 }
1561 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1562 return 0;
1563}
1564
1565static int
1566domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1567 int translation)
ba395927
KA
1568{
1569 int ret;
1570 struct pci_dev *tmp, *parent;
1571
276dbf99 1572 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1573 pdev->bus->number, pdev->devfn,
1574 translation);
ba395927
KA
1575 if (ret)
1576 return ret;
1577
1578 /* dependent device mapping */
1579 tmp = pci_find_upstream_pcie_bridge(pdev);
1580 if (!tmp)
1581 return 0;
1582 /* Secondary interface's bus number and devfn 0 */
1583 parent = pdev->bus->self;
1584 while (parent != tmp) {
276dbf99
DW
1585 ret = domain_context_mapping_one(domain,
1586 pci_domain_nr(parent->bus),
1587 parent->bus->number,
4ed0d3e6 1588 parent->devfn, translation);
ba395927
KA
1589 if (ret)
1590 return ret;
1591 parent = parent->bus->self;
1592 }
45e829ea 1593 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1594 return domain_context_mapping_one(domain,
276dbf99 1595 pci_domain_nr(tmp->subordinate),
1596 tmp->subordinate->number, 0,
1597 translation);
1598 else /* this is a legacy PCI bridge */
1599 return domain_context_mapping_one(domain,
1600 pci_domain_nr(tmp->bus),
1601 tmp->bus->number,
1602 tmp->devfn,
1603 translation);
ba395927
KA
1604}
1605
5331fe6f 1606static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1607{
1608 int ret;
1609 struct pci_dev *tmp, *parent;
5331fe6f
WH
1610 struct intel_iommu *iommu;
1611
276dbf99
DW
1612 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1613 pdev->devfn);
5331fe6f
WH
1614 if (!iommu)
1615 return -ENODEV;
ba395927 1616
276dbf99 1617 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1618 if (!ret)
1619 return ret;
1620 /* dependent device mapping */
1621 tmp = pci_find_upstream_pcie_bridge(pdev);
1622 if (!tmp)
1623 return ret;
1624 /* Secondary interface's bus number and devfn 0 */
1625 parent = pdev->bus->self;
1626 while (parent != tmp) {
8c11e798 1627 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1628 parent->devfn);
ba395927
KA
1629 if (!ret)
1630 return ret;
1631 parent = parent->bus->self;
1632 }
5f4d91a1 1633 if (pci_is_pcie(tmp))
276dbf99
DW
1634 return device_context_mapped(iommu, tmp->subordinate->number,
1635 0);
ba395927 1636 else
276dbf99
DW
1637 return device_context_mapped(iommu, tmp->bus->number,
1638 tmp->devfn);
ba395927
KA
1639}
1640
f532959b
FY
1641/* Returns a number of VTD pages, but aligned to MM page size */
1642static inline unsigned long aligned_nrpages(unsigned long host_addr,
1643 size_t size)
1644{
1645 host_addr &= ~PAGE_MASK;
1646 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1647}
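/*
 * Example: aligned_nrpages(0x1234, 0x2000) first reduces the address to
 * its in-page offset 0x234, then returns PAGE_ALIGN(0x234 + 0x2000) >>
 * VTD_PAGE_SHIFT == 0x3000 >> 12 == 3 VT-d pages (assuming 4KB MM pages),
 * one more than the raw size alone would suggest because the buffer is
 * not page-aligned.
 */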
1648
9051aa02
DW
1649static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1650 struct scatterlist *sg, unsigned long phys_pfn,
1651 unsigned long nr_pages, int prot)
e1605495
DW
1652{
1653 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1654 phys_addr_t uninitialized_var(pteval);
e1605495 1655 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1656 unsigned long sg_res;
e1605495
DW
1657
1658 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1659
1660 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1661 return -EINVAL;
1662
1663 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1664
9051aa02
DW
1665 if (sg)
1666 sg_res = 0;
1667 else {
1668 sg_res = nr_pages + 1;
1669 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1670 }
1671
e1605495 1672 while (nr_pages--) {
c85994e4
DW
1673 uint64_t tmp;
1674
e1605495 1675 if (!sg_res) {
f532959b 1676 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1677 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1678 sg->dma_length = sg->length;
1679 pteval = page_to_phys(sg_page(sg)) | prot;
1680 }
1681 if (!pte) {
1682 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1683 if (!pte)
1684 return -ENOMEM;
1685 }
1686 /* We don't need lock here, nobody else
1687 * touches the iova range
1688 */
7766a3fb 1689 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1690 if (tmp) {
1bf20f0d 1691 static int dumps = 5;
c85994e4
DW
1692 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1693 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1694 if (dumps) {
1695 dumps--;
1696 debug_dma_dump_mappings(NULL);
1697 }
1698 WARN_ON(1);
1699 }
e1605495 1700 pte++;
75e6bf96 1701 if (!nr_pages || first_pte_in_page(pte)) {
e1605495
DW
1702 domain_flush_cache(domain, first_pte,
1703 (void *)pte - (void *)first_pte);
1704 pte = NULL;
1705 }
1706 iov_pfn++;
1707 pteval += VTD_PAGE_SIZE;
1708 sg_res--;
1709 if (!sg_res)
1710 sg = sg_next(sg);
1711 }
1712 return 0;
1713}
1714
9051aa02
DW
1715static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1716 struct scatterlist *sg, unsigned long nr_pages,
1717 int prot)
ba395927 1718{
9051aa02
DW
1719 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1720}
6f6a00e4 1721
9051aa02
DW
1722static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1723 unsigned long phys_pfn, unsigned long nr_pages,
1724 int prot)
1725{
1726 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1727}
1728
c7151a8d 1729static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1730{
c7151a8d
WH
1731 if (!iommu)
1732 return;
8c11e798
WH
1733
1734 clear_context_table(iommu, bus, devfn);
1735 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1736 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1737 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1738}
1739
1740static void domain_remove_dev_info(struct dmar_domain *domain)
1741{
1742 struct device_domain_info *info;
1743 unsigned long flags;
c7151a8d 1744 struct intel_iommu *iommu;
ba395927
KA
1745
1746 spin_lock_irqsave(&device_domain_lock, flags);
1747 while (!list_empty(&domain->devices)) {
1748 info = list_entry(domain->devices.next,
1749 struct device_domain_info, link);
1750 list_del(&info->link);
1751 list_del(&info->global);
1752 if (info->dev)
358dd8ac 1753 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1754 spin_unlock_irqrestore(&device_domain_lock, flags);
1755
93a23a72 1756 iommu_disable_dev_iotlb(info);
276dbf99 1757 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1758 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1759 free_devinfo_mem(info);
1760
1761 spin_lock_irqsave(&device_domain_lock, flags);
1762 }
1763 spin_unlock_irqrestore(&device_domain_lock, flags);
1764}
1765
1766/*
1767 * find_domain
1768 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
ba395927 1769 */
38717946 1770static struct dmar_domain *
1771find_domain(struct pci_dev *pdev)
1772{
1773 struct device_domain_info *info;
1774
1775 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1776 info = pdev->dev.archdata.iommu;
ba395927
KA
1777 if (info)
1778 return info->domain;
1779 return NULL;
1780}
1781
ba395927
KA
1782/* domain is initialized */
1783static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1784{
1785 struct dmar_domain *domain, *found = NULL;
1786 struct intel_iommu *iommu;
1787 struct dmar_drhd_unit *drhd;
1788 struct device_domain_info *info, *tmp;
1789 struct pci_dev *dev_tmp;
1790 unsigned long flags;
1791 int bus = 0, devfn = 0;
276dbf99 1792 int segment;
2c2e2c38 1793 int ret;
ba395927
KA
1794
1795 domain = find_domain(pdev);
1796 if (domain)
1797 return domain;
1798
276dbf99
DW
1799 segment = pci_domain_nr(pdev->bus);
1800
ba395927
KA
1801 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1802 if (dev_tmp) {
5f4d91a1 1803 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1804 bus = dev_tmp->subordinate->number;
1805 devfn = 0;
1806 } else {
1807 bus = dev_tmp->bus->number;
1808 devfn = dev_tmp->devfn;
1809 }
1810 spin_lock_irqsave(&device_domain_lock, flags);
1811 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1812 if (info->segment == segment &&
1813 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1814 found = info->domain;
1815 break;
1816 }
1817 }
1818 spin_unlock_irqrestore(&device_domain_lock, flags);
 1819 /* pcie-pci bridge already has a domain, use it */
1820 if (found) {
1821 domain = found;
1822 goto found_domain;
1823 }
1824 }
1825
2c2e2c38
FY
1826 domain = alloc_domain();
1827 if (!domain)
1828 goto error;
1829
ba395927
KA
1830 /* Allocate new domain for the device */
1831 drhd = dmar_find_matched_drhd_unit(pdev);
1832 if (!drhd) {
1833 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1834 pci_name(pdev));
1835 return NULL;
1836 }
1837 iommu = drhd->iommu;
1838
2c2e2c38
FY
1839 ret = iommu_attach_domain(domain, iommu);
1840 if (ret) {
1841 domain_exit(domain);
ba395927 1842 goto error;
2c2e2c38 1843 }
ba395927
KA
1844
1845 if (domain_init(domain, gaw)) {
1846 domain_exit(domain);
1847 goto error;
1848 }
1849
1850 /* register pcie-to-pci device */
1851 if (dev_tmp) {
1852 info = alloc_devinfo_mem();
1853 if (!info) {
1854 domain_exit(domain);
1855 goto error;
1856 }
276dbf99 1857 info->segment = segment;
ba395927
KA
1858 info->bus = bus;
1859 info->devfn = devfn;
1860 info->dev = NULL;
1861 info->domain = domain;
1862 /* This domain is shared by devices under p2p bridge */
3b5410e7 1863 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1864
 1865 /* pcie-to-pci bridge already has a domain, use it */
1866 found = NULL;
1867 spin_lock_irqsave(&device_domain_lock, flags);
1868 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
1869 if (tmp->segment == segment &&
1870 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
1871 found = tmp->domain;
1872 break;
1873 }
1874 }
1875 if (found) {
1876 free_devinfo_mem(info);
1877 domain_exit(domain);
1878 domain = found;
1879 } else {
1880 list_add(&info->link, &domain->devices);
1881 list_add(&info->global, &device_domain_list);
1882 }
1883 spin_unlock_irqrestore(&device_domain_lock, flags);
1884 }
1885
1886found_domain:
1887 info = alloc_devinfo_mem();
1888 if (!info)
1889 goto error;
276dbf99 1890 info->segment = segment;
ba395927
KA
1891 info->bus = pdev->bus->number;
1892 info->devfn = pdev->devfn;
1893 info->dev = pdev;
1894 info->domain = domain;
1895 spin_lock_irqsave(&device_domain_lock, flags);
1896 /* somebody is fast */
1897 found = find_domain(pdev);
1898 if (found != NULL) {
1899 spin_unlock_irqrestore(&device_domain_lock, flags);
1900 if (found != domain) {
1901 domain_exit(domain);
1902 domain = found;
1903 }
1904 free_devinfo_mem(info);
1905 return domain;
1906 }
1907 list_add(&info->link, &domain->devices);
1908 list_add(&info->global, &device_domain_list);
358dd8ac 1909 pdev->dev.archdata.iommu = info;
ba395927
KA
1910 spin_unlock_irqrestore(&device_domain_lock, flags);
1911 return domain;
1912error:
1913 /* recheck it here, maybe others set it */
1914 return find_domain(pdev);
1915}
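
The lookups above key every device_domain_info on the (segment, bus, devfn) triple. As a hedged, self-contained illustration of how such a PCI source-id decomposes into slot and function, the PCI_SLOT/PCI_FUNC helpers from <linux/pci.h> are reproduced below in a user-space sketch; the device address 00:1f.3 is invented for the example and is not taken from this file:

/* Standalone sketch, not part of this driver. */
#include <stdio.h>

#define PCI_DEVFN(slot, func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)		(((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)		((devfn) & 0x07)

int main(void)
{
	unsigned int devfn = PCI_DEVFN(0x1f, 3);	/* hypothetical 00:1f.3 */

	printf("devfn=0x%02x -> slot 0x%02x, function %u\n",
	       devfn, PCI_SLOT(devfn), PCI_FUNC(devfn));
	return 0;
}
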
1916
2c2e2c38 1917static int iommu_identity_mapping;
e0fc7e0b
DW
1918#define IDENTMAP_ALL 1
1919#define IDENTMAP_GFX 2
1920#define IDENTMAP_AZALIA 4
2c2e2c38 1921
b213203e
DW
1922static int iommu_domain_identity_map(struct dmar_domain *domain,
1923 unsigned long long start,
1924 unsigned long long end)
ba395927 1925{
c5395d5c
DW
1926 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1927 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1928
1929 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1930 dma_to_mm_pfn(last_vpfn))) {
ba395927 1931 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 1932 return -ENOMEM;
ba395927
KA
1933 }
1934
c5395d5c
DW
1935 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1936 start, end, domain->id);
ba395927
KA
1937 /*
1938 * RMRR range might have overlap with physical memory range,
1939 * clear it first
1940 */
c5395d5c 1941 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 1942
c5395d5c
DW
1943 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1944 last_vpfn - first_vpfn + 1,
61df7443 1945 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
1946}
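
The byte-address to VT-d page-frame conversion above is a plain shift by VTD_PAGE_SHIFT (12, i.e. 4KiB VT-d pages). A hedged, standalone sketch of the same first_vpfn/last_vpfn arithmetic, using an invented RMRR range rather than anything from a real DMAR table:

/* Standalone sketch, not part of this driver. */
#include <stdio.h>

#define VTD_PAGE_SHIFT	12	/* 4KiB VT-d pages */

int main(void)
{
	unsigned long long start = 0xdf000000ULL;	/* hypothetical RMRR base */
	unsigned long long end   = 0xdf0fffffULL;	/* hypothetical RMRR end */
	unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
	unsigned long last_vpfn  = end >> VTD_PAGE_SHIFT;

	printf("RMRR 0x%llx-0x%llx -> vpfn 0x%lx-0x%lx (%lu pages)\n",
	       start, end, first_vpfn, last_vpfn,
	       last_vpfn - first_vpfn + 1);
	return 0;
}
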
1947
1948static int iommu_prepare_identity_map(struct pci_dev *pdev,
1949 unsigned long long start,
1950 unsigned long long end)
1951{
1952 struct dmar_domain *domain;
1953 int ret;
1954
c7ab48d2 1955 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
1956 if (!domain)
1957 return -ENOMEM;
1958
19943b0e
DW
1959 /* For _hardware_ passthrough, don't bother. But for software
1960 passthrough, we do it anyway -- it may indicate a memory
 1961 range which is reserved in E820 and so didn't get set
 1962 up to start with in si_domain */
1963 if (domain == si_domain && hw_pass_through) {
1964 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1965 pci_name(pdev), start, end);
1966 return 0;
1967 }
1968
1969 printk(KERN_INFO
1970 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1971 pci_name(pdev), start, end);
2ff729f5 1972
5595b528
DW
1973 if (end < start) {
1974 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
1975 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1976 dmi_get_system_info(DMI_BIOS_VENDOR),
1977 dmi_get_system_info(DMI_BIOS_VERSION),
1978 dmi_get_system_info(DMI_PRODUCT_VERSION));
1979 ret = -EIO;
1980 goto error;
1981 }
1982
2ff729f5
DW
1983 if (end >> agaw_to_width(domain->agaw)) {
1984 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
1985 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
1986 agaw_to_width(domain->agaw),
1987 dmi_get_system_info(DMI_BIOS_VENDOR),
1988 dmi_get_system_info(DMI_BIOS_VERSION),
1989 dmi_get_system_info(DMI_PRODUCT_VERSION));
1990 ret = -EIO;
1991 goto error;
1992 }
19943b0e 1993
b213203e 1994 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
1995 if (ret)
1996 goto error;
1997
1998 /* context entry init */
4ed0d3e6 1999 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2000 if (ret)
2001 goto error;
2002
2003 return 0;
2004
2005 error:
ba395927
KA
2006 domain_exit(domain);
2007 return ret;
ba395927
KA
2008}
2009
2010static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2011 struct pci_dev *pdev)
2012{
358dd8ac 2013 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2014 return 0;
2015 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2016 rmrr->end_address + 1);
2017}
2018
49a0429e
KA
2019#ifdef CONFIG_DMAR_FLOPPY_WA
2020static inline void iommu_prepare_isa(void)
2021{
2022 struct pci_dev *pdev;
2023 int ret;
2024
2025 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2026 if (!pdev)
2027 return;
2028
c7ab48d2 2029 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
49a0429e
KA
2030 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2031
2032 if (ret)
c7ab48d2
DW
2033 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2034 "floppy might not work\n");
49a0429e
KA
2035
2036}
2037#else
2038static inline void iommu_prepare_isa(void)
2039{
2040 return;
2041}
2042#endif /* !CONFIG_DMAR_FLOPPY_WA */
2043
2c2e2c38 2044static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2045
2046static int __init si_domain_work_fn(unsigned long start_pfn,
2047 unsigned long end_pfn, void *datax)
2048{
2049 int *ret = datax;
2050
2051 *ret = iommu_domain_identity_map(si_domain,
2052 (uint64_t)start_pfn << PAGE_SHIFT,
2053 (uint64_t)end_pfn << PAGE_SHIFT);
2054 return *ret;
2055
2056}
2057
071e1374 2058static int __init si_domain_init(int hw)
2c2e2c38
FY
2059{
2060 struct dmar_drhd_unit *drhd;
2061 struct intel_iommu *iommu;
c7ab48d2 2062 int nid, ret = 0;
2c2e2c38
FY
2063
2064 si_domain = alloc_domain();
2065 if (!si_domain)
2066 return -EFAULT;
2067
c7ab48d2 2068 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2069
2070 for_each_active_iommu(iommu, drhd) {
2071 ret = iommu_attach_domain(si_domain, iommu);
2072 if (ret) {
2073 domain_exit(si_domain);
2074 return -EFAULT;
2075 }
2076 }
2077
2078 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2079 domain_exit(si_domain);
2080 return -EFAULT;
2081 }
2082
2083 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2084
19943b0e
DW
2085 if (hw)
2086 return 0;
2087
c7ab48d2
DW
2088 for_each_online_node(nid) {
2089 work_with_active_regions(nid, si_domain_work_fn, &ret);
2090 if (ret)
2091 return ret;
2092 }
2093
2c2e2c38
FY
2094 return 0;
2095}
2096
2097static void domain_remove_one_dev_info(struct dmar_domain *domain,
2098 struct pci_dev *pdev);
2099static int identity_mapping(struct pci_dev *pdev)
2100{
2101 struct device_domain_info *info;
2102
2103 if (likely(!iommu_identity_mapping))
2104 return 0;
2105
2106
2107 list_for_each_entry(info, &si_domain->devices, link)
2108 if (info->dev == pdev)
2109 return 1;
2110 return 0;
2111}
2112
2113static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2114 struct pci_dev *pdev,
2115 int translation)
2c2e2c38
FY
2116{
2117 struct device_domain_info *info;
2118 unsigned long flags;
5fe60f4e 2119 int ret;
2c2e2c38
FY
2120
2121 info = alloc_devinfo_mem();
2122 if (!info)
2123 return -ENOMEM;
2124
5fe60f4e
DW
2125 ret = domain_context_mapping(domain, pdev, translation);
2126 if (ret) {
2127 free_devinfo_mem(info);
2128 return ret;
2129 }
2130
2c2e2c38
FY
2131 info->segment = pci_domain_nr(pdev->bus);
2132 info->bus = pdev->bus->number;
2133 info->devfn = pdev->devfn;
2134 info->dev = pdev;
2135 info->domain = domain;
2136
2137 spin_lock_irqsave(&device_domain_lock, flags);
2138 list_add(&info->link, &domain->devices);
2139 list_add(&info->global, &device_domain_list);
2140 pdev->dev.archdata.iommu = info;
2141 spin_unlock_irqrestore(&device_domain_lock, flags);
2142
2143 return 0;
2144}
2145
6941af28
DW
2146static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2147{
e0fc7e0b
DW
2148 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2149 return 1;
2150
2151 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2152 return 1;
2153
2154 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2155 return 0;
6941af28 2156
3dfc813d
DW
2157 /*
2158 * We want to start off with all devices in the 1:1 domain, and
2159 * take them out later if we find they can't access all of memory.
2160 *
2161 * However, we can't do this for PCI devices behind bridges,
2162 * because all PCI devices behind the same bridge will end up
2163 * with the same source-id on their transactions.
2164 *
2165 * Practically speaking, we can't change things around for these
2166 * devices at run-time, because we can't be sure there'll be no
2167 * DMA transactions in flight for any of their siblings.
2168 *
2169 * So PCI devices (unless they're on the root bus) as well as
2170 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2171 * the 1:1 domain, just in _case_ one of their siblings turns out
2172 * not to be able to map all of memory.
2173 */
5f4d91a1 2174 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2175 if (!pci_is_root_bus(pdev->bus))
2176 return 0;
2177 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2178 return 0;
2179 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2180 return 0;
2181
2182 /*
2183 * At boot time, we don't yet know if devices will be 64-bit capable.
2184 * Assume that they will -- if they turn out not to be, then we can
2185 * take them out of the 1:1 domain later.
2186 */
6941af28
DW
2187 if (!startup)
2188 return pdev->dma_mask > DMA_BIT_MASK(32);
2189
2190 return 1;
2191}
2192
071e1374 2193static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2194{
2c2e2c38
FY
2195 struct pci_dev *pdev = NULL;
2196 int ret;
2197
19943b0e 2198 ret = si_domain_init(hw);
2c2e2c38
FY
2199 if (ret)
2200 return -EFAULT;
2201
2c2e2c38 2202 for_each_pci_dev(pdev) {
6941af28 2203 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2204 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2205 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2206
5fe60f4e 2207 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2208 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2209 CONTEXT_TT_MULTI_LEVEL);
2210 if (ret)
2211 return ret;
62edf5dc 2212 }
2c2e2c38
FY
2213 }
2214
2215 return 0;
2216}
2217
2218int __init init_dmars(void)
ba395927
KA
2219{
2220 struct dmar_drhd_unit *drhd;
2221 struct dmar_rmrr_unit *rmrr;
2222 struct pci_dev *pdev;
2223 struct intel_iommu *iommu;
9d783ba0 2224 int i, ret;
2c2e2c38 2225
ba395927
KA
2226 /*
2227 * for each drhd
2228 * allocate root
2229 * initialize and program root entry to not present
2230 * endfor
2231 */
2232 for_each_drhd_unit(drhd) {
5e0d2a6f 2233 g_num_of_iommus++;
2234 /*
 2235 * lock not needed as this is only incremented in the
 2236 * single-threaded kernel __init code path; all other
 2237 * accesses are read-only
2238 */
2239 }
2240
d9630fe9
WH
2241 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2242 GFP_KERNEL);
2243 if (!g_iommus) {
2244 printk(KERN_ERR "Allocating global iommu array failed\n");
2245 ret = -ENOMEM;
2246 goto error;
2247 }
2248
80b20dd8 2249 deferred_flush = kzalloc(g_num_of_iommus *
2250 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2251 if (!deferred_flush) {
5e0d2a6f 2252 ret = -ENOMEM;
2253 goto error;
2254 }
2255
5e0d2a6f 2256 for_each_drhd_unit(drhd) {
2257 if (drhd->ignored)
2258 continue;
1886e8a9
SS
2259
2260 iommu = drhd->iommu;
d9630fe9 2261 g_iommus[iommu->seq_id] = iommu;
ba395927 2262
e61d98d8
SS
2263 ret = iommu_init_domains(iommu);
2264 if (ret)
2265 goto error;
2266
ba395927
KA
2267 /*
2268 * TBD:
2269 * we could share the same root & context tables
 2270 * among all IOMMUs. Need to split it later.
2271 */
2272 ret = iommu_alloc_root_entry(iommu);
2273 if (ret) {
2274 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2275 goto error;
2276 }
4ed0d3e6 2277 if (!ecap_pass_through(iommu->ecap))
19943b0e 2278 hw_pass_through = 0;
ba395927
KA
2279 }
2280
1531a6a6
SS
2281 /*
2282 * Start from the sane iommu hardware state.
2283 */
a77b67d4
YS
2284 for_each_drhd_unit(drhd) {
2285 if (drhd->ignored)
2286 continue;
2287
2288 iommu = drhd->iommu;
1531a6a6
SS
2289
2290 /*
2291 * If the queued invalidation is already initialized by us
2292 * (for example, while enabling interrupt-remapping) then
2293 * we got the things already rolling from a sane state.
2294 */
2295 if (iommu->qi)
2296 continue;
2297
2298 /*
2299 * Clear any previous faults.
2300 */
2301 dmar_fault(-1, iommu);
2302 /*
2303 * Disable queued invalidation if supported and already enabled
2304 * before OS handover.
2305 */
2306 dmar_disable_qi(iommu);
2307 }
2308
2309 for_each_drhd_unit(drhd) {
2310 if (drhd->ignored)
2311 continue;
2312
2313 iommu = drhd->iommu;
2314
a77b67d4
YS
2315 if (dmar_enable_qi(iommu)) {
2316 /*
2317 * Queued Invalidate not enabled, use Register Based
2318 * Invalidate
2319 */
2320 iommu->flush.flush_context = __iommu_flush_context;
2321 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2322 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2323 "invalidation\n",
2324 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2325 } else {
2326 iommu->flush.flush_context = qi_flush_context;
2327 iommu->flush.flush_iotlb = qi_flush_iotlb;
2328 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2329 "invalidation\n",
2330 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2331 }
2332 }
2333
19943b0e 2334 if (iommu_pass_through)
e0fc7e0b
DW
2335 iommu_identity_mapping |= IDENTMAP_ALL;
2336
19943b0e 2337#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2338 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2339#endif
e0fc7e0b
DW
2340
2341 check_tylersburg_isoch();
2342
ba395927 2343 /*
19943b0e
DW
 2344 * If pass through is not set or not enabled, set up context entries for
 2345 * identity mappings for rmrr, gfx, and isa, and possibly fall back to
 2346 * static identity mapping if iommu_identity_mapping is set.
ba395927 2347 */
19943b0e
DW
2348 if (iommu_identity_mapping) {
2349 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2350 if (ret) {
19943b0e
DW
2351 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2352 goto error;
ba395927
KA
2353 }
2354 }
ba395927 2355 /*
19943b0e
DW
2356 * For each rmrr
2357 * for each dev attached to rmrr
2358 * do
2359 * locate drhd for dev, alloc domain for dev
2360 * allocate free domain
2361 * allocate page table entries for rmrr
2362 * if context not allocated for bus
2363 * allocate and init context
2364 * set present in root table for this bus
2365 * init context with domain, translation etc
2366 * endfor
2367 * endfor
ba395927 2368 */
19943b0e
DW
2369 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2370 for_each_rmrr_units(rmrr) {
2371 for (i = 0; i < rmrr->devices_cnt; i++) {
2372 pdev = rmrr->devices[i];
2373 /*
 2374 * some BIOSes list non-existent devices in the
 2375 * DMAR table.
2376 */
2377 if (!pdev)
2378 continue;
2379 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2380 if (ret)
2381 printk(KERN_ERR
2382 "IOMMU: mapping reserved region failed\n");
ba395927 2383 }
4ed0d3e6 2384 }
49a0429e 2385
19943b0e
DW
2386 iommu_prepare_isa();
2387
ba395927
KA
2388 /*
2389 * for each drhd
2390 * enable fault log
2391 * global invalidate context cache
2392 * global invalidate iotlb
2393 * enable translation
2394 */
2395 for_each_drhd_unit(drhd) {
2396 if (drhd->ignored)
2397 continue;
2398 iommu = drhd->iommu;
ba395927
KA
2399
2400 iommu_flush_write_buffer(iommu);
2401
3460a6d9
KA
2402 ret = dmar_set_interrupt(iommu);
2403 if (ret)
2404 goto error;
2405
ba395927
KA
2406 iommu_set_root_entry(iommu);
2407
4c25a2c1 2408 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2409 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2410
ba395927
KA
2411 ret = iommu_enable_translation(iommu);
2412 if (ret)
2413 goto error;
b94996c9
DW
2414
2415 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2416 }
2417
2418 return 0;
2419error:
2420 for_each_drhd_unit(drhd) {
2421 if (drhd->ignored)
2422 continue;
2423 iommu = drhd->iommu;
2424 free_iommu(iommu);
2425 }
d9630fe9 2426 kfree(g_iommus);
ba395927
KA
2427 return ret;
2428}
2429
5a5e02a6 2430/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2431static struct iova *intel_alloc_iova(struct device *dev,
2432 struct dmar_domain *domain,
2433 unsigned long nrpages, uint64_t dma_mask)
ba395927 2434{
ba395927 2435 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2436 struct iova *iova = NULL;
ba395927 2437
875764de
DW
2438 /* Restrict dma_mask to the width that the iommu can handle */
2439 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2440
2441 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2442 /*
2443 * First try to allocate an io virtual address in
284901a9 2444 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2445 * from the higher range
ba395927 2446 */
875764de
DW
2447 iova = alloc_iova(&domain->iovad, nrpages,
2448 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2449 if (iova)
2450 return iova;
2451 }
2452 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2453 if (unlikely(!iova)) {
2454 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2455 nrpages, pci_name(pdev));
f76aec76
KA
2456 return NULL;
2457 }
2458
2459 return iova;
2460}
2461
147202aa 2462static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2463{
2464 struct dmar_domain *domain;
2465 int ret;
2466
2467 domain = get_domain_for_dev(pdev,
2468 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2469 if (!domain) {
2470 printk(KERN_ERR
2471 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2472 return NULL;
ba395927
KA
2473 }
2474
2475 /* make sure context mapping is ok */
5331fe6f 2476 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2477 ret = domain_context_mapping(domain, pdev,
2478 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2479 if (ret) {
2480 printk(KERN_ERR
2481 "Domain context map for %s failed",
2482 pci_name(pdev));
4fe05bbc 2483 return NULL;
f76aec76 2484 }
ba395927
KA
2485 }
2486
f76aec76
KA
2487 return domain;
2488}
2489
147202aa
DW
2490static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2491{
2492 struct device_domain_info *info;
2493
2494 /* No lock here, assumes no domain exit in normal case */
2495 info = dev->dev.archdata.iommu;
2496 if (likely(info))
2497 return info->domain;
2498
2499 return __get_valid_domain_for_dev(dev);
2500}
2501
2c2e2c38
FY
2502static int iommu_dummy(struct pci_dev *pdev)
2503{
2504 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2505}
2506
2507/* Check if the pdev needs to go through the non-identity map/unmap process. */
73676832 2508static int iommu_no_mapping(struct device *dev)
2c2e2c38 2509{
73676832 2510 struct pci_dev *pdev;
2c2e2c38
FY
2511 int found;
2512
73676832
DW
2513 if (unlikely(dev->bus != &pci_bus_type))
2514 return 1;
2515
2516 pdev = to_pci_dev(dev);
1e4c64c4
DW
2517 if (iommu_dummy(pdev))
2518 return 1;
2519
2c2e2c38 2520 if (!iommu_identity_mapping)
1e4c64c4 2521 return 0;
2c2e2c38
FY
2522
2523 found = identity_mapping(pdev);
2524 if (found) {
6941af28 2525 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2526 return 1;
2527 else {
2528 /*
 2529 * The 32-bit DMA device is removed from si_domain and falls
 2530 * back to non-identity mapping.
2531 */
2532 domain_remove_one_dev_info(si_domain, pdev);
2533 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2534 pci_name(pdev));
2535 return 0;
2536 }
2537 } else {
2538 /*
 2539 * A 64-bit DMA device detached from a VM is put back
 2540 * into si_domain for identity mapping.
2541 */
6941af28 2542 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2543 int ret;
5fe60f4e
DW
2544 ret = domain_add_dev_info(si_domain, pdev,
2545 hw_pass_through ?
2546 CONTEXT_TT_PASS_THROUGH :
2547 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2548 if (!ret) {
2549 printk(KERN_INFO "64bit %s uses identity mapping\n",
2550 pci_name(pdev));
2551 return 1;
2552 }
2553 }
2554 }
2555
1e4c64c4 2556 return 0;
2c2e2c38
FY
2557}
2558
bb9e6d65
FT
2559static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2560 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2561{
2562 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2563 struct dmar_domain *domain;
5b6985ce 2564 phys_addr_t start_paddr;
f76aec76
KA
2565 struct iova *iova;
2566 int prot = 0;
6865f0d1 2567 int ret;
8c11e798 2568 struct intel_iommu *iommu;
33041ec0 2569 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2570
2571 BUG_ON(dir == DMA_NONE);
2c2e2c38 2572
73676832 2573 if (iommu_no_mapping(hwdev))
6865f0d1 2574 return paddr;
f76aec76
KA
2575
2576 domain = get_valid_domain_for_dev(pdev);
2577 if (!domain)
2578 return 0;
2579
8c11e798 2580 iommu = domain_get_iommu(domain);
88cb6a74 2581 size = aligned_nrpages(paddr, size);
f76aec76 2582
5a5e02a6
DW
2583 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2584 pdev->dma_mask);
f76aec76
KA
2585 if (!iova)
2586 goto error;
2587
ba395927
KA
2588 /*
2589 * Check if DMAR supports zero-length reads on write only
2590 * mappings..
2591 */
2592 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2593 !cap_zlr(iommu->cap))
ba395927
KA
2594 prot |= DMA_PTE_READ;
2595 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2596 prot |= DMA_PTE_WRITE;
2597 /*
6865f0d1 2598 * paddr - (paddr + size) might be a partial page; we should map the whole
ba395927 2599 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 2600 * might have two guest_addr mappings to the same host paddr, but this
ba395927
KA
2601 * is not a big problem
2602 */
0ab36de2 2603 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2604 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2605 if (ret)
2606 goto error;
2607
1f0ef2aa
DW
2608 /* it's a non-present to present mapping. Only flush if caching mode */
2609 if (cap_caching_mode(iommu->cap))
82653633 2610 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2611 else
8c11e798 2612 iommu_flush_write_buffer(iommu);
f76aec76 2613
03d6a246
DW
2614 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2615 start_paddr += paddr & ~PAGE_MASK;
2616 return start_paddr;
ba395927 2617
ba395927 2618error:
f76aec76
KA
2619 if (iova)
2620 __free_iova(&domain->iovad, iova);
4cf2e75d 2621 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2622 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2623 return 0;
2624}
2625
ffbbef5c
FT
2626static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2627 unsigned long offset, size_t size,
2628 enum dma_data_direction dir,
2629 struct dma_attrs *attrs)
bb9e6d65 2630{
ffbbef5c
FT
2631 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2632 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2633}
2634
5e0d2a6f 2635static void flush_unmaps(void)
2636{
80b20dd8 2637 int i, j;
5e0d2a6f 2638
5e0d2a6f 2639 timer_on = 0;
2640
2641 /* just flush them all */
2642 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2643 struct intel_iommu *iommu = g_iommus[i];
2644 if (!iommu)
2645 continue;
c42d9f32 2646
9dd2fe89
YZ
2647 if (!deferred_flush[i].next)
2648 continue;
2649
2650 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2651 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2652 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2653 unsigned long mask;
2654 struct iova *iova = deferred_flush[i].iova[j];
2655
64de5af0 2656 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
93a23a72 2657 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
64de5af0 2658 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
93a23a72 2659 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2660 }
9dd2fe89 2661 deferred_flush[i].next = 0;
5e0d2a6f 2662 }
2663
5e0d2a6f 2664 list_size = 0;
5e0d2a6f 2665}
2666
2667static void flush_unmaps_timeout(unsigned long data)
2668{
80b20dd8 2669 unsigned long flags;
2670
2671 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2672 flush_unmaps();
80b20dd8 2673 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2674}
2675
2676static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2677{
2678 unsigned long flags;
80b20dd8 2679 int next, iommu_id;
8c11e798 2680 struct intel_iommu *iommu;
5e0d2a6f 2681
2682 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2683 if (list_size == HIGH_WATER_MARK)
2684 flush_unmaps();
2685
8c11e798
WH
2686 iommu = domain_get_iommu(dom);
2687 iommu_id = iommu->seq_id;
c42d9f32 2688
80b20dd8 2689 next = deferred_flush[iommu_id].next;
2690 deferred_flush[iommu_id].domain[next] = dom;
2691 deferred_flush[iommu_id].iova[next] = iova;
2692 deferred_flush[iommu_id].next++;
5e0d2a6f 2693
2694 if (!timer_on) {
2695 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2696 timer_on = 1;
2697 }
2698 list_size++;
2699 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2700}
2701
ffbbef5c
FT
2702static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2703 size_t size, enum dma_data_direction dir,
2704 struct dma_attrs *attrs)
ba395927 2705{
ba395927 2706 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2707 struct dmar_domain *domain;
d794dc9b 2708 unsigned long start_pfn, last_pfn;
ba395927 2709 struct iova *iova;
8c11e798 2710 struct intel_iommu *iommu;
ba395927 2711
73676832 2712 if (iommu_no_mapping(dev))
f76aec76 2713 return;
2c2e2c38 2714
ba395927
KA
2715 domain = find_domain(pdev);
2716 BUG_ON(!domain);
2717
8c11e798
WH
2718 iommu = domain_get_iommu(domain);
2719
ba395927 2720 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2721 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2722 (unsigned long long)dev_addr))
ba395927 2723 return;
ba395927 2724
d794dc9b
DW
2725 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2726 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2727
d794dc9b
DW
2728 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2729 pci_name(pdev), start_pfn, last_pfn);
ba395927 2730
f76aec76 2731 /* clear the whole page */
d794dc9b
DW
2732 dma_pte_clear_range(domain, start_pfn, last_pfn);
2733
f76aec76 2734 /* free page tables */
d794dc9b
DW
2735 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2736
5e0d2a6f 2737 if (intel_iommu_strict) {
03d6a246 2738 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2739 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2740 /* free iova */
2741 __free_iova(&domain->iovad, iova);
2742 } else {
2743 add_unmap(domain, iova);
2744 /*
 2745 * queue up the release of the unmap to save the ~1/6 of the
 2746 * CPU time used up by the IOTLB flush operation...
2747 */
5e0d2a6f 2748 }
ba395927
KA
2749}
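
Whether intel_unmap_page() flushes the IOTLB synchronously or defers the work through add_unmap() is selected by intel_iommu_strict. On kernels of this vintage the strict behaviour can be requested from the boot command line; the exact syntax below is an assumed example, per Documentation/kernel-parameters.txt of the running kernel:

    intel_iommu=on,strict
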
2750
d7ab5c46
FT
2751static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2752 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2753{
2754 void *vaddr;
2755 int order;
2756
5b6985ce 2757 size = PAGE_ALIGN(size);
ba395927 2758 order = get_order(size);
e8bb910d
AW
2759
2760 if (!iommu_no_mapping(hwdev))
2761 flags &= ~(GFP_DMA | GFP_DMA32);
2762 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2763 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2764 flags |= GFP_DMA;
2765 else
2766 flags |= GFP_DMA32;
2767 }
ba395927
KA
2768
2769 vaddr = (void *)__get_free_pages(flags, order);
2770 if (!vaddr)
2771 return NULL;
2772 memset(vaddr, 0, size);
2773
bb9e6d65
FT
2774 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2775 DMA_BIDIRECTIONAL,
2776 hwdev->coherent_dma_mask);
ba395927
KA
2777 if (*dma_handle)
2778 return vaddr;
2779 free_pages((unsigned long)vaddr, order);
2780 return NULL;
2781}
2782
d7ab5c46
FT
2783static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2784 dma_addr_t dma_handle)
ba395927
KA
2785{
2786 int order;
2787
5b6985ce 2788 size = PAGE_ALIGN(size);
ba395927
KA
2789 order = get_order(size);
2790
0db9b7ae 2791 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2792 free_pages((unsigned long)vaddr, order);
2793}
2794
d7ab5c46
FT
2795static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2796 int nelems, enum dma_data_direction dir,
2797 struct dma_attrs *attrs)
ba395927 2798{
ba395927
KA
2799 struct pci_dev *pdev = to_pci_dev(hwdev);
2800 struct dmar_domain *domain;
d794dc9b 2801 unsigned long start_pfn, last_pfn;
f76aec76 2802 struct iova *iova;
8c11e798 2803 struct intel_iommu *iommu;
ba395927 2804
73676832 2805 if (iommu_no_mapping(hwdev))
ba395927
KA
2806 return;
2807
2808 domain = find_domain(pdev);
8c11e798
WH
2809 BUG_ON(!domain);
2810
2811 iommu = domain_get_iommu(domain);
ba395927 2812
c03ab37c 2813 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2814 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2815 (unsigned long long)sglist[0].dma_address))
f76aec76 2816 return;
f76aec76 2817
d794dc9b
DW
2818 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2819 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2820
2821 /* clear the whole page */
d794dc9b
DW
2822 dma_pte_clear_range(domain, start_pfn, last_pfn);
2823
f76aec76 2824 /* free page tables */
d794dc9b 2825 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2826
acea0018
DW
2827 if (intel_iommu_strict) {
2828 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2829 last_pfn - start_pfn + 1, 0);
acea0018
DW
2830 /* free iova */
2831 __free_iova(&domain->iovad, iova);
2832 } else {
2833 add_unmap(domain, iova);
2834 /*
2835 * queue up the release of the unmap to save the 1/6th of the
2836 * cpu used up by the iotlb flush operation...
2837 */
2838 }
ba395927
KA
2839}
2840
ba395927 2841static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2842 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2843{
2844 int i;
c03ab37c 2845 struct scatterlist *sg;
ba395927 2846
c03ab37c 2847 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2848 BUG_ON(!sg_page(sg));
4cf2e75d 2849 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 2850 sg->dma_length = sg->length;
ba395927
KA
2851 }
2852 return nelems;
2853}
2854
d7ab5c46
FT
2855static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2856 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 2857{
ba395927 2858 int i;
ba395927
KA
2859 struct pci_dev *pdev = to_pci_dev(hwdev);
2860 struct dmar_domain *domain;
f76aec76
KA
2861 size_t size = 0;
2862 int prot = 0;
f76aec76
KA
2863 struct iova *iova = NULL;
2864 int ret;
c03ab37c 2865 struct scatterlist *sg;
b536d24d 2866 unsigned long start_vpfn;
8c11e798 2867 struct intel_iommu *iommu;
ba395927
KA
2868
2869 BUG_ON(dir == DMA_NONE);
73676832 2870 if (iommu_no_mapping(hwdev))
c03ab37c 2871 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2872
f76aec76
KA
2873 domain = get_valid_domain_for_dev(pdev);
2874 if (!domain)
2875 return 0;
2876
8c11e798
WH
2877 iommu = domain_get_iommu(domain);
2878
b536d24d 2879 for_each_sg(sglist, sg, nelems, i)
88cb6a74 2880 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 2881
5a5e02a6
DW
2882 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2883 pdev->dma_mask);
f76aec76 2884 if (!iova) {
c03ab37c 2885 sglist->dma_length = 0;
f76aec76
KA
2886 return 0;
2887 }
2888
2889 /*
2890 * Check if DMAR supports zero-length reads on write only
2891 * mappings..
2892 */
2893 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2894 !cap_zlr(iommu->cap))
f76aec76
KA
2895 prot |= DMA_PTE_READ;
2896 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2897 prot |= DMA_PTE_WRITE;
2898
b536d24d 2899 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 2900
f532959b 2901 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
2902 if (unlikely(ret)) {
2903 /* clear the page */
2904 dma_pte_clear_range(domain, start_vpfn,
2905 start_vpfn + size - 1);
2906 /* free page tables */
2907 dma_pte_free_pagetable(domain, start_vpfn,
2908 start_vpfn + size - 1);
2909 /* free iova */
2910 __free_iova(&domain->iovad, iova);
2911 return 0;
ba395927
KA
2912 }
2913
1f0ef2aa
DW
2914 /* it's a non-present to present mapping. Only flush if caching mode */
2915 if (cap_caching_mode(iommu->cap))
82653633 2916 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 2917 else
8c11e798 2918 iommu_flush_write_buffer(iommu);
1f0ef2aa 2919
ba395927
KA
2920 return nelems;
2921}
2922
dfb805e8
FT
2923static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2924{
2925 return !dma_addr;
2926}
2927
160c1d8e 2928struct dma_map_ops intel_dma_ops = {
ba395927
KA
2929 .alloc_coherent = intel_alloc_coherent,
2930 .free_coherent = intel_free_coherent,
ba395927
KA
2931 .map_sg = intel_map_sg,
2932 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
2933 .map_page = intel_map_page,
2934 .unmap_page = intel_unmap_page,
dfb805e8 2935 .mapping_error = intel_mapping_error,
ba395927
KA
2936};
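
Drivers never call these hooks directly; once intel_iommu_init() installs intel_dma_ops as the global dma_ops, the generic DMA API dispatches into them. A hedged sketch of a typical streaming-DMA consumer follows -- the driver, buffer and error handling are invented for illustration and are not code from this file:

/* Illustrative only. Assumes a driver that already owns 'pdev' and a
 * kmalloc()ed buffer 'buf' of 'len' bytes. */
#include <linux/pci.h>
#include <linux/dma-mapping.h>

static int example_do_dma(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* Lands in intel_map_page() through the dma_map_ops table. */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(&pdev->dev, handle))
		return -EIO;

	/* ... program the device with 'handle', wait for completion ... */

	/* Lands in intel_unmap_page(); the IOTLB flush is synchronous or
	 * deferred depending on intel_iommu_strict. */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
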
2937
2938static inline int iommu_domain_cache_init(void)
2939{
2940 int ret = 0;
2941
2942 iommu_domain_cache = kmem_cache_create("iommu_domain",
2943 sizeof(struct dmar_domain),
2944 0,
2945 SLAB_HWCACHE_ALIGN,
2946
2947 NULL);
2948 if (!iommu_domain_cache) {
2949 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2950 ret = -ENOMEM;
2951 }
2952
2953 return ret;
2954}
2955
2956static inline int iommu_devinfo_cache_init(void)
2957{
2958 int ret = 0;
2959
2960 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2961 sizeof(struct device_domain_info),
2962 0,
2963 SLAB_HWCACHE_ALIGN,
ba395927
KA
2964 NULL);
2965 if (!iommu_devinfo_cache) {
2966 printk(KERN_ERR "Couldn't create devinfo cache\n");
2967 ret = -ENOMEM;
2968 }
2969
2970 return ret;
2971}
2972
2973static inline int iommu_iova_cache_init(void)
2974{
2975 int ret = 0;
2976
2977 iommu_iova_cache = kmem_cache_create("iommu_iova",
2978 sizeof(struct iova),
2979 0,
2980 SLAB_HWCACHE_ALIGN,
ba395927
KA
2981 NULL);
2982 if (!iommu_iova_cache) {
2983 printk(KERN_ERR "Couldn't create iova cache\n");
2984 ret = -ENOMEM;
2985 }
2986
2987 return ret;
2988}
2989
2990static int __init iommu_init_mempool(void)
2991{
2992 int ret;
2993 ret = iommu_iova_cache_init();
2994 if (ret)
2995 return ret;
2996
2997 ret = iommu_domain_cache_init();
2998 if (ret)
2999 goto domain_error;
3000
3001 ret = iommu_devinfo_cache_init();
3002 if (!ret)
3003 return ret;
3004
3005 kmem_cache_destroy(iommu_domain_cache);
3006domain_error:
3007 kmem_cache_destroy(iommu_iova_cache);
3008
3009 return -ENOMEM;
3010}
3011
3012static void __init iommu_exit_mempool(void)
3013{
3014 kmem_cache_destroy(iommu_devinfo_cache);
3015 kmem_cache_destroy(iommu_domain_cache);
3016 kmem_cache_destroy(iommu_iova_cache);
3017
3018}
3019
ba395927
KA
3020static void __init init_no_remapping_devices(void)
3021{
3022 struct dmar_drhd_unit *drhd;
3023
3024 for_each_drhd_unit(drhd) {
3025 if (!drhd->include_all) {
3026 int i;
3027 for (i = 0; i < drhd->devices_cnt; i++)
3028 if (drhd->devices[i] != NULL)
3029 break;
3030 /* ignore DMAR unit if no pci devices exist */
3031 if (i == drhd->devices_cnt)
3032 drhd->ignored = 1;
3033 }
3034 }
3035
3036 if (dmar_map_gfx)
3037 return;
3038
3039 for_each_drhd_unit(drhd) {
3040 int i;
3041 if (drhd->ignored || drhd->include_all)
3042 continue;
3043
3044 for (i = 0; i < drhd->devices_cnt; i++)
3045 if (drhd->devices[i] &&
3046 !IS_GFX_DEVICE(drhd->devices[i]))
3047 break;
3048
3049 if (i < drhd->devices_cnt)
3050 continue;
3051
3052 /* bypass IOMMU if it is just for gfx devices */
3053 drhd->ignored = 1;
3054 for (i = 0; i < drhd->devices_cnt; i++) {
3055 if (!drhd->devices[i])
3056 continue;
358dd8ac 3057 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3058 }
3059 }
3060}
3061
f59c7b69
FY
3062#ifdef CONFIG_SUSPEND
3063static int init_iommu_hw(void)
3064{
3065 struct dmar_drhd_unit *drhd;
3066 struct intel_iommu *iommu = NULL;
3067
3068 for_each_active_iommu(iommu, drhd)
3069 if (iommu->qi)
3070 dmar_reenable_qi(iommu);
3071
3072 for_each_active_iommu(iommu, drhd) {
3073 iommu_flush_write_buffer(iommu);
3074
3075 iommu_set_root_entry(iommu);
3076
3077 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3078 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3079 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3080 DMA_TLB_GLOBAL_FLUSH);
f59c7b69 3081 iommu_enable_translation(iommu);
b94996c9 3082 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3083 }
3084
3085 return 0;
3086}
3087
3088static void iommu_flush_all(void)
3089{
3090 struct dmar_drhd_unit *drhd;
3091 struct intel_iommu *iommu;
3092
3093 for_each_active_iommu(iommu, drhd) {
3094 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3095 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3096 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3097 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3098 }
3099}
3100
3101static int iommu_suspend(struct sys_device *dev, pm_message_t state)
3102{
3103 struct dmar_drhd_unit *drhd;
3104 struct intel_iommu *iommu = NULL;
3105 unsigned long flag;
3106
3107 for_each_active_iommu(iommu, drhd) {
3108 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3109 GFP_ATOMIC);
3110 if (!iommu->iommu_state)
3111 goto nomem;
3112 }
3113
3114 iommu_flush_all();
3115
3116 for_each_active_iommu(iommu, drhd) {
3117 iommu_disable_translation(iommu);
3118
3119 spin_lock_irqsave(&iommu->register_lock, flag);
3120
3121 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3122 readl(iommu->reg + DMAR_FECTL_REG);
3123 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3124 readl(iommu->reg + DMAR_FEDATA_REG);
3125 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3126 readl(iommu->reg + DMAR_FEADDR_REG);
3127 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3128 readl(iommu->reg + DMAR_FEUADDR_REG);
3129
3130 spin_unlock_irqrestore(&iommu->register_lock, flag);
3131 }
3132 return 0;
3133
3134nomem:
3135 for_each_active_iommu(iommu, drhd)
3136 kfree(iommu->iommu_state);
3137
3138 return -ENOMEM;
3139}
3140
3141static int iommu_resume(struct sys_device *dev)
3142{
3143 struct dmar_drhd_unit *drhd;
3144 struct intel_iommu *iommu = NULL;
3145 unsigned long flag;
3146
3147 if (init_iommu_hw()) {
3148 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3149 return -EIO;
3150 }
3151
3152 for_each_active_iommu(iommu, drhd) {
3153
3154 spin_lock_irqsave(&iommu->register_lock, flag);
3155
3156 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3157 iommu->reg + DMAR_FECTL_REG);
3158 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3159 iommu->reg + DMAR_FEDATA_REG);
3160 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3161 iommu->reg + DMAR_FEADDR_REG);
3162 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3163 iommu->reg + DMAR_FEUADDR_REG);
3164
3165 spin_unlock_irqrestore(&iommu->register_lock, flag);
3166 }
3167
3168 for_each_active_iommu(iommu, drhd)
3169 kfree(iommu->iommu_state);
3170
3171 return 0;
3172}
3173
3174static struct sysdev_class iommu_sysclass = {
3175 .name = "iommu",
3176 .resume = iommu_resume,
3177 .suspend = iommu_suspend,
3178};
3179
3180static struct sys_device device_iommu = {
3181 .cls = &iommu_sysclass,
3182};
3183
3184static int __init init_iommu_sysfs(void)
3185{
3186 int error;
3187
3188 error = sysdev_class_register(&iommu_sysclass);
3189 if (error)
3190 return error;
3191
3192 error = sysdev_register(&device_iommu);
3193 if (error)
3194 sysdev_class_unregister(&iommu_sysclass);
3195
3196 return error;
3197}
3198
3199#else
3200static int __init init_iommu_sysfs(void)
3201{
3202 return 0;
3203}
3204#endif /* CONFIG_SUSPEND */
3205
99dcaded
FY
3206/*
 3207 * Here we only respond to the action of a device being unbound from its driver.
 3208 *
 3209 * A newly added device is not attached to its DMAR domain here yet. That will
 3210 * happen when the device is mapped to an iova.
3211 */
3212static int device_notifier(struct notifier_block *nb,
3213 unsigned long action, void *data)
3214{
3215 struct device *dev = data;
3216 struct pci_dev *pdev = to_pci_dev(dev);
3217 struct dmar_domain *domain;
3218
44cd613c
DW
3219 if (iommu_no_mapping(dev))
3220 return 0;
3221
99dcaded
FY
3222 domain = find_domain(pdev);
3223 if (!domain)
3224 return 0;
3225
3226 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through)
3227 domain_remove_one_dev_info(domain, pdev);
3228
3229 return 0;
3230}
3231
3232static struct notifier_block device_nb = {
3233 .notifier_call = device_notifier,
3234};
3235
ba395927
KA
3236int __init intel_iommu_init(void)
3237{
3238 int ret = 0;
a59b50e9 3239 int force_on = 0;
ba395927 3240
a59b50e9
JC
3241 /* VT-d is required for a TXT/tboot launch, so enforce that */
3242 force_on = tboot_force_iommu();
3243
3244 if (dmar_table_init()) {
3245 if (force_on)
3246 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3247 return -ENODEV;
a59b50e9 3248 }
ba395927 3249
a59b50e9
JC
3250 if (dmar_dev_scope_init()) {
3251 if (force_on)
3252 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3253 return -ENODEV;
a59b50e9 3254 }
1886e8a9 3255
2ae21010
SS
3256 /*
3257 * Check the need for DMA-remapping initialization now.
3258 * Above initialization will also be used by Interrupt-remapping.
3259 */
75f1cdf1 3260 if (no_iommu || dmar_disabled)
2ae21010
SS
3261 return -ENODEV;
3262
ba395927
KA
3263 iommu_init_mempool();
3264 dmar_init_reserved_ranges();
3265
3266 init_no_remapping_devices();
3267
3268 ret = init_dmars();
3269 if (ret) {
a59b50e9
JC
3270 if (force_on)
3271 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3272 printk(KERN_ERR "IOMMU: dmar init failed\n");
3273 put_iova_domain(&reserved_iova_list);
3274 iommu_exit_mempool();
3275 return ret;
3276 }
3277 printk(KERN_INFO
3278 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3279
5e0d2a6f 3280 init_timer(&unmap_timer);
75f1cdf1
FT
3281#ifdef CONFIG_SWIOTLB
3282 swiotlb = 0;
3283#endif
19943b0e 3284 dma_ops = &intel_dma_ops;
4ed0d3e6 3285
f59c7b69 3286 init_iommu_sysfs();
a8bcbb0d
JR
3287
3288 register_iommu(&intel_iommu_ops);
3289
99dcaded
FY
3290 bus_register_notifier(&pci_bus_type, &device_nb);
3291
ba395927
KA
3292 return 0;
3293}
e820482c 3294
3199aa6b
HW
3295static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3296 struct pci_dev *pdev)
3297{
3298 struct pci_dev *tmp, *parent;
3299
3300 if (!iommu || !pdev)
3301 return;
3302
3303 /* dependent device detach */
3304 tmp = pci_find_upstream_pcie_bridge(pdev);
3305 /* Secondary interface's bus number and devfn 0 */
3306 if (tmp) {
3307 parent = pdev->bus->self;
3308 while (parent != tmp) {
3309 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3310 parent->devfn);
3199aa6b
HW
3311 parent = parent->bus->self;
3312 }
45e829ea 3313 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3314 iommu_detach_dev(iommu,
3315 tmp->subordinate->number, 0);
3316 else /* this is a legacy PCI bridge */
276dbf99
DW
3317 iommu_detach_dev(iommu, tmp->bus->number,
3318 tmp->devfn);
3199aa6b
HW
3319 }
3320}
3321
2c2e2c38 3322static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3323 struct pci_dev *pdev)
3324{
3325 struct device_domain_info *info;
3326 struct intel_iommu *iommu;
3327 unsigned long flags;
3328 int found = 0;
3329 struct list_head *entry, *tmp;
3330
276dbf99
DW
3331 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3332 pdev->devfn);
c7151a8d
WH
3333 if (!iommu)
3334 return;
3335
3336 spin_lock_irqsave(&device_domain_lock, flags);
3337 list_for_each_safe(entry, tmp, &domain->devices) {
3338 info = list_entry(entry, struct device_domain_info, link);
276dbf99 3339 /* No need to compare PCI domain; it has to be the same */
c7151a8d
WH
3340 if (info->bus == pdev->bus->number &&
3341 info->devfn == pdev->devfn) {
3342 list_del(&info->link);
3343 list_del(&info->global);
3344 if (info->dev)
3345 info->dev->dev.archdata.iommu = NULL;
3346 spin_unlock_irqrestore(&device_domain_lock, flags);
3347
93a23a72 3348 iommu_disable_dev_iotlb(info);
c7151a8d 3349 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3350 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3351 free_devinfo_mem(info);
3352
3353 spin_lock_irqsave(&device_domain_lock, flags);
3354
3355 if (found)
3356 break;
3357 else
3358 continue;
3359 }
3360
 3361 /* if there are no other devices under the same iommu
 3362 * owned by this domain, clear this iommu in iommu_bmp and
 3363 * update the iommu count and coherency
3364 */
276dbf99
DW
3365 if (iommu == device_to_iommu(info->segment, info->bus,
3366 info->devfn))
c7151a8d
WH
3367 found = 1;
3368 }
3369
3370 if (found == 0) {
3371 unsigned long tmp_flags;
3372 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3373 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3374 domain->iommu_count--;
58c610bd 3375 domain_update_iommu_cap(domain);
c7151a8d
WH
3376 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3377 }
3378
3379 spin_unlock_irqrestore(&device_domain_lock, flags);
3380}
3381
3382static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3383{
3384 struct device_domain_info *info;
3385 struct intel_iommu *iommu;
3386 unsigned long flags1, flags2;
3387
3388 spin_lock_irqsave(&device_domain_lock, flags1);
3389 while (!list_empty(&domain->devices)) {
3390 info = list_entry(domain->devices.next,
3391 struct device_domain_info, link);
3392 list_del(&info->link);
3393 list_del(&info->global);
3394 if (info->dev)
3395 info->dev->dev.archdata.iommu = NULL;
3396
3397 spin_unlock_irqrestore(&device_domain_lock, flags1);
3398
93a23a72 3399 iommu_disable_dev_iotlb(info);
276dbf99 3400 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3401 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3402 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3403
3404 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3405 * and capabilities
c7151a8d
WH
3406 */
3407 spin_lock_irqsave(&domain->iommu_lock, flags2);
3408 if (test_and_clear_bit(iommu->seq_id,
3409 &domain->iommu_bmp)) {
3410 domain->iommu_count--;
58c610bd 3411 domain_update_iommu_cap(domain);
c7151a8d
WH
3412 }
3413 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3414
3415 free_devinfo_mem(info);
3416 spin_lock_irqsave(&device_domain_lock, flags1);
3417 }
3418 spin_unlock_irqrestore(&device_domain_lock, flags1);
3419}
3420
5e98c4b1
WH
3421/* domain id for virtual machine domains; it won't be set in context entries */
3422static unsigned long vm_domid;
3423
fe40f1e0
WH
3424static int vm_domain_min_agaw(struct dmar_domain *domain)
3425{
3426 int i;
3427 int min_agaw = domain->agaw;
3428
a45946ab 3429 for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
fe40f1e0
WH
3430 if (min_agaw > g_iommus[i]->agaw)
3431 min_agaw = g_iommus[i]->agaw;
fe40f1e0
WH
3432 }
3433
3434 return min_agaw;
3435}
3436
5e98c4b1
WH
3437static struct dmar_domain *iommu_alloc_vm_domain(void)
3438{
3439 struct dmar_domain *domain;
3440
3441 domain = alloc_domain_mem();
3442 if (!domain)
3443 return NULL;
3444
3445 domain->id = vm_domid++;
4c923d47 3446 domain->nid = -1;
5e98c4b1
WH
3447 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3448 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3449
3450 return domain;
3451}
3452
2c2e2c38 3453static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3454{
3455 int adjust_width;
3456
3457 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3458 spin_lock_init(&domain->iommu_lock);
3459
3460 domain_reserve_special_ranges(domain);
3461
3462 /* calculate AGAW */
3463 domain->gaw = guest_width;
3464 adjust_width = guestwidth_to_adjustwidth(guest_width);
3465 domain->agaw = width_to_agaw(adjust_width);
3466
3467 INIT_LIST_HEAD(&domain->devices);
3468
3469 domain->iommu_count = 0;
3470 domain->iommu_coherency = 0;
c5b15255 3471 domain->iommu_snooping = 0;
fe40f1e0 3472 domain->max_addr = 0;
4c923d47 3473 domain->nid = -1;
5e98c4b1
WH
3474
3475 /* always allocate the top pgd */
4c923d47 3476 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3477 if (!domain->pgd)
3478 return -ENOMEM;
3479 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3480 return 0;
3481}
3482
3483static void iommu_free_vm_domain(struct dmar_domain *domain)
3484{
3485 unsigned long flags;
3486 struct dmar_drhd_unit *drhd;
3487 struct intel_iommu *iommu;
3488 unsigned long i;
3489 unsigned long ndomains;
3490
3491 for_each_drhd_unit(drhd) {
3492 if (drhd->ignored)
3493 continue;
3494 iommu = drhd->iommu;
3495
3496 ndomains = cap_ndoms(iommu->cap);
a45946ab 3497 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3498 if (iommu->domains[i] == domain) {
3499 spin_lock_irqsave(&iommu->lock, flags);
3500 clear_bit(i, iommu->domain_ids);
3501 iommu->domains[i] = NULL;
3502 spin_unlock_irqrestore(&iommu->lock, flags);
3503 break;
3504 }
5e98c4b1
WH
3505 }
3506 }
3507}
3508
3509static void vm_domain_exit(struct dmar_domain *domain)
3510{
5e98c4b1
WH
 3511 /* Domain 0 is reserved, so don't process it */
3512 if (!domain)
3513 return;
3514
3515 vm_domain_remove_all_dev_info(domain);
3516 /* destroy iovas */
3517 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3518
3519 /* clear ptes */
595badf5 3520 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3521
3522 /* free page tables */
d794dc9b 3523 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3524
3525 iommu_free_vm_domain(domain);
3526 free_domain_mem(domain);
3527}
3528
5d450806 3529static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3530{
5d450806 3531 struct dmar_domain *dmar_domain;
38717946 3532
5d450806
JR
3533 dmar_domain = iommu_alloc_vm_domain();
3534 if (!dmar_domain) {
38717946 3535 printk(KERN_ERR
5d450806
JR
3536 "intel_iommu_domain_init: dmar_domain == NULL\n");
3537 return -ENOMEM;
38717946 3538 }
2c2e2c38 3539 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3540 printk(KERN_ERR
5d450806
JR
3541 "intel_iommu_domain_init() failed\n");
3542 vm_domain_exit(dmar_domain);
3543 return -ENOMEM;
38717946 3544 }
5d450806 3545 domain->priv = dmar_domain;
faa3d6f5 3546
5d450806 3547 return 0;
38717946 3548}
38717946 3549
5d450806 3550static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3551{
5d450806
JR
3552 struct dmar_domain *dmar_domain = domain->priv;
3553
3554 domain->priv = NULL;
3555 vm_domain_exit(dmar_domain);
38717946 3556}
38717946 3557
4c5478c9
JR
3558static int intel_iommu_attach_device(struct iommu_domain *domain,
3559 struct device *dev)
38717946 3560{
4c5478c9
JR
3561 struct dmar_domain *dmar_domain = domain->priv;
3562 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3563 struct intel_iommu *iommu;
3564 int addr_width;
3565 u64 end;
faa3d6f5
WH
3566
3567 /* normally pdev is not mapped */
3568 if (unlikely(domain_context_mapped(pdev))) {
3569 struct dmar_domain *old_domain;
3570
3571 old_domain = find_domain(pdev);
3572 if (old_domain) {
2c2e2c38
FY
3573 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3574 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3575 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3576 else
3577 domain_remove_dev_info(old_domain);
3578 }
3579 }
3580
276dbf99
DW
3581 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3582 pdev->devfn);
fe40f1e0
WH
3583 if (!iommu)
3584 return -ENODEV;
3585
3586 /* check if this iommu agaw is sufficient for max mapped address */
3587 addr_width = agaw_to_width(iommu->agaw);
3588 end = DOMAIN_MAX_ADDR(addr_width);
3589 end = end & VTD_PAGE_MASK;
4c5478c9 3590 if (end < dmar_domain->max_addr) {
fe40f1e0
WH
3591 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3592 "sufficient for the mapped address (%llx)\n",
4c5478c9 3593 __func__, iommu->agaw, dmar_domain->max_addr);
fe40f1e0
WH
3594 return -EFAULT;
3595 }
3596
5fe60f4e 3597 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3598}
38717946 3599
4c5478c9
JR
3600static void intel_iommu_detach_device(struct iommu_domain *domain,
3601 struct device *dev)
38717946 3602{
4c5478c9
JR
3603 struct dmar_domain *dmar_domain = domain->priv;
3604 struct pci_dev *pdev = to_pci_dev(dev);
3605
2c2e2c38 3606 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3607}
c7151a8d 3608
dde57a21
JR
3609static int intel_iommu_map_range(struct iommu_domain *domain,
3610 unsigned long iova, phys_addr_t hpa,
3611 size_t size, int iommu_prot)
faa3d6f5 3612{
dde57a21 3613 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
3614 u64 max_addr;
3615 int addr_width;
dde57a21 3616 int prot = 0;
faa3d6f5 3617 int ret;
fe40f1e0 3618
dde57a21
JR
3619 if (iommu_prot & IOMMU_READ)
3620 prot |= DMA_PTE_READ;
3621 if (iommu_prot & IOMMU_WRITE)
3622 prot |= DMA_PTE_WRITE;
9cf06697
SY
3623 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3624 prot |= DMA_PTE_SNP;
dde57a21 3625
163cc52c 3626 max_addr = iova + size;
dde57a21 3627 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3628 int min_agaw;
3629 u64 end;
3630
3631 /* check if minimum agaw is sufficient for mapped address */
dde57a21 3632 min_agaw = vm_domain_min_agaw(dmar_domain);
fe40f1e0
WH
3633 addr_width = agaw_to_width(min_agaw);
3634 end = DOMAIN_MAX_ADDR(addr_width);
3635 end = end & VTD_PAGE_MASK;
3636 if (end < max_addr) {
3637 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3638 "sufficient for the mapped address (%llx)\n",
3639 __func__, min_agaw, max_addr);
3640 return -EFAULT;
3641 }
dde57a21 3642 dmar_domain->max_addr = max_addr;
fe40f1e0 3643 }
ad051221
DW
3644 /* Round up size to next multiple of PAGE_SIZE, if it and
3645 the low bits of hpa would take us onto the next page */
88cb6a74 3646 size = aligned_nrpages(hpa, size);
ad051221
DW
3647 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3648 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3649 return ret;
38717946 3650}
38717946 3651
dde57a21
JR
3652static void intel_iommu_unmap_range(struct iommu_domain *domain,
3653 unsigned long iova, size_t size)
38717946 3654{
dde57a21 3655 struct dmar_domain *dmar_domain = domain->priv;
faa3d6f5 3656
4b99d352
SY
3657 if (!size)
3658 return;
3659
163cc52c
DW
3660 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3661 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 3662
163cc52c
DW
3663 if (dmar_domain->max_addr == iova + size)
3664 dmar_domain->max_addr = iova;
38717946 3665}
38717946 3666
d14d6577
JR
3667static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3668 unsigned long iova)
38717946 3669{
d14d6577 3670 struct dmar_domain *dmar_domain = domain->priv;
38717946 3671 struct dma_pte *pte;
faa3d6f5 3672 u64 phys = 0;
38717946 3673
b026fd28 3674 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
38717946 3675 if (pte)
faa3d6f5 3676 phys = dma_pte_addr(pte);
38717946 3677
faa3d6f5 3678 return phys;
38717946 3679}
a8bcbb0d 3680
dbb9fd86
SY
3681static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3682 unsigned long cap)
3683{
3684 struct dmar_domain *dmar_domain = domain->priv;
3685
3686 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3687 return dmar_domain->iommu_snooping;
3688
3689 return 0;
3690}
3691
a8bcbb0d
JR
3692static struct iommu_ops intel_iommu_ops = {
3693 .domain_init = intel_iommu_domain_init,
3694 .domain_destroy = intel_iommu_domain_destroy,
3695 .attach_dev = intel_iommu_attach_device,
3696 .detach_dev = intel_iommu_detach_device,
3697 .map = intel_iommu_map_range,
3698 .unmap = intel_iommu_unmap_range,
3699 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3700 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3701};
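
These ops back the generic IOMMU API (registered via register_iommu() in intel_iommu_init()), which is what KVM device assignment builds on. Below is a hedged sketch of that call flow using the API names of this kernel generation (iommu_domain_alloc(), iommu_map_range(), etc.; later kernels renamed parts of this interface). The helper, the addresses and the layout are invented:

/* Illustrative only; abbreviated error handling, not code from this file. */
#include <linux/iommu.h>
#include <linux/pci.h>

static int example_assign_device(struct pci_dev *pdev)
{
	struct iommu_domain *domain;
	int ret;

	domain = iommu_domain_alloc();		/* -> intel_iommu_domain_init() */
	if (!domain)
		return -ENOMEM;

	ret = iommu_attach_device(domain, &pdev->dev);	/* -> attach_dev */
	if (ret)
		goto out_free;

	/* Map 1MiB of I/O virtual space at 1MiB to the same physical
	 * range (hypothetical layout); -> intel_iommu_map_range(). */
	ret = iommu_map_range(domain, 0x100000, 0x100000, 0x100000,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	return 0;

out_detach:
	iommu_detach_device(domain, &pdev->dev);
out_free:
	iommu_domain_free(domain);
	return ret;
}
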
9af88143
DW
3702
3703static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3704{
3705 /*
3706 * Mobile 4 Series Chipset neglects to set RWBF capability,
3707 * but needs it:
3708 */
3709 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3710 rwbf_quirk = 1;
3711}
3712
3713DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b
DW
3714
3715/* On Tylersburg chipsets, some BIOSes have been known to enable the
3716 ISOCH DMAR unit for the Azalia sound device, but not give it any
3717 TLB entries, which causes it to deadlock. Check for that. We do
3718 this in a function called from init_dmars(), instead of in a PCI
3719 quirk, because we don't want to print the obnoxious "BIOS broken"
3720 message if VT-d is actually disabled.
3721*/
3722static void __init check_tylersburg_isoch(void)
3723{
3724 struct pci_dev *pdev;
3725 uint32_t vtisochctrl;
3726
3727 /* If there's no Azalia in the system anyway, forget it. */
3728 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3729 if (!pdev)
3730 return;
3731 pci_dev_put(pdev);
3732
3733 /* System Management Registers. Might be hidden, in which case
3734 we can't do the sanity check. But that's OK, because the
3735 known-broken BIOSes _don't_ actually hide it, so far. */
3736 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3737 if (!pdev)
3738 return;
3739
3740 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3741 pci_dev_put(pdev);
3742 return;
3743 }
3744
3745 pci_dev_put(pdev);
3746
3747 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3748 if (vtisochctrl & 1)
3749 return;
3750
3751 /* Drop all bits other than the number of TLB entries */
3752 vtisochctrl &= 0x1c;
3753
3754 /* If we have the recommended number of TLB entries (16), fine. */
3755 if (vtisochctrl == 0x10)
3756 return;
3757
3758 /* Zero TLB entries? You get to ride the short bus to school. */
3759 if (!vtisochctrl) {
3760 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3761 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3762 dmi_get_system_info(DMI_BIOS_VENDOR),
3763 dmi_get_system_info(DMI_BIOS_VERSION),
3764 dmi_get_system_info(DMI_PRODUCT_VERSION));
3765 iommu_identity_mapping |= IDENTMAP_AZALIA;
3766 return;
3767 }
3768
3769 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
3770 vtisochctrl);
3771}