iommu/vt-d: fix invalid memory access when freeing DMAR irq
[deliverable/linux.git] drivers/iommu/intel-iommu.c
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
54485c30 27#include <linux/export.h>
28#include <linux/slab.h>
29#include <linux/irq.h>
30#include <linux/interrupt.h>
31#include <linux/spinlock.h>
32#include <linux/pci.h>
33#include <linux/dmar.h>
34#include <linux/dma-mapping.h>
35#include <linux/mempool.h>
5e0d2a6f 36#include <linux/timer.h>
38717946 37#include <linux/iova.h>
5d450806 38#include <linux/iommu.h>
38717946 39#include <linux/intel-iommu.h>
134fac3f 40#include <linux/syscore_ops.h>
69575d38 41#include <linux/tboot.h>
adb2fe02 42#include <linux/dmi.h>
5cdede24 43#include <linux/pci-ats.h>
0ee332c1 44#include <linux/memblock.h>
8a8f422d 45#include <asm/irq_remapping.h>
ba395927 46#include <asm/cacheflush.h>
46a7fa27 47#include <asm/iommu.h>
ba395927 48
078e1ee2 49#include "irq_remapping.h"
61e015ac 50#include "pci.h"
078e1ee2 51
52#define ROOT_SIZE VTD_PAGE_SIZE
53#define CONTEXT_SIZE VTD_PAGE_SIZE
54
55#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
56#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 57#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
58
59#define IOAPIC_RANGE_START (0xfee00000)
60#define IOAPIC_RANGE_END (0xfeefffff)
61#define IOVA_START_ADDR (0x1000)
62
63#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
64
4ed0d3e6 65#define MAX_AGAW_WIDTH 64
5c645b35 66#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 67
68#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
69#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
70
71/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
72 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
73#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
74 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
75#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
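/* Example: with the default gaw of 48, __DOMAIN_MAX_PFN is 2^36 - 1; on 32-bit
 * kernels DOMAIN_MAX_PFN clamps that to ULONG_MAX so PFNs always fit in an
 * unsigned long, as the comment above requires. */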
ba395927 76
f27be03b 77#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 78#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 79#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 80
81/* page table handling */
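/* Each page-table level resolves LEVEL_STRIDE == 9 bits of the DMA address:
 * a table page holds 512 eight-byte entries, i.e. exactly one 4KiB VT-d page,
 * and LEVEL_MASK extracts a single level's index from a pfn. */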
82#define LEVEL_STRIDE (9)
83#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
84
85/*
86 * This bitmap is used to advertise the page sizes our hardware supports
87 * to the IOMMU core, which will then use this information to split
88 * physically contiguous memory regions it is mapping into page sizes
89 * that we support.
90 *
91 * Traditionally the IOMMU core just handed us the mappings directly,
92 * after making sure the size is a power-of-two number of 4KiB pages and that the
93 * mapping has natural alignment.
94 *
95 * To retain this behavior, we currently advertise that we support
96 * all page sizes that are a power-of-two multiple of 4KiB.
97 *
98 * If at some point we'd like to utilize the IOMMU core's new behavior,
99 * we could change this to advertise the real page sizes we support.
100 */
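/* Bit N set in this bitmap tells the IOMMU core that a page size of 2^N bytes
 * is supported, so ~0xFFFUL advertises every power-of-two size >= 4KiB. */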
101#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
102
103static inline int agaw_to_level(int agaw)
104{
105 return agaw + 2;
106}
107
108static inline int agaw_to_width(int agaw)
109{
5c645b35 110 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
111}
112
113static inline int width_to_agaw(int width)
114{
5c645b35 115 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
116}
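/* Example: agaw 0 is a 2-level table covering a 30-bit (1GiB) address space;
 * each agaw step adds one level and 9 bits, so the default 48-bit width maps
 * to agaw 2 and a 4-level table. */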
117
118static inline unsigned int level_to_offset_bits(int level)
119{
120 return (level - 1) * LEVEL_STRIDE;
121}
122
123static inline int pfn_level_offset(unsigned long pfn, int level)
124{
125 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
126}
127
128static inline unsigned long level_mask(int level)
129{
130 return -1UL << level_to_offset_bits(level);
131}
132
133static inline unsigned long level_size(int level)
134{
135 return 1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long align_to_level(unsigned long pfn, int level)
139{
140 return (pfn + level_size(level) - 1) & level_mask(level);
141}
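/* At level 1 a PTE maps a single 4KiB page; at level 2 it covers 512 pages
 * (2MiB) and at level 3 512 * 512 pages (1GiB), matching level_size() above. */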
fd18de50 142
143static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
144{
5c645b35 145 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
146}
147
148/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
149 are never going to work. */
150static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
151{
152 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
153}
154
155static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
156{
157 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
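/* On x86 PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these conversions are the
 * identity; they only shift on configurations whose CPU page size is larger
 * than 4KiB. */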
159static inline unsigned long page_to_dma_pfn(struct page *pg)
160{
161 return mm_to_dma_pfn(page_to_pfn(pg));
162}
163static inline unsigned long virt_to_dma_pfn(void *p)
164{
165 return page_to_dma_pfn(virt_to_page(p));
166}
167
168/* global iommu list, set NULL for ignored DMAR units */
169static struct intel_iommu **g_iommus;
170
e0fc7e0b 171static void __init check_tylersburg_isoch(void);
172static int rwbf_quirk;
173
174/*
175 * set to 1 to panic kernel if can't successfully enable VT-d
176 * (used when kernel is launched w/ TXT)
177 */
178static int force_on = 0;
179
180/*
181 * 0: Present
182 * 1-11: Reserved
183 * 12-63: Context Ptr (12 - (haw-1))
184 * 64-127: Reserved
185 */
186struct root_entry {
187 u64 val;
188 u64 rsvd1;
189};
190#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
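/* The root table is a single 4KiB page of 16-byte entries, i.e. 256 entries,
 * one per PCI bus number. */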
191static inline bool root_present(struct root_entry *root)
192{
193 return (root->val & 1);
194}
195static inline void set_root_present(struct root_entry *root)
196{
197 root->val |= 1;
198}
199static inline void set_root_value(struct root_entry *root, unsigned long value)
200{
201 root->val |= value & VTD_PAGE_MASK;
202}
203
204static inline struct context_entry *
205get_context_addr_from_root(struct root_entry *root)
206{
207 return (struct context_entry *)
208 (root_present(root)?phys_to_virt(
209 root->val & VTD_PAGE_MASK) :
210 NULL);
211}
212
213/*
214 * low 64 bits:
215 * 0: present
216 * 1: fault processing disable
217 * 2-3: translation type
218 * 12-63: address space root
219 * high 64 bits:
220 * 0-2: address width
221 * 3-6: aval
222 * 8-23: domain id
223 */
224struct context_entry {
225 u64 lo;
226 u64 hi;
227};
228
229static inline bool context_present(struct context_entry *context)
230{
231 return (context->lo & 1);
232}
233static inline void context_set_present(struct context_entry *context)
234{
235 context->lo |= 1;
236}
237
238static inline void context_set_fault_enable(struct context_entry *context)
239{
240 context->lo &= (((u64)-1) << 2) | 1;
241}
242
243static inline void context_set_translation_type(struct context_entry *context,
244 unsigned long value)
245{
246 context->lo &= (((u64)-1) << 4) | 3;
247 context->lo |= (value & 3) << 2;
248}
249
250static inline void context_set_address_root(struct context_entry *context,
251 unsigned long value)
252{
253 context->lo |= value & VTD_PAGE_MASK;
254}
255
256static inline void context_set_address_width(struct context_entry *context,
257 unsigned long value)
258{
259 context->hi |= value & 7;
260}
261
262static inline void context_set_domain_id(struct context_entry *context,
263 unsigned long value)
264{
265 context->hi |= (value & ((1 << 16) - 1)) << 8;
266}
267
268static inline void context_clear_entry(struct context_entry *context)
269{
270 context->lo = 0;
271 context->hi = 0;
272}
7a8fc25e 273
274/*
275 * 0: readable
276 * 1: writable
277 * 2-6: reserved
278 * 7: super page
279 * 8-10: available
280 * 11: snoop behavior
281 12-63: Host physical address
282 */
283struct dma_pte {
284 u64 val;
285};
622ba12a 286
287static inline void dma_clear_pte(struct dma_pte *pte)
288{
289 pte->val = 0;
290}
291
292static inline u64 dma_pte_addr(struct dma_pte *pte)
293{
294#ifdef CONFIG_64BIT
295 return pte->val & VTD_PAGE_MASK;
296#else
297 /* Must have a full atomic 64-bit read */
1a8bd481 298 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 299#endif
300}
301
302static inline bool dma_pte_present(struct dma_pte *pte)
303{
304 return (pte->val & 3) != 0;
305}
622ba12a 306
307static inline bool dma_pte_superpage(struct dma_pte *pte)
308{
309 return (pte->val & (1 << 7));
310}
311
312static inline int first_pte_in_page(struct dma_pte *pte)
313{
314 return !((unsigned long)pte & ~VTD_PAGE_MASK);
315}
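/* A pte is the first in its table page when its address is 4KiB aligned,
 * i.e. the low 12 bits of the pointer are zero. */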
316
317/*
318 * This domain is a static identity mapping domain.
319 * 1. This domain creates a static 1:1 mapping to all usable memory.
320 * 2. It maps to each iommu if successful.
321 * 3. Each iommu maps to this domain if successful.
322 */
323static struct dmar_domain *si_domain;
324static int hw_pass_through = 1;
2c2e2c38 325
3b5410e7 326/* devices under the same p2p bridge are owned in one domain */
cdc7b837 327#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 328
329/* domain represents a virtual machine; more than one device
330 * across iommus may be owned by one domain, e.g. a kvm guest.
331 */
332#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
333
334/* si_domain contains multiple devices */
335#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
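/* Summary: an ordinary DMA-remapping domain carries none of the flags above;
 * a DOMAIN_FLAG_VIRTUAL_MACHINE domain may span several iommus (e.g. devices
 * assigned to one guest); the single static-identity domain (si_domain)
 * provides the 1:1 mapping used for identity-mapped devices. */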
336
337/* define the limit of IOMMUs supported in each domain */
338#ifdef CONFIG_X86
339# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
340#else
341# define IOMMU_UNITS_SUPPORTED 64
342#endif
343
344struct dmar_domain {
345 int id; /* domain id */
4c923d47 346 int nid; /* node id */
347 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
348 /* bitmap of iommus this domain uses*/
349
350 struct list_head devices; /* all devices' list */
351 struct iova_domain iovad; /* iova's that belong to this domain */
352
353 struct dma_pte *pgd; /* virtual address */
354 int gaw; /* max guest address width */
355
356 /* adjusted guest address width, 0 is level 2 30-bit */
357 int agaw;
358
3b5410e7 359 int flags; /* flags to find out type of domain */
360
361 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 362 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 363 int iommu_count; /* reference count of iommu */
364 int iommu_superpage;/* Level of superpages supported:
365 0 == 4KiB (no superpages), 1 == 2MiB,
366 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 367 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 368 u64 max_addr; /* maximum mapped address */
369};
370
371/* PCI domain-device relationship */
372struct device_domain_info {
373 struct list_head link; /* link to domain siblings */
374 struct list_head global; /* link to global list */
375 int segment; /* PCI domain */
376 u8 bus; /* PCI bus number */
a647dacb 377 u8 devfn; /* PCI devfn number */
45e829ea 378 struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 379 struct intel_iommu *iommu; /* IOMMU used by this device */
380 struct dmar_domain *domain; /* pointer to domain */
381};
382
5e0d2a6f 383static void flush_unmaps_timeout(unsigned long data);
384
385DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
386
80b20dd8 387#define HIGH_WATER_MARK 250
388struct deferred_flush_tables {
389 int next;
390 struct iova *iova[HIGH_WATER_MARK];
391 struct dmar_domain *domain[HIGH_WATER_MARK];
392};
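/* Lazy unmapping: freed IOVAs are queued in these tables and released in
 * batches when the unmap timer fires or HIGH_WATER_MARK entries accumulate,
 * amortizing IOTLB flushes; booting with intel_iommu=strict disables this. */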
393
394static struct deferred_flush_tables *deferred_flush;
395
5e0d2a6f 396/* bitmap for indexing intel_iommus */
5e0d2a6f 397static int g_num_of_iommus;
398
399static DEFINE_SPINLOCK(async_umap_flush_lock);
400static LIST_HEAD(unmaps_to_do);
401
402static int timer_on;
403static long list_size;
5e0d2a6f 404
405static void domain_remove_dev_info(struct dmar_domain *domain);
406
d3f13810 407#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
408int dmar_disabled = 0;
409#else
410int dmar_disabled = 1;
d3f13810 411#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 412
413int intel_iommu_enabled = 0;
414EXPORT_SYMBOL_GPL(intel_iommu_enabled);
415
2d9e667e 416static int dmar_map_gfx = 1;
7d3b03ce 417static int dmar_forcedac;
5e0d2a6f 418static int intel_iommu_strict;
6dd9a7c7 419static int intel_iommu_superpage = 1;
ba395927 420
421int intel_iommu_gfx_mapped;
422EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
423
424#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
425static DEFINE_SPINLOCK(device_domain_lock);
426static LIST_HEAD(device_domain_list);
427
428static struct iommu_ops intel_iommu_ops;
429
430static int __init intel_iommu_setup(char *str)
431{
432 if (!str)
433 return -EINVAL;
434 while (*str) {
435 if (!strncmp(str, "on", 2)) {
436 dmar_disabled = 0;
437 printk(KERN_INFO "Intel-IOMMU: enabled\n");
438 } else if (!strncmp(str, "off", 3)) {
ba395927 439 dmar_disabled = 1;
0cd5c3c8 440 printk(KERN_INFO "Intel-IOMMU: disabled\n");
441 } else if (!strncmp(str, "igfx_off", 8)) {
442 dmar_map_gfx = 0;
443 printk(KERN_INFO
444 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 445 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 446 printk(KERN_INFO
447 "Intel-IOMMU: Forcing DAC for PCI devices\n");
448 dmar_forcedac = 1;
5e0d2a6f 449 } else if (!strncmp(str, "strict", 6)) {
450 printk(KERN_INFO
451 "Intel-IOMMU: disable batched IOTLB flush\n");
452 intel_iommu_strict = 1;
453 } else if (!strncmp(str, "sp_off", 6)) {
454 printk(KERN_INFO
455 "Intel-IOMMU: disable supported super page\n");
456 intel_iommu_superpage = 0;
457 }
458
459 str += strcspn(str, ",");
460 while (*str == ',')
461 str++;
462 }
463 return 0;
464}
465__setup("intel_iommu=", intel_iommu_setup);
466
467static struct kmem_cache *iommu_domain_cache;
468static struct kmem_cache *iommu_devinfo_cache;
469static struct kmem_cache *iommu_iova_cache;
470
4c923d47 471static inline void *alloc_pgtable_page(int node)
eb3fa7cb 472{
473 struct page *page;
474 void *vaddr = NULL;
eb3fa7cb 475
476 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
477 if (page)
478 vaddr = page_address(page);
eb3fa7cb 479 return vaddr;
480}
481
482static inline void free_pgtable_page(void *vaddr)
483{
484 free_page((unsigned long)vaddr);
485}
486
487static inline void *alloc_domain_mem(void)
488{
354bb65e 489 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
490}
491
38717946 492static void free_domain_mem(void *vaddr)
493{
494 kmem_cache_free(iommu_domain_cache, vaddr);
495}
496
497static inline void * alloc_devinfo_mem(void)
498{
354bb65e 499 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
500}
501
502static inline void free_devinfo_mem(void *vaddr)
503{
504 kmem_cache_free(iommu_devinfo_cache, vaddr);
505}
506
507struct iova *alloc_iova_mem(void)
508{
354bb65e 509 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
510}
511
512void free_iova_mem(struct iova *iova)
513{
514 kmem_cache_free(iommu_iova_cache, iova);
515}
516
1b573683 517
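/* cap_sagaw() is a bitmap of the adjusted guest address widths the hardware
 * supports (bit n => an (n+2)-level page table); pick the largest supported
 * agaw that does not exceed the requested guest address width. */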
4ed0d3e6 518static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
519{
520 unsigned long sagaw;
521 int agaw = -1;
522
523 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 524 for (agaw = width_to_agaw(max_gaw);
525 agaw >= 0; agaw--) {
526 if (test_bit(agaw, &sagaw))
527 break;
528 }
529
530 return agaw;
531}
532
533/*
534 * Calculate max SAGAW for each iommu.
535 */
536int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
537{
538 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
539}
540
541/*
542 * calculate agaw for each iommu.
543 * "SAGAW" may be different across iommus, so use a default agaw and
544 * fall back to a smaller supported agaw for iommus that don't support the default.
545 */
546int iommu_calculate_agaw(struct intel_iommu *iommu)
547{
548 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
549}
550
551/* This function only returns a single iommu in a domain */
552static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
553{
554 int iommu_id;
555
2c2e2c38 556 /* si_domain and vm domain should not get here. */
1ce28feb 557 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 558 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 559
1b198bb0 560 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
561 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
562 return NULL;
563
564 return g_iommus[iommu_id];
565}
566
567static void domain_update_iommu_coherency(struct dmar_domain *domain)
568{
569 int i;
570
571 i = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
572
573 domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0;
8e604097 574
1b198bb0 575 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
576 if (!ecap_coherent(g_iommus[i]->ecap)) {
577 domain->iommu_coherency = 0;
578 break;
579 }
580 }
581}
582
583static void domain_update_iommu_snooping(struct dmar_domain *domain)
584{
585 int i;
586
587 domain->iommu_snooping = 1;
588
1b198bb0 589 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
590 if (!ecap_sc_support(g_iommus[i]->ecap)) {
591 domain->iommu_snooping = 0;
592 break;
593 }
594 }
595}
596
597static void domain_update_iommu_superpage(struct dmar_domain *domain)
598{
599 struct dmar_drhd_unit *drhd;
600 struct intel_iommu *iommu = NULL;
601 int mask = 0xf;
602
603 if (!intel_iommu_superpage) {
604 domain->iommu_superpage = 0;
605 return;
606 }
607
608 /* set iommu_superpage to the smallest common denominator */
609 for_each_active_iommu(iommu, drhd) {
610 mask &= cap_super_page_val(iommu->cap);
611 if (!mask) {
612 break;
613 }
614 }
615 domain->iommu_superpage = fls(mask);
616}
617
618/* Some capabilities may be different across iommus */
619static void domain_update_iommu_cap(struct dmar_domain *domain)
620{
621 domain_update_iommu_coherency(domain);
622 domain_update_iommu_snooping(domain);
6dd9a7c7 623 domain_update_iommu_superpage(domain);
624}
625
276dbf99 626static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
627{
628 struct dmar_drhd_unit *drhd = NULL;
629 int i;
630
7c919779 631 for_each_active_drhd_unit(drhd) {
632 if (segment != drhd->segment)
633 continue;
c7151a8d 634
924b6231 635 for (i = 0; i < drhd->devices_cnt; i++) {
636 if (drhd->devices[i] &&
637 drhd->devices[i]->bus->number == bus &&
638 drhd->devices[i]->devfn == devfn)
639 return drhd->iommu;
640 if (drhd->devices[i] &&
641 drhd->devices[i]->subordinate &&
924b6231 642 drhd->devices[i]->subordinate->number <= bus &&
b918c62e 643 drhd->devices[i]->subordinate->busn_res.end >= bus)
644 return drhd->iommu;
645 }
646
647 if (drhd->include_all)
648 return drhd->iommu;
649 }
650
651 return NULL;
652}
653
654static void domain_flush_cache(struct dmar_domain *domain,
655 void *addr, int size)
656{
657 if (!domain->iommu_coherency)
658 clflush_cache_range(addr, size);
659}
660
661/* Gets context entry for a given bus and devfn */
662static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
663 u8 bus, u8 devfn)
664{
665 struct root_entry *root;
666 struct context_entry *context;
667 unsigned long phy_addr;
668 unsigned long flags;
669
670 spin_lock_irqsave(&iommu->lock, flags);
671 root = &iommu->root_entry[bus];
672 context = get_context_addr_from_root(root);
673 if (!context) {
674 context = (struct context_entry *)
675 alloc_pgtable_page(iommu->node);
676 if (!context) {
677 spin_unlock_irqrestore(&iommu->lock, flags);
678 return NULL;
679 }
5b6985ce 680 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
681 phy_addr = virt_to_phys((void *)context);
682 set_root_value(root, phy_addr);
683 set_root_present(root);
684 __iommu_flush_cache(iommu, root, sizeof(*root));
685 }
686 spin_unlock_irqrestore(&iommu->lock, flags);
687 return &context[devfn];
688}
689
690static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
691{
692 struct root_entry *root;
693 struct context_entry *context;
694 int ret;
695 unsigned long flags;
696
697 spin_lock_irqsave(&iommu->lock, flags);
698 root = &iommu->root_entry[bus];
699 context = get_context_addr_from_root(root);
700 if (!context) {
701 ret = 0;
702 goto out;
703 }
c07e7d21 704 ret = context_present(&context[devfn]);
705out:
706 spin_unlock_irqrestore(&iommu->lock, flags);
707 return ret;
708}
709
710static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
711{
712 struct root_entry *root;
713 struct context_entry *context;
714 unsigned long flags;
715
716 spin_lock_irqsave(&iommu->lock, flags);
717 root = &iommu->root_entry[bus];
718 context = get_context_addr_from_root(root);
719 if (context) {
c07e7d21 720 context_clear_entry(&context[devfn]);
721 __iommu_flush_cache(iommu, &context[devfn], \
722 sizeof(*context));
723 }
724 spin_unlock_irqrestore(&iommu->lock, flags);
725}
726
727static void free_context_table(struct intel_iommu *iommu)
728{
729 struct root_entry *root;
730 int i;
731 unsigned long flags;
732 struct context_entry *context;
733
734 spin_lock_irqsave(&iommu->lock, flags);
735 if (!iommu->root_entry) {
736 goto out;
737 }
738 for (i = 0; i < ROOT_ENTRY_NR; i++) {
739 root = &iommu->root_entry[i];
740 context = get_context_addr_from_root(root);
741 if (context)
742 free_pgtable_page(context);
743 }
744 free_pgtable_page(iommu->root_entry);
745 iommu->root_entry = NULL;
746out:
747 spin_unlock_irqrestore(&iommu->lock, flags);
748}
749
b026fd28 750static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
4399c8bf 751 unsigned long pfn, int target_level)
ba395927 752{
b026fd28 753 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
754 struct dma_pte *parent, *pte = NULL;
755 int level = agaw_to_level(domain->agaw);
4399c8bf 756 int offset;
757
758 BUG_ON(!domain->pgd);
759
760 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
761 /* Address beyond IOMMU's addressing capabilities. */
762 return NULL;
763
764 parent = domain->pgd;
765
766 while (level > 0) {
767 void *tmp_page;
768
b026fd28 769 offset = pfn_level_offset(pfn, level);
ba395927 770 pte = &parent[offset];
4399c8bf 771 if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
772 break;
773 if (level == target_level)
774 break;
775
19c239ce 776 if (!dma_pte_present(pte)) {
777 uint64_t pteval;
778
4c923d47 779 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 780
206a73c1 781 if (!tmp_page)
ba395927 782 return NULL;
206a73c1 783
c85994e4 784 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 785 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
786 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
787 /* Someone else set it while we were thinking; use theirs. */
788 free_pgtable_page(tmp_page);
789 } else {
790 dma_pte_addr(pte);
791 domain_flush_cache(domain, pte, sizeof(*pte));
792 }
ba395927 793 }
19c239ce 794 parent = phys_to_virt(dma_pte_addr(pte));
795 level--;
796 }
797
798 return pte;
799}
800
6dd9a7c7 801
ba395927 802/* return address's pte at specific level */
803static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
804 unsigned long pfn,
6dd9a7c7 805 int level, int *large_page)
806{
807 struct dma_pte *parent, *pte = NULL;
808 int total = agaw_to_level(domain->agaw);
809 int offset;
810
811 parent = domain->pgd;
812 while (level <= total) {
90dcfb5e 813 offset = pfn_level_offset(pfn, total);
814 pte = &parent[offset];
815 if (level == total)
816 return pte;
817
818 if (!dma_pte_present(pte)) {
819 *large_page = total;
ba395927 820 break;
821 }
822
823 if (pte->val & DMA_PTE_LARGE_PAGE) {
824 *large_page = total;
825 return pte;
826 }
827
19c239ce 828 parent = phys_to_virt(dma_pte_addr(pte));
829 total--;
830 }
831 return NULL;
832}
833
ba395927 834/* clear last level pte, a tlb flush should be followed */
292827cb 835static int dma_pte_clear_range(struct dmar_domain *domain,
836 unsigned long start_pfn,
837 unsigned long last_pfn)
ba395927 838{
04b18e65 839 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 840 unsigned int large_page = 1;
310a5ab9 841 struct dma_pte *first_pte, *pte;
66eae846 842
04b18e65 843 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 844 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 845 BUG_ON(start_pfn > last_pfn);
ba395927 846
04b18e65 847 /* we don't need lock here; nobody else touches the iova range */
59c36286 848 do {
849 large_page = 1;
850 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 851 if (!pte) {
6dd9a7c7 852 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
853 continue;
854 }
6dd9a7c7 855 do {
310a5ab9 856 dma_clear_pte(pte);
6dd9a7c7 857 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 858 pte++;
859 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
860
861 domain_flush_cache(domain, first_pte,
862 (void *)pte - (void *)first_pte);
863
864 } while (start_pfn && start_pfn <= last_pfn);
292827cb 865
5c645b35 866 return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH);
867}
868
869static void dma_pte_free_level(struct dmar_domain *domain, int level,
870 struct dma_pte *pte, unsigned long pfn,
871 unsigned long start_pfn, unsigned long last_pfn)
872{
873 pfn = max(start_pfn, pfn);
874 pte = &pte[pfn_level_offset(pfn, level)];
875
876 do {
877 unsigned long level_pfn;
878 struct dma_pte *level_pte;
879
880 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
881 goto next;
882
883 level_pfn = pfn & level_mask(level - 1);
884 level_pte = phys_to_virt(dma_pte_addr(pte));
885
886 if (level > 2)
887 dma_pte_free_level(domain, level - 1, level_pte,
888 level_pfn, start_pfn, last_pfn);
889
890 /* If range covers entire pagetable, free it */
891 if (!(start_pfn > level_pfn ||
892 last_pfn < level_pfn + level_size(level))) {
893 dma_clear_pte(pte);
894 domain_flush_cache(domain, pte, sizeof(*pte));
895 free_pgtable_page(level_pte);
896 }
897next:
898 pfn += level_size(level);
899 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
900}
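/* dma_pte_free_level() walks the tree recursively and only frees an
 * intermediate table page when [start_pfn, last_pfn] covers its entire range. */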
901
902/* free page table pages. last level pte should already be cleared */
903static void dma_pte_free_pagetable(struct dmar_domain *domain,
904 unsigned long start_pfn,
905 unsigned long last_pfn)
ba395927 906{
6660c63a 907 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 908
909 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
910 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 911 BUG_ON(start_pfn > last_pfn);
ba395927 912
f3a0a52f 913 /* We don't need lock here; nobody else touches the iova range */
914 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
915 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 916
ba395927 917 /* free pgd */
d794dc9b 918 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
919 free_pgtable_page(domain->pgd);
920 domain->pgd = NULL;
921 }
922}
923
924/* iommu handling */
925static int iommu_alloc_root_entry(struct intel_iommu *iommu)
926{
927 struct root_entry *root;
928 unsigned long flags;
929
4c923d47 930 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
931 if (!root)
932 return -ENOMEM;
933
5b6985ce 934 __iommu_flush_cache(iommu, root, ROOT_SIZE);
935
936 spin_lock_irqsave(&iommu->lock, flags);
937 iommu->root_entry = root;
938 spin_unlock_irqrestore(&iommu->lock, flags);
939
940 return 0;
941}
942
943static void iommu_set_root_entry(struct intel_iommu *iommu)
944{
945 void *addr;
c416daa9 946 u32 sts;
947 unsigned long flag;
948
949 addr = iommu->root_entry;
950
1f5b3c3f 951 raw_spin_lock_irqsave(&iommu->register_lock, flag);
952 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
953
c416daa9 954 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
955
956 /* Make sure hardware complete it */
957 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 958 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 959
1f5b3c3f 960 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
961}
962
963static void iommu_flush_write_buffer(struct intel_iommu *iommu)
964{
965 u32 val;
966 unsigned long flag;
967
9af88143 968 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 969 return;
ba395927 970
1f5b3c3f 971 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 972 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
973
974 /* Make sure hardware complete it */
975 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 976 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 977
1f5b3c3f 978 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
979}
980
981/* return value determines whether we need a write buffer flush */
982static void __iommu_flush_context(struct intel_iommu *iommu,
983 u16 did, u16 source_id, u8 function_mask,
984 u64 type)
985{
986 u64 val = 0;
987 unsigned long flag;
988
989 switch (type) {
990 case DMA_CCMD_GLOBAL_INVL:
991 val = DMA_CCMD_GLOBAL_INVL;
992 break;
993 case DMA_CCMD_DOMAIN_INVL:
994 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
995 break;
996 case DMA_CCMD_DEVICE_INVL:
997 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
998 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
999 break;
1000 default:
1001 BUG();
1002 }
1003 val |= DMA_CCMD_ICC;
1004
1f5b3c3f 1005 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1006 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1007
1008 /* Make sure hardware complete it */
1009 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1010 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1011
1f5b3c3f 1012 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1013}
1014
1015/* return value determines whether we need a write buffer flush */
1016static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1017 u64 addr, unsigned int size_order, u64 type)
1018{
1019 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1020 u64 val = 0, val_iva = 0;
1021 unsigned long flag;
1022
1023 switch (type) {
1024 case DMA_TLB_GLOBAL_FLUSH:
1025 /* global flush doesn't need set IVA_REG */
1026 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1027 break;
1028 case DMA_TLB_DSI_FLUSH:
1029 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1030 break;
1031 case DMA_TLB_PSI_FLUSH:
1032 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1033 /* Note: always flush non-leaf currently */
1034 val_iva = size_order | addr;
1035 break;
1036 default:
1037 BUG();
1038 }
1039 /* Note: set drain read/write */
1040#if 0
1041 /*
1042 * This is probably meant to be extra safe. Looks like we can
1043 * ignore it without any impact.
1044 */
1045 if (cap_read_drain(iommu->cap))
1046 val |= DMA_TLB_READ_DRAIN;
1047#endif
1048 if (cap_write_drain(iommu->cap))
1049 val |= DMA_TLB_WRITE_DRAIN;
1050
1f5b3c3f 1051 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1052 /* Note: Only uses first TLB reg currently */
1053 if (val_iva)
1054 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1055 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1056
1057 /* Make sure hardware complete it */
1058 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1059 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1060
1f5b3c3f 1061 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1062
1063 /* check IOTLB invalidation granularity */
1064 if (DMA_TLB_IAIG(val) == 0)
1065 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1066 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1067 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
1068 (unsigned long long)DMA_TLB_IIRG(type),
1069 (unsigned long long)DMA_TLB_IAIG(val));
1070}
1071
1072static struct device_domain_info *iommu_support_dev_iotlb(
1073 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1074{
1075 int found = 0;
1076 unsigned long flags;
1077 struct device_domain_info *info;
1078 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1079
1080 if (!ecap_dev_iotlb_support(iommu->ecap))
1081 return NULL;
1082
1083 if (!iommu->qi)
1084 return NULL;
1085
1086 spin_lock_irqsave(&device_domain_lock, flags);
1087 list_for_each_entry(info, &domain->devices, link)
1088 if (info->bus == bus && info->devfn == devfn) {
1089 found = 1;
1090 break;
1091 }
1092 spin_unlock_irqrestore(&device_domain_lock, flags);
1093
1094 if (!found || !info->dev)
1095 return NULL;
1096
1097 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1098 return NULL;
1099
1100 if (!dmar_find_matched_atsr_unit(info->dev))
1101 return NULL;
1102
1103 info->iommu = iommu;
1104
1105 return info;
1106}
1107
1108static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1109{
1110 if (!info)
1111 return;
1112
1113 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1114}
1115
1116static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1117{
1118 if (!info->dev || !pci_ats_enabled(info->dev))
1119 return;
1120
1121 pci_disable_ats(info->dev);
1122}
1123
1124static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1125 u64 addr, unsigned mask)
1126{
1127 u16 sid, qdep;
1128 unsigned long flags;
1129 struct device_domain_info *info;
1130
1131 spin_lock_irqsave(&device_domain_lock, flags);
1132 list_for_each_entry(info, &domain->devices, link) {
1133 if (!info->dev || !pci_ats_enabled(info->dev))
1134 continue;
1135
1136 sid = info->bus << 8 | info->devfn;
1137 qdep = pci_ats_queue_depth(info->dev);
1138 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1139 }
1140 spin_unlock_irqrestore(&device_domain_lock, flags);
1141}
1142
1f0ef2aa 1143static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
82653633 1144 unsigned long pfn, unsigned int pages, int map)
ba395927 1145{
9dd2fe89 1146 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1147 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
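/* 'mask' is the number of low-order address bits the IOTLB invalidation
 * ignores, so a page-selective flush covers 2^mask contiguous VT-d pages. */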
ba395927 1148
1149 BUG_ON(pages == 0);
1150
ba395927 1151 /*
1152 * Fall back to domain-selective flush if there is no PSI support or the size is
1153 * too big.
1154 * PSI requires page size to be 2 ^ x, and the base address is naturally
1155 * aligned to the size
1156 */
1157 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1158 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1159 DMA_TLB_DSI_FLUSH);
1160 else
1161 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1162 DMA_TLB_PSI_FLUSH);
1163
1164 /*
1165 * In caching mode, changes of pages from non-present to present require
1166 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1167 */
82653633 1168 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1169 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
1170}
1171
f8bab735 1172static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1173{
1174 u32 pmen;
1175 unsigned long flags;
1176
1f5b3c3f 1177 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1178 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1179 pmen &= ~DMA_PMEN_EPM;
1180 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1181
1182 /* wait for the protected region status bit to clear */
1183 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1184 readl, !(pmen & DMA_PMEN_PRS), pmen);
1185
1f5b3c3f 1186 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1187}
1188
1189static int iommu_enable_translation(struct intel_iommu *iommu)
1190{
1191 u32 sts;
1192 unsigned long flags;
1193
1f5b3c3f 1194 raw_spin_lock_irqsave(&iommu->register_lock, flags);
1195 iommu->gcmd |= DMA_GCMD_TE;
1196 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1197
1198 /* Make sure hardware complete it */
1199 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1200 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1201
1f5b3c3f 1202 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
1203 return 0;
1204}
1205
1206static int iommu_disable_translation(struct intel_iommu *iommu)
1207{
1208 u32 sts;
1209 unsigned long flag;
1210
1f5b3c3f 1211 raw_spin_lock_irqsave(&iommu->register_lock, flag);
1212 iommu->gcmd &= ~DMA_GCMD_TE;
1213 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1214
1215 /* Make sure hardware complete it */
1216 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1217 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1218
1f5b3c3f 1219 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
1220 return 0;
1221}
1222
3460a6d9 1223
1224static int iommu_init_domains(struct intel_iommu *iommu)
1225{
1226 unsigned long ndomains;
1227 unsigned long nlongs;
1228
1229 ndomains = cap_ndoms(iommu->cap);
1230 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1231 iommu->seq_id, ndomains);
1232 nlongs = BITS_TO_LONGS(ndomains);
1233
1234 spin_lock_init(&iommu->lock);
1235
1236 /* TBD: there might be 64K domains,
1237 * consider other allocation for future chip
1238 */
1239 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1240 if (!iommu->domain_ids) {
1241 pr_err("IOMMU%d: allocating domain id array failed\n",
1242 iommu->seq_id);
1243 return -ENOMEM;
1244 }
1245 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1246 GFP_KERNEL);
1247 if (!iommu->domains) {
1248 pr_err("IOMMU%d: allocating domain array failed\n",
1249 iommu->seq_id);
1250 kfree(iommu->domain_ids);
1251 iommu->domain_ids = NULL;
1252 return -ENOMEM;
1253 }
1254
1255 /*
1256 * if Caching mode is set, then invalid translations are tagged
1257 * with domainid 0. Hence we need to pre-allocate it.
1258 */
1259 if (cap_caching_mode(iommu->cap))
1260 set_bit(0, iommu->domain_ids);
1261 return 0;
1262}
ba395927 1263
1264
1265static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1266static void vm_domain_exit(struct dmar_domain *domain);
1267
1268void free_dmar_iommu(struct intel_iommu *iommu)
1269{
1270 struct dmar_domain *domain;
1271 int i;
c7151a8d 1272 unsigned long flags;
ba395927 1273
94a91b50 1274 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1275 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
1276 domain = iommu->domains[i];
1277 clear_bit(i, iommu->domain_ids);
1278
1279 spin_lock_irqsave(&domain->iommu_lock, flags);
1280 if (--domain->iommu_count == 0) {
1281 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1282 vm_domain_exit(domain);
1283 else
1284 domain_exit(domain);
1285 }
1286 spin_unlock_irqrestore(&domain->iommu_lock, flags);
5e98c4b1 1287 }
1288 }
1289
1290 if (iommu->gcmd & DMA_GCMD_TE)
1291 iommu_disable_translation(iommu);
1292
1293 if (iommu->irq) {
1294 /* This will mask the irq */
1295 free_irq(iommu->irq, iommu);
b5f36d9e 1296 irq_set_handler_data(iommu->irq, NULL);
1297 destroy_irq(iommu->irq);
1298 }
1299
1300 kfree(iommu->domains);
1301 kfree(iommu->domain_ids);
1302
1303 g_iommus[iommu->seq_id] = NULL;
1304
1305 /* if all iommus are freed, free g_iommus */
1306 for (i = 0; i < g_num_of_iommus; i++) {
1307 if (g_iommus[i])
1308 break;
1309 }
1310
1311 if (i == g_num_of_iommus)
1312 kfree(g_iommus);
1313
1314 /* free context mapping */
1315 free_context_table(iommu);
1316}
1317
2c2e2c38 1318static struct dmar_domain *alloc_domain(void)
ba395927 1319{
ba395927 1320 struct dmar_domain *domain;
1321
1322 domain = alloc_domain_mem();
1323 if (!domain)
1324 return NULL;
1325
4c923d47 1326 domain->nid = -1;
1b198bb0 1327 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
1328 domain->flags = 0;
1329
1330 return domain;
1331}
1332
1333static int iommu_attach_domain(struct dmar_domain *domain,
1334 struct intel_iommu *iommu)
1335{
1336 int num;
1337 unsigned long ndomains;
1338 unsigned long flags;
1339
1340 ndomains = cap_ndoms(iommu->cap);
1341
1342 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1343
1344 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1345 if (num >= ndomains) {
1346 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1347 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1348 return -ENOMEM;
1349 }
1350
ba395927 1351 domain->id = num;
2c2e2c38 1352 set_bit(num, iommu->domain_ids);
1b198bb0 1353 set_bit(iommu->seq_id, domain->iommu_bmp);
1354 iommu->domains[num] = domain;
1355 spin_unlock_irqrestore(&iommu->lock, flags);
1356
2c2e2c38 1357 return 0;
1358}
1359
1360static void iommu_detach_domain(struct dmar_domain *domain,
1361 struct intel_iommu *iommu)
1362{
1363 unsigned long flags;
1364 int num, ndomains;
1365 int found = 0;
ba395927 1366
8c11e798 1367 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1368 ndomains = cap_ndoms(iommu->cap);
a45946ab 1369 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1370 if (iommu->domains[num] == domain) {
1371 found = 1;
1372 break;
1373 }
1374 }
1375
1376 if (found) {
1377 clear_bit(num, iommu->domain_ids);
1b198bb0 1378 clear_bit(iommu->seq_id, domain->iommu_bmp);
1379 iommu->domains[num] = NULL;
1380 }
8c11e798 1381 spin_unlock_irqrestore(&iommu->lock, flags);
1382}
1383
1384static struct iova_domain reserved_iova_list;
8a443df4 1385static struct lock_class_key reserved_rbtree_key;
ba395927 1386
51a63e67 1387static int dmar_init_reserved_ranges(void)
1388{
1389 struct pci_dev *pdev = NULL;
1390 struct iova *iova;
1391 int i;
ba395927 1392
f661197e 1393 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1394
1395 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1396 &reserved_rbtree_key);
1397
1398 /* IOAPIC ranges shouldn't be accessed by DMA */
1399 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1400 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1401 if (!iova) {
ba395927 1402 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1403 return -ENODEV;
1404 }
1405
1406 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1407 for_each_pci_dev(pdev) {
1408 struct resource *r;
1409
1410 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1411 r = &pdev->resource[i];
1412 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1413 continue;
1414 iova = reserve_iova(&reserved_iova_list,
1415 IOVA_PFN(r->start),
1416 IOVA_PFN(r->end));
51a63e67 1417 if (!iova) {
ba395927 1418 printk(KERN_ERR "Reserve iova failed\n");
1419 return -ENODEV;
1420 }
1421 }
1422 }
51a63e67 1423 return 0;
1424}
1425
1426static void domain_reserve_special_ranges(struct dmar_domain *domain)
1427{
1428 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1429}
1430
1431static inline int guestwidth_to_adjustwidth(int gaw)
1432{
1433 int agaw;
1434 int r = (gaw - 12) % 9;
1435
1436 if (r == 0)
1437 agaw = gaw;
1438 else
1439 agaw = gaw + 9 - r;
1440 if (agaw > 64)
1441 agaw = 64;
1442 return agaw;
1443}
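/* Example: a 35-bit guest width is rounded up to 39 bits so the adjusted
 * width is 12 + a multiple of 9, i.e. a whole number of page-table levels. */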
1444
1445static int domain_init(struct dmar_domain *domain, int guest_width)
1446{
1447 struct intel_iommu *iommu;
1448 int adjust_width, agaw;
1449 unsigned long sagaw;
1450
f661197e 1451 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1452 spin_lock_init(&domain->iommu_lock);
1453
1454 domain_reserve_special_ranges(domain);
1455
1456 /* calculate AGAW */
8c11e798 1457 iommu = domain_get_iommu(domain);
1458 if (guest_width > cap_mgaw(iommu->cap))
1459 guest_width = cap_mgaw(iommu->cap);
1460 domain->gaw = guest_width;
1461 adjust_width = guestwidth_to_adjustwidth(guest_width);
1462 agaw = width_to_agaw(adjust_width);
1463 sagaw = cap_sagaw(iommu->cap);
1464 if (!test_bit(agaw, &sagaw)) {
1465 /* hardware doesn't support it, choose a bigger one */
1466 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1467 agaw = find_next_bit(&sagaw, 5, agaw);
1468 if (agaw >= 5)
1469 return -ENODEV;
1470 }
1471 domain->agaw = agaw;
1472 INIT_LIST_HEAD(&domain->devices);
1473
1474 if (ecap_coherent(iommu->ecap))
1475 domain->iommu_coherency = 1;
1476 else
1477 domain->iommu_coherency = 0;
1478
1479 if (ecap_sc_support(iommu->ecap))
1480 domain->iommu_snooping = 1;
1481 else
1482 domain->iommu_snooping = 0;
1483
6dd9a7c7 1484 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
c7151a8d 1485 domain->iommu_count = 1;
4c923d47 1486 domain->nid = iommu->node;
c7151a8d 1487
ba395927 1488 /* always allocate the top pgd */
4c923d47 1489 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
1490 if (!domain->pgd)
1491 return -ENOMEM;
5b6985ce 1492 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
1493 return 0;
1494}
1495
1496static void domain_exit(struct dmar_domain *domain)
1497{
1498 struct dmar_drhd_unit *drhd;
1499 struct intel_iommu *iommu;
1500
1501 /* Domain 0 is reserved, so don't process it */
1502 if (!domain)
1503 return;
1504
1505 /* Flush any lazy unmaps that may reference this domain */
1506 if (!intel_iommu_strict)
1507 flush_unmaps_timeout(0);
1508
1509 domain_remove_dev_info(domain);
1510 /* destroy iovas */
1511 put_iova_domain(&domain->iovad);
1512
1513 /* clear ptes */
595badf5 1514 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
1515
1516 /* free page tables */
d794dc9b 1517 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1518
2c2e2c38 1519 for_each_active_iommu(iommu, drhd)
1b198bb0 1520 if (test_bit(iommu->seq_id, domain->iommu_bmp))
1521 iommu_detach_domain(domain, iommu);
1522
1523 free_domain_mem(domain);
1524}
1525
1526static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1527 u8 bus, u8 devfn, int translation)
1528{
1529 struct context_entry *context;
ba395927 1530 unsigned long flags;
5331fe6f 1531 struct intel_iommu *iommu;
1532 struct dma_pte *pgd;
1533 unsigned long num;
1534 unsigned long ndomains;
1535 int id;
1536 int agaw;
93a23a72 1537 struct device_domain_info *info = NULL;
1538
1539 pr_debug("Set context mapping for %02x:%02x.%d\n",
1540 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1541
ba395927 1542 BUG_ON(!domain->pgd);
1543 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1544 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1545
276dbf99 1546 iommu = device_to_iommu(segment, bus, devfn);
1547 if (!iommu)
1548 return -ENODEV;
1549
1550 context = device_to_context_entry(iommu, bus, devfn);
1551 if (!context)
1552 return -ENOMEM;
1553 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1554 if (context_present(context)) {
1555 spin_unlock_irqrestore(&iommu->lock, flags);
1556 return 0;
1557 }
1558
1559 id = domain->id;
1560 pgd = domain->pgd;
1561
1562 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1563 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
1564 int found = 0;
1565
1566 /* find an available domain id for this device in iommu */
1567 ndomains = cap_ndoms(iommu->cap);
a45946ab 1568 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1569 if (iommu->domains[num] == domain) {
1570 id = num;
1571 found = 1;
1572 break;
1573 }
1574 }
1575
1576 if (found == 0) {
1577 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1578 if (num >= ndomains) {
1579 spin_unlock_irqrestore(&iommu->lock, flags);
1580 printk(KERN_ERR "IOMMU: no free domain ids\n");
1581 return -EFAULT;
1582 }
1583
1584 set_bit(num, iommu->domain_ids);
1585 iommu->domains[num] = domain;
1586 id = num;
1587 }
1588
1589 /* Skip top levels of page tables for
1590 * iommus whose agaw is smaller than the default.
1672af11 1591 * Unnecessary for PT mode.
ea6606b0 1592 */
1593 if (translation != CONTEXT_TT_PASS_THROUGH) {
1594 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1595 pgd = phys_to_virt(dma_pte_addr(pgd));
1596 if (!dma_pte_present(pgd)) {
1597 spin_unlock_irqrestore(&iommu->lock, flags);
1598 return -ENOMEM;
1599 }
1600 }
1601 }
1602 }
1603
1604 context_set_domain_id(context, id);
4ed0d3e6 1605
1606 if (translation != CONTEXT_TT_PASS_THROUGH) {
1607 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1608 translation = info ? CONTEXT_TT_DEV_IOTLB :
1609 CONTEXT_TT_MULTI_LEVEL;
1610 }
1611 /*
1612 * In pass through mode, AW must be programmed to indicate the largest
1613 * AGAW value supported by hardware. And ASR is ignored by hardware.
1614 */
93a23a72 1615 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1616 context_set_address_width(context, iommu->msagaw);
1617 else {
1618 context_set_address_root(context, virt_to_phys(pgd));
1619 context_set_address_width(context, iommu->agaw);
1620 }
1621
1622 context_set_translation_type(context, translation);
1623 context_set_fault_enable(context);
1624 context_set_present(context);
5331fe6f 1625 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1626
1627 /*
1628 * It's a non-present to present mapping. If hardware doesn't cache
1629 * non-present entries we only need to flush the write-buffer. If it
1630 * _does_ cache non-present entries, then it does so in the special
1631 * domain #0, which we have to flush:
1632 */
1633 if (cap_caching_mode(iommu->cap)) {
1634 iommu->flush.flush_context(iommu, 0,
1635 (((u16)bus) << 8) | devfn,
1636 DMA_CCMD_MASK_NOBIT,
1637 DMA_CCMD_DEVICE_INVL);
82653633 1638 iommu->flush.flush_iotlb(iommu, domain->id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1639 } else {
ba395927 1640 iommu_flush_write_buffer(iommu);
4c25a2c1 1641 }
93a23a72 1642 iommu_enable_dev_iotlb(info);
ba395927 1643 spin_unlock_irqrestore(&iommu->lock, flags);
1644
1645 spin_lock_irqsave(&domain->iommu_lock, flags);
1b198bb0 1646 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
c7151a8d 1647 domain->iommu_count++;
1648 if (domain->iommu_count == 1)
1649 domain->nid = iommu->node;
58c610bd 1650 domain_update_iommu_cap(domain);
1651 }
1652 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1653 return 0;
1654}
1655
1656static int
1657domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1658 int translation)
1659{
1660 int ret;
1661 struct pci_dev *tmp, *parent;
1662
276dbf99 1663 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
1664 pdev->bus->number, pdev->devfn,
1665 translation);
1666 if (ret)
1667 return ret;
1668
1669 /* dependent device mapping */
1670 tmp = pci_find_upstream_pcie_bridge(pdev);
1671 if (!tmp)
1672 return 0;
1673 /* Secondary interface's bus number and devfn 0 */
1674 parent = pdev->bus->self;
1675 while (parent != tmp) {
1676 ret = domain_context_mapping_one(domain,
1677 pci_domain_nr(parent->bus),
1678 parent->bus->number,
4ed0d3e6 1679 parent->devfn, translation);
1680 if (ret)
1681 return ret;
1682 parent = parent->bus->self;
1683 }
45e829ea 1684 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
ba395927 1685 return domain_context_mapping_one(domain,
276dbf99 1686 pci_domain_nr(tmp->subordinate),
1687 tmp->subordinate->number, 0,
1688 translation);
1689 else /* this is a legacy PCI bridge */
1690 return domain_context_mapping_one(domain,
1691 pci_domain_nr(tmp->bus),
1692 tmp->bus->number,
1693 tmp->devfn,
1694 translation);
1695}
1696
5331fe6f 1697static int domain_context_mapped(struct pci_dev *pdev)
1698{
1699 int ret;
1700 struct pci_dev *tmp, *parent;
1701 struct intel_iommu *iommu;
1702
1703 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1704 pdev->devfn);
1705 if (!iommu)
1706 return -ENODEV;
ba395927 1707
276dbf99 1708 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
1709 if (!ret)
1710 return ret;
1711 /* dependent device mapping */
1712 tmp = pci_find_upstream_pcie_bridge(pdev);
1713 if (!tmp)
1714 return ret;
1715 /* Secondary interface's bus number and devfn 0 */
1716 parent = pdev->bus->self;
1717 while (parent != tmp) {
8c11e798 1718 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1719 parent->devfn);
1720 if (!ret)
1721 return ret;
1722 parent = parent->bus->self;
1723 }
5f4d91a1 1724 if (pci_is_pcie(tmp))
1725 return device_context_mapped(iommu, tmp->subordinate->number,
1726 0);
ba395927 1727 else
1728 return device_context_mapped(iommu, tmp->bus->number,
1729 tmp->devfn);
1730}
1731
1732/* Returns a number of VTD pages, but aligned to MM page size */
1733static inline unsigned long aligned_nrpages(unsigned long host_addr,
1734 size_t size)
1735{
1736 host_addr &= ~PAGE_MASK;
1737 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1738}
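/* Example: a buffer at offset 0x200 within its page with size 0x1000 spans
 * two 4KiB VT-d pages, so this returns 2. */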
1739
1740/* Return largest possible superpage level for a given mapping */
1741static inline int hardware_largepage_caps(struct dmar_domain *domain,
1742 unsigned long iov_pfn,
1743 unsigned long phy_pfn,
1744 unsigned long pages)
1745{
1746 int support, level = 1;
1747 unsigned long pfnmerge;
1748
1749 support = domain->iommu_superpage;
1750
1751 /* To use a large page, the virtual *and* physical addresses
1752 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1753 of them will mean we have to use smaller pages. So just
1754 merge them and check both at once. */
1755 pfnmerge = iov_pfn | phy_pfn;
1756
1757 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1758 pages >>= VTD_STRIDE_SHIFT;
1759 if (!pages)
1760 break;
1761 pfnmerge >>= VTD_STRIDE_SHIFT;
1762 level++;
1763 support--;
1764 }
1765 return level;
1766}
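/* Example: with 2MiB superpage support, an IOVA and physical address that are
 * both 2MiB aligned with at least 512 pages left to map yield level 2, so one
 * large PTE can map the first 2MiB. */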
1767
1768static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1769 struct scatterlist *sg, unsigned long phys_pfn,
1770 unsigned long nr_pages, int prot)
1771{
1772 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1773 phys_addr_t uninitialized_var(pteval);
e1605495 1774 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1775 unsigned long sg_res;
1776 unsigned int largepage_lvl = 0;
1777 unsigned long lvl_pages = 0;
e1605495
DW
1778
1779 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1780
1781 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1782 return -EINVAL;
1783
1784 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1785
9051aa02
DW
1786 if (sg)
1787 sg_res = 0;
1788 else {
1789 sg_res = nr_pages + 1;
1790 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1791 }
1792
6dd9a7c7 1793 while (nr_pages > 0) {
c85994e4
DW
1794 uint64_t tmp;
1795
e1605495 1796 if (!sg_res) {
f532959b 1797 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1798 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1799 sg->dma_length = sg->length;
1800 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 1801 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 1802 }
6dd9a7c7 1803
e1605495 1804 if (!pte) {
6dd9a7c7
YS
1805 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
1806
1807 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl);
e1605495
DW
1808 if (!pte)
1809 return -ENOMEM;
6dd9a7c7 1810 /* It is a large page */
6491d4d0 1811 if (largepage_lvl > 1) {
6dd9a7c7 1812 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
1813 /* Ensure that old small page tables are removed to make room
1814 for superpage, if they exist. */
1815 dma_pte_clear_range(domain, iov_pfn,
1816 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1817 dma_pte_free_pagetable(domain, iov_pfn,
1818 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
1819 } else {
6dd9a7c7 1820 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 1821 }
6dd9a7c7 1822
e1605495
DW
1823 }
1824 /* We don't need a lock here; nobody else
1825 * touches this iova range
1826 */
7766a3fb 1827 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1828 if (tmp) {
1bf20f0d 1829 static int dumps = 5;
c85994e4
DW
1830 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1831 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1832 if (dumps) {
1833 dumps--;
1834 debug_dma_dump_mappings(NULL);
1835 }
1836 WARN_ON(1);
1837 }
6dd9a7c7
YS
1838
1839 lvl_pages = lvl_to_nr_pages(largepage_lvl);
1840
1841 BUG_ON(nr_pages < lvl_pages);
1842 BUG_ON(sg_res < lvl_pages);
1843
1844 nr_pages -= lvl_pages;
1845 iov_pfn += lvl_pages;
1846 phys_pfn += lvl_pages;
1847 pteval += lvl_pages * VTD_PAGE_SIZE;
1848 sg_res -= lvl_pages;
1849
1850 /* If the next PTE would be the first in a new page, then we
1851 need to flush the cache on the entries we've just written.
1852 And then we'll need to recalculate 'pte', so clear it and
1853 let it get set again in the if (!pte) block above.
1854
1855 If we're done (!nr_pages) we need to flush the cache too.
1856
1857 Also if we've been setting superpages, we may need to
1858 recalculate 'pte' and switch back to smaller pages for the
1859 end of the mapping, if the trailing size is not enough to
1860 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 1861 pte++;
6dd9a7c7
YS
1862 if (!nr_pages || first_pte_in_page(pte) ||
1863 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
1864 domain_flush_cache(domain, first_pte,
1865 (void *)pte - (void *)first_pte);
1866 pte = NULL;
1867 }
6dd9a7c7
YS
1868
1869 if (!sg_res && nr_pages)
e1605495
DW
1870 sg = sg_next(sg);
1871 }
1872 return 0;
1873}
1874
9051aa02
DW
1875static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1876 struct scatterlist *sg, unsigned long nr_pages,
1877 int prot)
ba395927 1878{
9051aa02
DW
1879 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1880}
6f6a00e4 1881
9051aa02
DW
1882static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1883 unsigned long phys_pfn, unsigned long nr_pages,
1884 int prot)
1885{
1886 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1887}
1888
c7151a8d 1889static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1890{
c7151a8d
WH
1891 if (!iommu)
1892 return;
8c11e798
WH
1893
1894 clear_context_table(iommu, bus, devfn);
1895 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1896 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1897 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1898}
1899
109b9b04
DW
1900static inline void unlink_domain_info(struct device_domain_info *info)
1901{
1902 assert_spin_locked(&device_domain_lock);
1903 list_del(&info->link);
1904 list_del(&info->global);
1905 if (info->dev)
1906 info->dev->dev.archdata.iommu = NULL;
1907}
1908
ba395927
KA
1909static void domain_remove_dev_info(struct dmar_domain *domain)
1910{
1911 struct device_domain_info *info;
1912 unsigned long flags;
c7151a8d 1913 struct intel_iommu *iommu;
ba395927
KA
1914
1915 spin_lock_irqsave(&device_domain_lock, flags);
1916 while (!list_empty(&domain->devices)) {
1917 info = list_entry(domain->devices.next,
1918 struct device_domain_info, link);
109b9b04 1919 unlink_domain_info(info);
ba395927
KA
1920 spin_unlock_irqrestore(&device_domain_lock, flags);
1921
93a23a72 1922 iommu_disable_dev_iotlb(info);
276dbf99 1923 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1924 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1925 free_devinfo_mem(info);
1926
1927 spin_lock_irqsave(&device_domain_lock, flags);
1928 }
1929 spin_unlock_irqrestore(&device_domain_lock, flags);
1930}
1931
1932/*
1933 * find_domain
358dd8ac 1934 * Note: struct pci_dev->dev.archdata.iommu stores the domain info
ba395927 1935 */
38717946 1936static struct dmar_domain *
ba395927
KA
1937find_domain(struct pci_dev *pdev)
1938{
1939 struct device_domain_info *info;
1940
1941 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1942 info = pdev->dev.archdata.iommu;
ba395927
KA
1943 if (info)
1944 return info->domain;
1945 return NULL;
1946}
1947
ba395927
KA
1948/* domain is initialized */
1949static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1950{
1951 struct dmar_domain *domain, *found = NULL;
1952 struct intel_iommu *iommu;
1953 struct dmar_drhd_unit *drhd;
1954 struct device_domain_info *info, *tmp;
1955 struct pci_dev *dev_tmp;
1956 unsigned long flags;
1957 int bus = 0, devfn = 0;
276dbf99 1958 int segment;
2c2e2c38 1959 int ret;
ba395927
KA
1960
1961 domain = find_domain(pdev);
1962 if (domain)
1963 return domain;
1964
276dbf99
DW
1965 segment = pci_domain_nr(pdev->bus);
1966
ba395927
KA
1967 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1968 if (dev_tmp) {
5f4d91a1 1969 if (pci_is_pcie(dev_tmp)) {
ba395927
KA
1970 bus = dev_tmp->subordinate->number;
1971 devfn = 0;
1972 } else {
1973 bus = dev_tmp->bus->number;
1974 devfn = dev_tmp->devfn;
1975 }
1976 spin_lock_irqsave(&device_domain_lock, flags);
1977 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1978 if (info->segment == segment &&
1979 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1980 found = info->domain;
1981 break;
1982 }
1983 }
1984 spin_unlock_irqrestore(&device_domain_lock, flags);
1985 /* pcie-to-pci bridge already has a domain, use it */
1986 if (found) {
1987 domain = found;
1988 goto found_domain;
1989 }
1990 }
1991
2c2e2c38
FY
1992 domain = alloc_domain();
1993 if (!domain)
1994 goto error;
1995
ba395927
KA
1996 /* Allocate new domain for the device */
1997 drhd = dmar_find_matched_drhd_unit(pdev);
1998 if (!drhd) {
1999 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
2000 pci_name(pdev));
d2900bd6 2001 free_domain_mem(domain);
ba395927
KA
2002 return NULL;
2003 }
2004 iommu = drhd->iommu;
2005
2c2e2c38
FY
2006 ret = iommu_attach_domain(domain, iommu);
2007 if (ret) {
2fe9723d 2008 free_domain_mem(domain);
ba395927 2009 goto error;
2c2e2c38 2010 }
ba395927
KA
2011
2012 if (domain_init(domain, gaw)) {
2013 domain_exit(domain);
2014 goto error;
2015 }
2016
2017 /* register pcie-to-pci device */
2018 if (dev_tmp) {
2019 info = alloc_devinfo_mem();
2020 if (!info) {
2021 domain_exit(domain);
2022 goto error;
2023 }
276dbf99 2024 info->segment = segment;
ba395927
KA
2025 info->bus = bus;
2026 info->devfn = devfn;
2027 info->dev = NULL;
2028 info->domain = domain;
2029 /* This domain is shared by devices under the p2p bridge */
3b5410e7 2030 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
2031
2032 /* pcie-to-pci bridge already has a domain, use it */
2033 found = NULL;
2034 spin_lock_irqsave(&device_domain_lock, flags);
2035 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
2036 if (tmp->segment == segment &&
2037 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
2038 found = tmp->domain;
2039 break;
2040 }
2041 }
2042 if (found) {
00dfff77 2043 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
2044 free_devinfo_mem(info);
2045 domain_exit(domain);
2046 domain = found;
2047 } else {
2048 list_add(&info->link, &domain->devices);
2049 list_add(&info->global, &device_domain_list);
00dfff77 2050 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2051 }
ba395927
KA
2052 }
2053
2054found_domain:
2055 info = alloc_devinfo_mem();
2056 if (!info)
2057 goto error;
276dbf99 2058 info->segment = segment;
ba395927
KA
2059 info->bus = pdev->bus->number;
2060 info->devfn = pdev->devfn;
2061 info->dev = pdev;
2062 info->domain = domain;
2063 spin_lock_irqsave(&device_domain_lock, flags);
2064 /* somebody else set it up first; use theirs */
2065 found = find_domain(pdev);
2066 if (found != NULL) {
2067 spin_unlock_irqrestore(&device_domain_lock, flags);
2068 if (found != domain) {
2069 domain_exit(domain);
2070 domain = found;
2071 }
2072 free_devinfo_mem(info);
2073 return domain;
2074 }
2075 list_add(&info->link, &domain->devices);
2076 list_add(&info->global, &device_domain_list);
358dd8ac 2077 pdev->dev.archdata.iommu = info;
ba395927
KA
2078 spin_unlock_irqrestore(&device_domain_lock, flags);
2079 return domain;
2080error:
2081 /* recheck it here, maybe others set it */
2082 return find_domain(pdev);
2083}
2084
2c2e2c38 2085static int iommu_identity_mapping;
e0fc7e0b
DW
2086#define IDENTMAP_ALL 1
2087#define IDENTMAP_GFX 2
2088#define IDENTMAP_AZALIA 4
2c2e2c38 2089
b213203e
DW
2090static int iommu_domain_identity_map(struct dmar_domain *domain,
2091 unsigned long long start,
2092 unsigned long long end)
ba395927 2093{
c5395d5c
DW
2094 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2095 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2096
2097 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2098 dma_to_mm_pfn(last_vpfn))) {
ba395927 2099 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2100 return -ENOMEM;
ba395927
KA
2101 }
2102
c5395d5c
DW
2103 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2104 start, end, domain->id);
ba395927
KA
2105 /*
2106 * RMRR range might have overlap with physical memory range,
2107 * clear it first
2108 */
c5395d5c 2109 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2110
c5395d5c
DW
2111 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2112 last_vpfn - first_vpfn + 1,
61df7443 2113 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2114}
2115
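/*
 * Illustration only (not part of the driver): the PFN arithmetic behind
 * iommu_domain_identity_map() above, assuming 4KiB VT-d pages
 * (VTD_PAGE_SHIFT == 12).  An RMRR covering 0xd0000-0xd3fff maps 1:1 onto
 * four VT-d pages, 0xd0 through 0xd3.
 */
#include <stdio.h>

#define EX_VTD_PAGE_SHIFT 12

int main(void)
{
	unsigned long long start = 0xd0000, end = 0xd3fff;
	unsigned long first_vpfn = start >> EX_VTD_PAGE_SHIFT;
	unsigned long last_vpfn  = end >> EX_VTD_PAGE_SHIFT;

	/* prints: first 0xd0 last 0xd3 pages 4 */
	printf("first %#lx last %#lx pages %lu\n",
	       first_vpfn, last_vpfn, last_vpfn - first_vpfn + 1);
	return 0;
}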
2116static int iommu_prepare_identity_map(struct pci_dev *pdev,
2117 unsigned long long start,
2118 unsigned long long end)
2119{
2120 struct dmar_domain *domain;
2121 int ret;
2122
c7ab48d2 2123 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2124 if (!domain)
2125 return -ENOMEM;
2126
19943b0e
DW
2127 /* For _hardware_ passthrough, don't bother. But for software
2128 passthrough, we do it anyway -- it may indicate a memory
2129 range which is reserved in E820 and so didn't get set
2130 up in si_domain to start with */
2131 if (domain == si_domain && hw_pass_through) {
2132 printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2133 pci_name(pdev), start, end);
2134 return 0;
2135 }
2136
2137 printk(KERN_INFO
2138 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2139 pci_name(pdev), start, end);
2ff729f5 2140
5595b528
DW
2141 if (end < start) {
2142 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2143 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2144 dmi_get_system_info(DMI_BIOS_VENDOR),
2145 dmi_get_system_info(DMI_BIOS_VERSION),
2146 dmi_get_system_info(DMI_PRODUCT_VERSION));
2147 ret = -EIO;
2148 goto error;
2149 }
2150
2ff729f5
DW
2151 if (end >> agaw_to_width(domain->agaw)) {
2152 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2153 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2154 agaw_to_width(domain->agaw),
2155 dmi_get_system_info(DMI_BIOS_VENDOR),
2156 dmi_get_system_info(DMI_BIOS_VERSION),
2157 dmi_get_system_info(DMI_PRODUCT_VERSION));
2158 ret = -EIO;
2159 goto error;
2160 }
19943b0e 2161
b213203e 2162 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2163 if (ret)
2164 goto error;
2165
2166 /* context entry init */
4ed0d3e6 2167 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2168 if (ret)
2169 goto error;
2170
2171 return 0;
2172
2173 error:
ba395927
KA
2174 domain_exit(domain);
2175 return ret;
ba395927
KA
2176}
2177
2178static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2179 struct pci_dev *pdev)
2180{
358dd8ac 2181 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2182 return 0;
2183 return iommu_prepare_identity_map(pdev, rmrr->base_address,
70e535d1 2184 rmrr->end_address);
ba395927
KA
2185}
2186
d3f13810 2187#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2188static inline void iommu_prepare_isa(void)
2189{
2190 struct pci_dev *pdev;
2191 int ret;
2192
2193 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2194 if (!pdev)
2195 return;
2196
c7ab48d2 2197 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
70e535d1 2198 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1);
49a0429e
KA
2199
2200 if (ret)
c7ab48d2
DW
2201 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2202 "floppy might not work\n");
49a0429e
KA
2203
2204}
2205#else
2206static inline void iommu_prepare_isa(void)
2207{
2208 return;
2209}
d3f13810 2210 #endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2211
2c2e2c38 2212static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2213
071e1374 2214static int __init si_domain_init(int hw)
2c2e2c38
FY
2215{
2216 struct dmar_drhd_unit *drhd;
2217 struct intel_iommu *iommu;
c7ab48d2 2218 int nid, ret = 0;
2c2e2c38
FY
2219
2220 si_domain = alloc_domain();
2221 if (!si_domain)
2222 return -EFAULT;
2223
2c2e2c38
FY
2224 for_each_active_iommu(iommu, drhd) {
2225 ret = iommu_attach_domain(si_domain, iommu);
2226 if (ret) {
2227 domain_exit(si_domain);
2228 return -EFAULT;
2229 }
2230 }
2231
2232 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2233 domain_exit(si_domain);
2234 return -EFAULT;
2235 }
2236
2237 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
9544c003
JL
2238 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2239 si_domain->id);
2c2e2c38 2240
19943b0e
DW
2241 if (hw)
2242 return 0;
2243
c7ab48d2 2244 for_each_online_node(nid) {
5dfe8660
TH
2245 unsigned long start_pfn, end_pfn;
2246 int i;
2247
2248 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2249 ret = iommu_domain_identity_map(si_domain,
2250 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2251 if (ret)
2252 return ret;
2253 }
c7ab48d2
DW
2254 }
2255
2c2e2c38
FY
2256 return 0;
2257}
2258
2259static void domain_remove_one_dev_info(struct dmar_domain *domain,
2260 struct pci_dev *pdev);
2261static int identity_mapping(struct pci_dev *pdev)
2262{
2263 struct device_domain_info *info;
2264
2265 if (likely(!iommu_identity_mapping))
2266 return 0;
2267
cb452a40
MT
2268 info = pdev->dev.archdata.iommu;
2269 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2270 return (info->domain == si_domain);
2c2e2c38 2271
2c2e2c38
FY
2272 return 0;
2273}
2274
2275static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2276 struct pci_dev *pdev,
2277 int translation)
2c2e2c38
FY
2278{
2279 struct device_domain_info *info;
2280 unsigned long flags;
5fe60f4e 2281 int ret;
2c2e2c38
FY
2282
2283 info = alloc_devinfo_mem();
2284 if (!info)
2285 return -ENOMEM;
2286
2287 info->segment = pci_domain_nr(pdev->bus);
2288 info->bus = pdev->bus->number;
2289 info->devfn = pdev->devfn;
2290 info->dev = pdev;
2291 info->domain = domain;
2292
2293 spin_lock_irqsave(&device_domain_lock, flags);
2294 list_add(&info->link, &domain->devices);
2295 list_add(&info->global, &device_domain_list);
2296 pdev->dev.archdata.iommu = info;
2297 spin_unlock_irqrestore(&device_domain_lock, flags);
2298
e2ad23d0
DW
2299 ret = domain_context_mapping(domain, pdev, translation);
2300 if (ret) {
2301 spin_lock_irqsave(&device_domain_lock, flags);
109b9b04 2302 unlink_domain_info(info);
e2ad23d0
DW
2303 spin_unlock_irqrestore(&device_domain_lock, flags);
2304 free_devinfo_mem(info);
2305 return ret;
2306 }
2307
2c2e2c38
FY
2308 return 0;
2309}
2310
ea2447f7
TM
2311static bool device_has_rmrr(struct pci_dev *dev)
2312{
2313 struct dmar_rmrr_unit *rmrr;
2314 int i;
2315
2316 for_each_rmrr_units(rmrr) {
2317 for (i = 0; i < rmrr->devices_cnt; i++) {
2318 /*
2319 * Return TRUE if this RMRR contains the device that
2320 * is passed in.
2321 */
2322 if (rmrr->devices[i] == dev)
2323 return true;
2324 }
2325 }
2326 return false;
2327}
2328
6941af28
DW
2329static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2330{
ea2447f7
TM
2331
2332 /*
2333 * We want to prevent any device associated with an RMRR from
2334 * getting placed into the SI Domain. This is done because
2335 * problems exist when devices are moved in and out of domains
2336 * and their respective RMRR info is lost. We exempt USB devices
2337 * from this process because they use RMRRs that are known
2338 * not to be needed after the BIOS hands off to the OS.
2339 */
2340 if (device_has_rmrr(pdev) &&
2341 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2342 return 0;
2343
e0fc7e0b
DW
2344 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2345 return 1;
2346
2347 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2348 return 1;
2349
2350 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2351 return 0;
6941af28 2352
3dfc813d
DW
2353 /*
2354 * We want to start off with all devices in the 1:1 domain, and
2355 * take them out later if we find they can't access all of memory.
2356 *
2357 * However, we can't do this for PCI devices behind bridges,
2358 * because all PCI devices behind the same bridge will end up
2359 * with the same source-id on their transactions.
2360 *
2361 * Practically speaking, we can't change things around for these
2362 * devices at run-time, because we can't be sure there'll be no
2363 * DMA transactions in flight for any of their siblings.
2364 *
2365 * So PCI devices (unless they're on the root bus) as well as
2366 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2367 * the 1:1 domain, just in _case_ one of their siblings turns out
2368 * not to be able to map all of memory.
2369 */
5f4d91a1 2370 if (!pci_is_pcie(pdev)) {
3dfc813d
DW
2371 if (!pci_is_root_bus(pdev->bus))
2372 return 0;
2373 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2374 return 0;
62f87c0e 2375 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d
DW
2376 return 0;
2377
2378 /*
2379 * At boot time, we don't yet know if devices will be 64-bit capable.
2380 * Assume that they will -- if they turn out not to be, then we can
2381 * take them out of the 1:1 domain later.
2382 */
8fcc5372
CW
2383 if (!startup) {
2384 /*
2385 * If the device's dma_mask is less than the system's memory
2386 * size then this is not a candidate for identity mapping.
2387 */
2388 u64 dma_mask = pdev->dma_mask;
2389
2390 if (pdev->dev.coherent_dma_mask &&
2391 pdev->dev.coherent_dma_mask < dma_mask)
2392 dma_mask = pdev->dev.coherent_dma_mask;
2393
2394 return dma_mask >= dma_get_required_mask(&pdev->dev);
2395 }
6941af28
DW
2396
2397 return 1;
2398}
2399
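/*
 * Illustration only (not part of the driver): a standalone sketch of the
 * run-time DMA-mask check at the end of iommu_should_identity_map() above.
 * "required_mask" stands in for dma_get_required_mask() and is assumed to
 * reflect the highest physical address in the system.
 */
#include <stdint.h>
#include <stdio.h>

static int ex_can_use_identity_map(uint64_t dma_mask, uint64_t coherent_mask,
				   uint64_t required_mask)
{
	/* The more restrictive of the two device masks wins. */
	if (coherent_mask && coherent_mask < dma_mask)
		dma_mask = coherent_mask;

	/* Identity mapping only works if the device can reach all of RAM. */
	return dma_mask >= required_mask;
}

int main(void)
{
	uint64_t mask32 = 0xffffffffULL;		/* 32-bit-only device */
	uint64_t mask64 = 0xffffffffffffffffULL;	/* 64-bit capable device */
	uint64_t required = 0x1ffffffffULL;		/* roughly 8GiB of RAM */

	printf("%d %d\n",
	       ex_can_use_identity_map(mask32, mask32, required),   /* 0 */
	       ex_can_use_identity_map(mask64, mask64, required));  /* 1 */
	return 0;
}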
071e1374 2400static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2401{
2c2e2c38
FY
2402 struct pci_dev *pdev = NULL;
2403 int ret;
2404
19943b0e 2405 ret = si_domain_init(hw);
2c2e2c38
FY
2406 if (ret)
2407 return -EFAULT;
2408
2c2e2c38 2409 for_each_pci_dev(pdev) {
6941af28 2410 if (iommu_should_identity_map(pdev, 1)) {
5fe60f4e 2411 ret = domain_add_dev_info(si_domain, pdev,
eae460b6
MT
2412 hw ? CONTEXT_TT_PASS_THROUGH :
2413 CONTEXT_TT_MULTI_LEVEL);
2414 if (ret) {
2415 /* device not associated with an iommu */
2416 if (ret == -ENODEV)
2417 continue;
62edf5dc 2418 return ret;
eae460b6
MT
2419 }
2420 pr_info("IOMMU: %s identity mapping for device %s\n",
2421 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2422 }
2c2e2c38
FY
2423 }
2424
2425 return 0;
2426}
2427
b779260b 2428static int __init init_dmars(void)
ba395927
KA
2429{
2430 struct dmar_drhd_unit *drhd;
2431 struct dmar_rmrr_unit *rmrr;
2432 struct pci_dev *pdev;
2433 struct intel_iommu *iommu;
9d783ba0 2434 int i, ret;
2c2e2c38 2435
ba395927
KA
2436 /*
2437 * for each drhd
2438 * allocate root
2439 * initialize and program root entry to not present
2440 * endfor
2441 */
2442 for_each_drhd_unit(drhd) {
5e0d2a6f 2443 /*
2444 * lock not needed as this is only incremented in the single-
2445 * threaded kernel __init code path; all other accesses are
2446 * read-only
2447 */
1b198bb0
MT
2448 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2449 g_num_of_iommus++;
2450 continue;
2451 }
2452 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2453 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2454 }
2455
d9630fe9
WH
2456 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2457 GFP_KERNEL);
2458 if (!g_iommus) {
2459 printk(KERN_ERR "Allocating global iommu array failed\n");
2460 ret = -ENOMEM;
2461 goto error;
2462 }
2463
80b20dd8 2464 deferred_flush = kzalloc(g_num_of_iommus *
2465 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2466 if (!deferred_flush) {
5e0d2a6f 2467 ret = -ENOMEM;
2468 goto error;
2469 }
2470
7c919779 2471 for_each_active_iommu(iommu, drhd) {
d9630fe9 2472 g_iommus[iommu->seq_id] = iommu;
ba395927 2473
e61d98d8
SS
2474 ret = iommu_init_domains(iommu);
2475 if (ret)
2476 goto error;
2477
ba395927
KA
2478 /*
2479 * TBD:
2480 * we could share the same root & context tables
25985edc 2481 * among all IOMMUs. Need to split it later.
ba395927
KA
2482 */
2483 ret = iommu_alloc_root_entry(iommu);
2484 if (ret) {
2485 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2486 goto error;
2487 }
4ed0d3e6 2488 if (!ecap_pass_through(iommu->ecap))
19943b0e 2489 hw_pass_through = 0;
ba395927
KA
2490 }
2491
1531a6a6
SS
2492 /*
2493 * Start from the sane iommu hardware state.
2494 */
7c919779 2495 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2496 /*
2497 * If the queued invalidation is already initialized by us
2498 * (for example, while enabling interrupt-remapping) then
2499 * we got the things already rolling from a sane state.
2500 */
2501 if (iommu->qi)
2502 continue;
2503
2504 /*
2505 * Clear any previous faults.
2506 */
2507 dmar_fault(-1, iommu);
2508 /*
2509 * Disable queued invalidation if supported and already enabled
2510 * before OS handover.
2511 */
2512 dmar_disable_qi(iommu);
2513 }
2514
7c919779 2515 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2516 if (dmar_enable_qi(iommu)) {
2517 /*
2518 * Queued Invalidate not enabled, use Register Based
2519 * Invalidate
2520 */
2521 iommu->flush.flush_context = __iommu_flush_context;
2522 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2523 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2524 "invalidation\n",
680a7524 2525 iommu->seq_id,
b4e0f9eb 2526 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2527 } else {
2528 iommu->flush.flush_context = qi_flush_context;
2529 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2530 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2531 "invalidation\n",
680a7524 2532 iommu->seq_id,
b4e0f9eb 2533 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2534 }
2535 }
2536
19943b0e 2537 if (iommu_pass_through)
e0fc7e0b
DW
2538 iommu_identity_mapping |= IDENTMAP_ALL;
2539
d3f13810 2540#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2541 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2542#endif
e0fc7e0b
DW
2543
2544 check_tylersburg_isoch();
2545
ba395927 2546 /*
19943b0e
DW
2547 * If pass-through is not set or not enabled, set up context entries for
2548 * identity mappings for RMRR, GFX and ISA, possibly falling back to static
2549 * identity mapping if iommu_identity_mapping is set.
ba395927 2550 */
19943b0e
DW
2551 if (iommu_identity_mapping) {
2552 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2553 if (ret) {
19943b0e
DW
2554 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2555 goto error;
ba395927
KA
2556 }
2557 }
ba395927 2558 /*
19943b0e
DW
2559 * For each rmrr
2560 * for each dev attached to rmrr
2561 * do
2562 * locate drhd for dev, alloc domain for dev
2563 * allocate free domain
2564 * allocate page table entries for rmrr
2565 * if context not allocated for bus
2566 * allocate and init context
2567 * set present in root table for this bus
2568 * init context with domain, translation etc
2569 * endfor
2570 * endfor
ba395927 2571 */
19943b0e
DW
2572 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2573 for_each_rmrr_units(rmrr) {
2574 for (i = 0; i < rmrr->devices_cnt; i++) {
2575 pdev = rmrr->devices[i];
2576 /*
2577 * some BIOSes list non-existent devices in the
2578 * DMAR table.
2579 */
2580 if (!pdev)
2581 continue;
2582 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2583 if (ret)
2584 printk(KERN_ERR
2585 "IOMMU: mapping reserved region failed\n");
ba395927 2586 }
4ed0d3e6 2587 }
49a0429e 2588
19943b0e
DW
2589 iommu_prepare_isa();
2590
ba395927
KA
2591 /*
2592 * for each drhd
2593 * enable fault log
2594 * global invalidate context cache
2595 * global invalidate iotlb
2596 * enable translation
2597 */
7c919779 2598 for_each_iommu(iommu, drhd) {
51a63e67
JC
2599 if (drhd->ignored) {
2600 /*
2601 * we always have to disable PMRs or DMA may fail on
2602 * this device
2603 */
2604 if (force_on)
7c919779 2605 iommu_disable_protect_mem_regions(iommu);
ba395927 2606 continue;
51a63e67 2607 }
ba395927
KA
2608
2609 iommu_flush_write_buffer(iommu);
2610
3460a6d9
KA
2611 ret = dmar_set_interrupt(iommu);
2612 if (ret)
2613 goto error;
2614
ba395927
KA
2615 iommu_set_root_entry(iommu);
2616
4c25a2c1 2617 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2618 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2619
ba395927
KA
2620 ret = iommu_enable_translation(iommu);
2621 if (ret)
2622 goto error;
b94996c9
DW
2623
2624 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2625 }
2626
2627 return 0;
2628error:
7c919779 2629 for_each_active_iommu(iommu, drhd)
ba395927 2630 free_iommu(iommu);
d9630fe9 2631 kfree(g_iommus);
ba395927
KA
2632 return ret;
2633}
2634
5a5e02a6 2635/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2636static struct iova *intel_alloc_iova(struct device *dev,
2637 struct dmar_domain *domain,
2638 unsigned long nrpages, uint64_t dma_mask)
ba395927 2639{
ba395927 2640 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2641 struct iova *iova = NULL;
ba395927 2642
875764de
DW
2643 /* Restrict dma_mask to the width that the iommu can handle */
2644 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2645
2646 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2647 /*
2648 * First try to allocate an I/O virtual address within
284901a9 2649 * DMA_BIT_MASK(32); if that fails, then try allocating
3609801e 2650 * from the higher range
ba395927 2651 */
875764de
DW
2652 iova = alloc_iova(&domain->iovad, nrpages,
2653 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2654 if (iova)
2655 return iova;
2656 }
2657 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2658 if (unlikely(!iova)) {
2659 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
2660 nrpages, pci_name(pdev));
f76aec76
KA
2661 return NULL;
2662 }
2663
2664 return iova;
2665}
2666
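/*
 * Illustration only (not part of the driver): the mask clamping and the
 * below-4GiB preference in intel_alloc_iova() above, reduced to plain
 * arithmetic.  EX_IOVA_PFN mirrors IOVA_PFN with 4KiB pages, and the domain
 * is assumed to have a 48-bit guest address width.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT	12
#define EX_IOVA_PFN(x)	((x) >> EX_PAGE_SHIFT)

int main(void)
{
	uint64_t domain_max = (1ULL << 48) - 1;		/* DOMAIN_MAX_ADDR for a 48-bit gaw */
	uint64_t dev_mask   = 0xffffffffffffffffULL;	/* 64-bit capable device */
	uint64_t dma_mask   = dev_mask < domain_max ? dev_mask : domain_max;

	/*
	 * Even for a 64-bit device the first attempt is limited to the
	 * 32-bit PFN range, so most allocations stay below 4GiB.
	 */
	printf("clamped mask %#llx, first-try limit pfn %#llx, fallback pfn %#llx\n",
	       (unsigned long long)dma_mask,
	       (unsigned long long)EX_IOVA_PFN(0xffffffffULL),
	       (unsigned long long)EX_IOVA_PFN(dma_mask));
	return 0;
}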
147202aa 2667static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2668{
2669 struct dmar_domain *domain;
2670 int ret;
2671
2672 domain = get_domain_for_dev(pdev,
2673 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2674 if (!domain) {
2675 printk(KERN_ERR
2676 "Allocating domain for %s failed", pci_name(pdev));
4fe05bbc 2677 return NULL;
ba395927
KA
2678 }
2679
2680 /* make sure context mapping is ok */
5331fe6f 2681 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2682 ret = domain_context_mapping(domain, pdev,
2683 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2684 if (ret) {
2685 printk(KERN_ERR
2686 "Domain context map for %s failed",
2687 pci_name(pdev));
4fe05bbc 2688 return NULL;
f76aec76 2689 }
ba395927
KA
2690 }
2691
f76aec76
KA
2692 return domain;
2693}
2694
147202aa
DW
2695static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2696{
2697 struct device_domain_info *info;
2698
2699 /* No lock here, assumes no domain exit in normal case */
2700 info = dev->dev.archdata.iommu;
2701 if (likely(info))
2702 return info->domain;
2703
2704 return __get_valid_domain_for_dev(dev);
2705}
2706
2c2e2c38
FY
2707static int iommu_dummy(struct pci_dev *pdev)
2708{
2709 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2710}
2711
2712 /* Check if the pdev needs to go through the non-identity map/unmap process. */
73676832 2713static int iommu_no_mapping(struct device *dev)
2c2e2c38 2714{
73676832 2715 struct pci_dev *pdev;
2c2e2c38
FY
2716 int found;
2717
dbad0864 2718 if (unlikely(!dev_is_pci(dev)))
73676832
DW
2719 return 1;
2720
2721 pdev = to_pci_dev(dev);
1e4c64c4
DW
2722 if (iommu_dummy(pdev))
2723 return 1;
2724
2c2e2c38 2725 if (!iommu_identity_mapping)
1e4c64c4 2726 return 0;
2c2e2c38
FY
2727
2728 found = identity_mapping(pdev);
2729 if (found) {
6941af28 2730 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2731 return 1;
2732 else {
2733 /*
2734 * The 32-bit DMA device is removed from si_domain; fall back
2735 * to non-identity mapping.
2736 */
2737 domain_remove_one_dev_info(si_domain, pdev);
2738 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2739 pci_name(pdev));
2740 return 0;
2741 }
2742 } else {
2743 /*
2744 * A 64-bit DMA device detached from a VM is put back
2745 * into si_domain for identity mapping.
2746 */
6941af28 2747 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2748 int ret;
5fe60f4e
DW
2749 ret = domain_add_dev_info(si_domain, pdev,
2750 hw_pass_through ?
2751 CONTEXT_TT_PASS_THROUGH :
2752 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2753 if (!ret) {
2754 printk(KERN_INFO "64bit %s uses identity mapping\n",
2755 pci_name(pdev));
2756 return 1;
2757 }
2758 }
2759 }
2760
1e4c64c4 2761 return 0;
2c2e2c38
FY
2762}
2763
bb9e6d65
FT
2764static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2765 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2766{
2767 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2768 struct dmar_domain *domain;
5b6985ce 2769 phys_addr_t start_paddr;
f76aec76
KA
2770 struct iova *iova;
2771 int prot = 0;
6865f0d1 2772 int ret;
8c11e798 2773 struct intel_iommu *iommu;
33041ec0 2774 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2775
2776 BUG_ON(dir == DMA_NONE);
2c2e2c38 2777
73676832 2778 if (iommu_no_mapping(hwdev))
6865f0d1 2779 return paddr;
f76aec76
KA
2780
2781 domain = get_valid_domain_for_dev(pdev);
2782 if (!domain)
2783 return 0;
2784
8c11e798 2785 iommu = domain_get_iommu(domain);
88cb6a74 2786 size = aligned_nrpages(paddr, size);
f76aec76 2787
c681d0ba 2788 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
2789 if (!iova)
2790 goto error;
2791
ba395927
KA
2792 /*
2793 * Check if DMAR supports zero-length reads on write only
2794 * mappings..
2795 */
2796 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2797 !cap_zlr(iommu->cap))
ba395927
KA
2798 prot |= DMA_PTE_READ;
2799 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2800 prot |= DMA_PTE_WRITE;
2801 /*
6865f0d1 2802 * paddr - (paddr + size) might span a partial page, so we map the whole
ba395927 2803 * page. Note: if two parts of one page are mapped separately, we
6865f0d1 2804 * might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
2805 * is not a big problem
2806 */
0ab36de2 2807 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2808 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2809 if (ret)
2810 goto error;
2811
1f0ef2aa
DW
2812 /* it's a non-present to present mapping. Only flush if caching mode */
2813 if (cap_caching_mode(iommu->cap))
82653633 2814 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1);
1f0ef2aa 2815 else
8c11e798 2816 iommu_flush_write_buffer(iommu);
f76aec76 2817
03d6a246
DW
2818 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2819 start_paddr += paddr & ~PAGE_MASK;
2820 return start_paddr;
ba395927 2821
ba395927 2822error:
f76aec76
KA
2823 if (iova)
2824 __free_iova(&domain->iovad, iova);
4cf2e75d 2825 printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2826 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2827 return 0;
2828}
2829
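/*
 * Illustration only (not part of the driver): how __intel_map_single() above
 * builds the DMA address it returns, assuming 4KiB pages.  The allocated
 * IOVA page is combined with the original sub-page offset, so the device
 * sees the same offset within the page as the CPU does.
 */
#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_SHIFT 12
#define EX_PAGE_MASK  (~((1UL << EX_PAGE_SHIFT) - 1))

int main(void)
{
	unsigned long iova_pfn_lo = 0xffff0;	/* page allocated by the IOVA allocator */
	uint64_t paddr = 0x12345678;		/* physical address being mapped */

	uint64_t start_paddr = (uint64_t)iova_pfn_lo << EX_PAGE_SHIFT;
	start_paddr += paddr & ~EX_PAGE_MASK;	/* keep the in-page offset (0x678) */

	printf("dma addr %#llx\n", (unsigned long long)start_paddr);	/* 0xffff0678 */
	return 0;
}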
ffbbef5c
FT
2830static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2831 unsigned long offset, size_t size,
2832 enum dma_data_direction dir,
2833 struct dma_attrs *attrs)
bb9e6d65 2834{
ffbbef5c
FT
2835 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2836 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2837}
2838
5e0d2a6f 2839static void flush_unmaps(void)
2840{
80b20dd8 2841 int i, j;
5e0d2a6f 2842
5e0d2a6f 2843 timer_on = 0;
2844
2845 /* just flush them all */
2846 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2847 struct intel_iommu *iommu = g_iommus[i];
2848 if (!iommu)
2849 continue;
c42d9f32 2850
9dd2fe89
YZ
2851 if (!deferred_flush[i].next)
2852 continue;
2853
78d5f0f5
NA
2854 /* In caching mode, global flushes turn emulation expensive */
2855 if (!cap_caching_mode(iommu->cap))
2856 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2857 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2858 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2859 unsigned long mask;
2860 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
2861 struct dmar_domain *domain = deferred_flush[i].domain[j];
2862
2863 /* On real hardware multiple invalidations are expensive */
2864 if (cap_caching_mode(iommu->cap))
2865 iommu_flush_iotlb_psi(iommu, domain->id,
2866 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0);
2867 else {
2868 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
2869 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
2870 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
2871 }
93a23a72 2872 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2873 }
9dd2fe89 2874 deferred_flush[i].next = 0;
5e0d2a6f 2875 }
2876
5e0d2a6f 2877 list_size = 0;
5e0d2a6f 2878}
2879
2880static void flush_unmaps_timeout(unsigned long data)
2881{
80b20dd8 2882 unsigned long flags;
2883
2884 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2885 flush_unmaps();
80b20dd8 2886 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2887}
2888
2889static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2890{
2891 unsigned long flags;
80b20dd8 2892 int next, iommu_id;
8c11e798 2893 struct intel_iommu *iommu;
5e0d2a6f 2894
2895 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2896 if (list_size == HIGH_WATER_MARK)
2897 flush_unmaps();
2898
8c11e798
WH
2899 iommu = domain_get_iommu(dom);
2900 iommu_id = iommu->seq_id;
c42d9f32 2901
80b20dd8 2902 next = deferred_flush[iommu_id].next;
2903 deferred_flush[iommu_id].domain[next] = dom;
2904 deferred_flush[iommu_id].iova[next] = iova;
2905 deferred_flush[iommu_id].next++;
5e0d2a6f 2906
2907 if (!timer_on) {
2908 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2909 timer_on = 1;
2910 }
2911 list_size++;
2912 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2913}
2914
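/*
 * Illustration only (not part of the driver): the shape of the deferred
 * unmap batching used by add_unmap()/flush_unmaps() above, with the IOMMU
 * invalidation and the 10ms timer replaced by simple counters.
 * EX_HIGH_WATER_MARK stands in for HIGH_WATER_MARK; the value here is only
 * for the example.
 */
#include <stdio.h>

#define EX_HIGH_WATER_MARK 250

static int ex_pending;
static int ex_flushes;

static void ex_flush_unmaps(void)
{
	/* One global invalidation covers every queued unmap. */
	ex_flushes++;
	ex_pending = 0;
}

static void ex_add_unmap(void)
{
	if (ex_pending == EX_HIGH_WATER_MARK)
		ex_flush_unmaps();
	ex_pending++;		/* queued; the real code also arms a 10ms timer */
}

int main(void)
{
	int i;

	for (i = 0; i < 1000; i++)
		ex_add_unmap();

	/* 1000 unmaps amortised over a handful of flushes */
	printf("pending %d flushes %d\n", ex_pending, ex_flushes);
	return 0;
}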
ffbbef5c
FT
2915static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2916 size_t size, enum dma_data_direction dir,
2917 struct dma_attrs *attrs)
ba395927 2918{
ba395927 2919 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2920 struct dmar_domain *domain;
d794dc9b 2921 unsigned long start_pfn, last_pfn;
ba395927 2922 struct iova *iova;
8c11e798 2923 struct intel_iommu *iommu;
ba395927 2924
73676832 2925 if (iommu_no_mapping(dev))
f76aec76 2926 return;
2c2e2c38 2927
ba395927
KA
2928 domain = find_domain(pdev);
2929 BUG_ON(!domain);
2930
8c11e798
WH
2931 iommu = domain_get_iommu(domain);
2932
ba395927 2933 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2934 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2935 (unsigned long long)dev_addr))
ba395927 2936 return;
ba395927 2937
d794dc9b
DW
2938 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2939 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2940
d794dc9b
DW
2941 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2942 pci_name(pdev), start_pfn, last_pfn);
ba395927 2943
f76aec76 2944 /* clear the whole page */
d794dc9b
DW
2945 dma_pte_clear_range(domain, start_pfn, last_pfn);
2946
f76aec76 2947 /* free page tables */
d794dc9b
DW
2948 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2949
5e0d2a6f 2950 if (intel_iommu_strict) {
03d6a246 2951 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 2952 last_pfn - start_pfn + 1, 0);
5e0d2a6f 2953 /* free iova */
2954 __free_iova(&domain->iovad, iova);
2955 } else {
2956 add_unmap(domain, iova);
2957 /*
2958 * queue up the release of the unmap to save the roughly 1/6 of
2959 * the CPU time used up by the iotlb flush operation...
2960 */
5e0d2a6f 2961 }
ba395927
KA
2962}
2963
d7ab5c46 2964static void *intel_alloc_coherent(struct device *hwdev, size_t size,
baa676fc
AP
2965 dma_addr_t *dma_handle, gfp_t flags,
2966 struct dma_attrs *attrs)
ba395927
KA
2967{
2968 void *vaddr;
2969 int order;
2970
5b6985ce 2971 size = PAGE_ALIGN(size);
ba395927 2972 order = get_order(size);
e8bb910d
AW
2973
2974 if (!iommu_no_mapping(hwdev))
2975 flags &= ~(GFP_DMA | GFP_DMA32);
2976 else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) {
2977 if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32))
2978 flags |= GFP_DMA;
2979 else
2980 flags |= GFP_DMA32;
2981 }
ba395927
KA
2982
2983 vaddr = (void *)__get_free_pages(flags, order);
2984 if (!vaddr)
2985 return NULL;
2986 memset(vaddr, 0, size);
2987
bb9e6d65
FT
2988 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2989 DMA_BIDIRECTIONAL,
2990 hwdev->coherent_dma_mask);
ba395927
KA
2991 if (*dma_handle)
2992 return vaddr;
2993 free_pages((unsigned long)vaddr, order);
2994 return NULL;
2995}
2996
d7ab5c46 2997static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
baa676fc 2998 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
2999{
3000 int order;
3001
5b6985ce 3002 size = PAGE_ALIGN(size);
ba395927
KA
3003 order = get_order(size);
3004
0db9b7ae 3005 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
3006 free_pages((unsigned long)vaddr, order);
3007}
3008
d7ab5c46
FT
3009static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
3010 int nelems, enum dma_data_direction dir,
3011 struct dma_attrs *attrs)
ba395927 3012{
ba395927
KA
3013 struct pci_dev *pdev = to_pci_dev(hwdev);
3014 struct dmar_domain *domain;
d794dc9b 3015 unsigned long start_pfn, last_pfn;
f76aec76 3016 struct iova *iova;
8c11e798 3017 struct intel_iommu *iommu;
ba395927 3018
73676832 3019 if (iommu_no_mapping(hwdev))
ba395927
KA
3020 return;
3021
3022 domain = find_domain(pdev);
8c11e798
WH
3023 BUG_ON(!domain);
3024
3025 iommu = domain_get_iommu(domain);
ba395927 3026
c03ab37c 3027 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3028 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3029 (unsigned long long)sglist[0].dma_address))
f76aec76 3030 return;
f76aec76 3031
d794dc9b
DW
3032 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3033 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
3034
3035 /* clear the whole page */
d794dc9b
DW
3036 dma_pte_clear_range(domain, start_pfn, last_pfn);
3037
f76aec76 3038 /* free page tables */
d794dc9b 3039 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 3040
acea0018
DW
3041 if (intel_iommu_strict) {
3042 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
82653633 3043 last_pfn - start_pfn + 1, 0);
acea0018
DW
3044 /* free iova */
3045 __free_iova(&domain->iovad, iova);
3046 } else {
3047 add_unmap(domain, iova);
3048 /*
3049 * queue up the release of the unmap to save the roughly 1/6 of
3050 * the CPU time used up by the iotlb flush operation...
3051 */
3052 }
ba395927
KA
3053}
3054
ba395927 3055static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3056 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3057{
3058 int i;
c03ab37c 3059 struct scatterlist *sg;
ba395927 3060
c03ab37c 3061 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3062 BUG_ON(!sg_page(sg));
4cf2e75d 3063 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3064 sg->dma_length = sg->length;
ba395927
KA
3065 }
3066 return nelems;
3067}
3068
d7ab5c46
FT
3069static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
3070 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3071{
ba395927 3072 int i;
ba395927
KA
3073 struct pci_dev *pdev = to_pci_dev(hwdev);
3074 struct dmar_domain *domain;
f76aec76
KA
3075 size_t size = 0;
3076 int prot = 0;
f76aec76
KA
3077 struct iova *iova = NULL;
3078 int ret;
c03ab37c 3079 struct scatterlist *sg;
b536d24d 3080 unsigned long start_vpfn;
8c11e798 3081 struct intel_iommu *iommu;
ba395927
KA
3082
3083 BUG_ON(dir == DMA_NONE);
73676832 3084 if (iommu_no_mapping(hwdev))
c03ab37c 3085 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 3086
f76aec76
KA
3087 domain = get_valid_domain_for_dev(pdev);
3088 if (!domain)
3089 return 0;
3090
8c11e798
WH
3091 iommu = domain_get_iommu(domain);
3092
b536d24d 3093 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3094 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3095
5a5e02a6
DW
3096 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
3097 pdev->dma_mask);
f76aec76 3098 if (!iova) {
c03ab37c 3099 sglist->dma_length = 0;
f76aec76
KA
3100 return 0;
3101 }
3102
3103 /*
3104 * Check if DMAR supports zero-length reads on write only
3105 * mappings..
3106 */
3107 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3108 !cap_zlr(iommu->cap))
f76aec76
KA
3109 prot |= DMA_PTE_READ;
3110 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3111 prot |= DMA_PTE_WRITE;
3112
b536d24d 3113 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3114
f532959b 3115 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3116 if (unlikely(ret)) {
3117 /* clear the page */
3118 dma_pte_clear_range(domain, start_vpfn,
3119 start_vpfn + size - 1);
3120 /* free page tables */
3121 dma_pte_free_pagetable(domain, start_vpfn,
3122 start_vpfn + size - 1);
3123 /* free iova */
3124 __free_iova(&domain->iovad, iova);
3125 return 0;
ba395927
KA
3126 }
3127
1f0ef2aa
DW
3128 /* it's a non-present to present mapping. Only flush if caching mode */
3129 if (cap_caching_mode(iommu->cap))
82653633 3130 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1);
1f0ef2aa 3131 else
8c11e798 3132 iommu_flush_write_buffer(iommu);
1f0ef2aa 3133
ba395927
KA
3134 return nelems;
3135}
3136
dfb805e8
FT
3137static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3138{
3139 return !dma_addr;
3140}
3141
160c1d8e 3142struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3143 .alloc = intel_alloc_coherent,
3144 .free = intel_free_coherent,
ba395927
KA
3145 .map_sg = intel_map_sg,
3146 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3147 .map_page = intel_map_page,
3148 .unmap_page = intel_unmap_page,
dfb805e8 3149 .mapping_error = intel_mapping_error,
ba395927
KA
3150};
3151
3152static inline int iommu_domain_cache_init(void)
3153{
3154 int ret = 0;
3155
3156 iommu_domain_cache = kmem_cache_create("iommu_domain",
3157 sizeof(struct dmar_domain),
3158 0,
3159 SLAB_HWCACHE_ALIGN,
3160
3161 NULL);
3162 if (!iommu_domain_cache) {
3163 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3164 ret = -ENOMEM;
3165 }
3166
3167 return ret;
3168}
3169
3170static inline int iommu_devinfo_cache_init(void)
3171{
3172 int ret = 0;
3173
3174 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3175 sizeof(struct device_domain_info),
3176 0,
3177 SLAB_HWCACHE_ALIGN,
ba395927
KA
3178 NULL);
3179 if (!iommu_devinfo_cache) {
3180 printk(KERN_ERR "Couldn't create devinfo cache\n");
3181 ret = -ENOMEM;
3182 }
3183
3184 return ret;
3185}
3186
3187static inline int iommu_iova_cache_init(void)
3188{
3189 int ret = 0;
3190
3191 iommu_iova_cache = kmem_cache_create("iommu_iova",
3192 sizeof(struct iova),
3193 0,
3194 SLAB_HWCACHE_ALIGN,
ba395927
KA
3195 NULL);
3196 if (!iommu_iova_cache) {
3197 printk(KERN_ERR "Couldn't create iova cache\n");
3198 ret = -ENOMEM;
3199 }
3200
3201 return ret;
3202}
3203
3204static int __init iommu_init_mempool(void)
3205{
3206 int ret;
3207 ret = iommu_iova_cache_init();
3208 if (ret)
3209 return ret;
3210
3211 ret = iommu_domain_cache_init();
3212 if (ret)
3213 goto domain_error;
3214
3215 ret = iommu_devinfo_cache_init();
3216 if (!ret)
3217 return ret;
3218
3219 kmem_cache_destroy(iommu_domain_cache);
3220domain_error:
3221 kmem_cache_destroy(iommu_iova_cache);
3222
3223 return -ENOMEM;
3224}
3225
3226static void __init iommu_exit_mempool(void)
3227{
3228 kmem_cache_destroy(iommu_devinfo_cache);
3229 kmem_cache_destroy(iommu_domain_cache);
3230 kmem_cache_destroy(iommu_iova_cache);
3231
3232}
3233
556ab45f
DW
3234static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3235{
3236 struct dmar_drhd_unit *drhd;
3237 u32 vtbar;
3238 int rc;
3239
3240 /* We know that this device on this chipset has its own IOMMU.
3241 * If we find it under a different IOMMU, then the BIOS is lying
3242 * to us. Hope that the IOMMU for this device is actually
3243 * disabled, and it needs no translation...
3244 */
3245 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3246 if (rc) {
3247 /* "can't" happen */
3248 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3249 return;
3250 }
3251 vtbar &= 0xffff0000;
3252
3253 /* we know that this iommu should be at offset 0xa000 from vtbar */
3254 drhd = dmar_find_matched_drhd_unit(pdev);
3255 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3256 TAINT_FIRMWARE_WORKAROUND,
3257 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3258 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3259}
3260DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3261
ba395927
KA
3262static void __init init_no_remapping_devices(void)
3263{
3264 struct dmar_drhd_unit *drhd;
3265
3266 for_each_drhd_unit(drhd) {
3267 if (!drhd->include_all) {
3268 int i;
3269 for (i = 0; i < drhd->devices_cnt; i++)
3270 if (drhd->devices[i] != NULL)
3271 break;
3272 /* ignore DMAR unit if no pci devices exist */
3273 if (i == drhd->devices_cnt)
3274 drhd->ignored = 1;
3275 }
3276 }
3277
7c919779 3278 for_each_active_drhd_unit(drhd) {
ba395927 3279 int i;
7c919779 3280 if (drhd->include_all)
ba395927
KA
3281 continue;
3282
3283 for (i = 0; i < drhd->devices_cnt; i++)
3284 if (drhd->devices[i] &&
c0771df8 3285 !IS_GFX_DEVICE(drhd->devices[i]))
ba395927
KA
3286 break;
3287
3288 if (i < drhd->devices_cnt)
3289 continue;
3290
c0771df8
DW
3291 /* This IOMMU has *only* gfx devices. Either bypass it or
3292 set the gfx_mapped flag, as appropriate */
3293 if (dmar_map_gfx) {
3294 intel_iommu_gfx_mapped = 1;
3295 } else {
3296 drhd->ignored = 1;
3297 for (i = 0; i < drhd->devices_cnt; i++) {
3298 if (!drhd->devices[i])
3299 continue;
3300 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3301 }
ba395927
KA
3302 }
3303 }
3304}
3305
f59c7b69
FY
3306#ifdef CONFIG_SUSPEND
3307static int init_iommu_hw(void)
3308{
3309 struct dmar_drhd_unit *drhd;
3310 struct intel_iommu *iommu = NULL;
3311
3312 for_each_active_iommu(iommu, drhd)
3313 if (iommu->qi)
3314 dmar_reenable_qi(iommu);
3315
b779260b
JC
3316 for_each_iommu(iommu, drhd) {
3317 if (drhd->ignored) {
3318 /*
3319 * we always have to disable PMRs or DMA may fail on
3320 * this device
3321 */
3322 if (force_on)
3323 iommu_disable_protect_mem_regions(iommu);
3324 continue;
3325 }
3326
f59c7b69
FY
3327 iommu_flush_write_buffer(iommu);
3328
3329 iommu_set_root_entry(iommu);
3330
3331 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3332 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3333 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3334 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3335 if (iommu_enable_translation(iommu))
3336 return 1;
b94996c9 3337 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3338 }
3339
3340 return 0;
3341}
3342
3343static void iommu_flush_all(void)
3344{
3345 struct dmar_drhd_unit *drhd;
3346 struct intel_iommu *iommu;
3347
3348 for_each_active_iommu(iommu, drhd) {
3349 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3350 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3351 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3352 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3353 }
3354}
3355
134fac3f 3356static int iommu_suspend(void)
f59c7b69
FY
3357{
3358 struct dmar_drhd_unit *drhd;
3359 struct intel_iommu *iommu = NULL;
3360 unsigned long flag;
3361
3362 for_each_active_iommu(iommu, drhd) {
3363 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3364 GFP_ATOMIC);
3365 if (!iommu->iommu_state)
3366 goto nomem;
3367 }
3368
3369 iommu_flush_all();
3370
3371 for_each_active_iommu(iommu, drhd) {
3372 iommu_disable_translation(iommu);
3373
1f5b3c3f 3374 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3375
3376 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3377 readl(iommu->reg + DMAR_FECTL_REG);
3378 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3379 readl(iommu->reg + DMAR_FEDATA_REG);
3380 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3381 readl(iommu->reg + DMAR_FEADDR_REG);
3382 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3383 readl(iommu->reg + DMAR_FEUADDR_REG);
3384
1f5b3c3f 3385 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3386 }
3387 return 0;
3388
3389nomem:
3390 for_each_active_iommu(iommu, drhd)
3391 kfree(iommu->iommu_state);
3392
3393 return -ENOMEM;
3394}
3395
134fac3f 3396static void iommu_resume(void)
f59c7b69
FY
3397{
3398 struct dmar_drhd_unit *drhd;
3399 struct intel_iommu *iommu = NULL;
3400 unsigned long flag;
3401
3402 if (init_iommu_hw()) {
b779260b
JC
3403 if (force_on)
3404 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3405 else
3406 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3407 return;
f59c7b69
FY
3408 }
3409
3410 for_each_active_iommu(iommu, drhd) {
3411
1f5b3c3f 3412 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3413
3414 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3415 iommu->reg + DMAR_FECTL_REG);
3416 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3417 iommu->reg + DMAR_FEDATA_REG);
3418 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3419 iommu->reg + DMAR_FEADDR_REG);
3420 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3421 iommu->reg + DMAR_FEUADDR_REG);
3422
1f5b3c3f 3423 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3424 }
3425
3426 for_each_active_iommu(iommu, drhd)
3427 kfree(iommu->iommu_state);
f59c7b69
FY
3428}
3429
134fac3f 3430static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3431 .resume = iommu_resume,
3432 .suspend = iommu_suspend,
3433};
3434
134fac3f 3435static void __init init_iommu_pm_ops(void)
f59c7b69 3436{
134fac3f 3437 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3438}
3439
3440#else
99592ba4 3441static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3442#endif /* CONFIG_PM */
3443
318fe7df
SS
3444LIST_HEAD(dmar_rmrr_units);
3445
3446static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
3447{
3448 list_add(&rmrr->list, &dmar_rmrr_units);
3449}
3450
3451
3452int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3453{
3454 struct acpi_dmar_reserved_memory *rmrr;
3455 struct dmar_rmrr_unit *rmrru;
3456
3457 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3458 if (!rmrru)
3459 return -ENOMEM;
3460
3461 rmrru->hdr = header;
3462 rmrr = (struct acpi_dmar_reserved_memory *)header;
3463 rmrru->base_address = rmrr->base_address;
3464 rmrru->end_address = rmrr->end_address;
3465
3466 dmar_register_rmrr_unit(rmrru);
3467 return 0;
3468}
3469
3470static int __init
3471rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
3472{
3473 struct acpi_dmar_reserved_memory *rmrr;
3474 int ret;
3475
3476 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
3477 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
3478 ((void *)rmrr) + rmrr->header.length,
3479 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
3480
3481 if (ret || (rmrru->devices_cnt == 0)) {
3482 list_del(&rmrru->list);
3483 kfree(rmrru);
3484 }
3485 return ret;
3486}
3487
3488static LIST_HEAD(dmar_atsr_units);
3489
3490int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3491{
3492 struct acpi_dmar_atsr *atsr;
3493 struct dmar_atsr_unit *atsru;
3494
3495 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3496 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3497 if (!atsru)
3498 return -ENOMEM;
3499
3500 atsru->hdr = hdr;
3501 atsru->include_all = atsr->flags & 0x1;
3502
3503 list_add(&atsru->list, &dmar_atsr_units);
3504
3505 return 0;
3506}
3507
3508static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
3509{
3510 int rc;
3511 struct acpi_dmar_atsr *atsr;
3512
3513 if (atsru->include_all)
3514 return 0;
3515
3516 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3517 rc = dmar_parse_dev_scope((void *)(atsr + 1),
3518 (void *)atsr + atsr->header.length,
3519 &atsru->devices_cnt, &atsru->devices,
3520 atsr->segment);
3521 if (rc || !atsru->devices_cnt) {
3522 list_del(&atsru->list);
3523 kfree(atsru);
3524 }
3525
3526 return rc;
3527}
3528
3529int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3530{
3531 int i;
3532 struct pci_bus *bus;
3533 struct acpi_dmar_atsr *atsr;
3534 struct dmar_atsr_unit *atsru;
3535
3536 dev = pci_physfn(dev);
3537
3538 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3539 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3540 if (atsr->segment == pci_domain_nr(dev->bus))
3541 goto found;
3542 }
3543
3544 return 0;
3545
3546found:
3547 for (bus = dev->bus; bus; bus = bus->parent) {
3548 struct pci_dev *bridge = bus->self;
3549
3550 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3551 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df
SS
3552 return 0;
3553
62f87c0e 3554 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) {
318fe7df
SS
3555 for (i = 0; i < atsru->devices_cnt; i++)
3556 if (atsru->devices[i] == bridge)
3557 return 1;
3558 break;
3559 }
3560 }
3561
3562 if (atsru->include_all)
3563 return 1;
3564
3565 return 0;
3566}
3567
c8f369ab 3568int __init dmar_parse_rmrr_atsr_dev(void)
318fe7df
SS
3569{
3570 struct dmar_rmrr_unit *rmrr, *rmrr_n;
3571 struct dmar_atsr_unit *atsr, *atsr_n;
3572 int ret = 0;
3573
3574 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
3575 ret = rmrr_parse_dev(rmrr);
3576 if (ret)
3577 return ret;
3578 }
3579
3580 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
3581 ret = atsr_parse_dev(atsr);
3582 if (ret)
3583 return ret;
3584 }
3585
3586 return ret;
3587}
3588
99dcaded
FY
3589/*
3590 * Here we only respond to a device being unbound from its driver.
3591 *
3592 * A newly added device is not attached to its DMAR domain here yet; that
3593 * happens when the device is first mapped to an iova.
3594 */
3595static int device_notifier(struct notifier_block *nb,
3596 unsigned long action, void *data)
3597{
3598 struct device *dev = data;
3599 struct pci_dev *pdev = to_pci_dev(dev);
3600 struct dmar_domain *domain;
3601
44cd613c
DW
3602 if (iommu_no_mapping(dev))
3603 return 0;
3604
99dcaded
FY
3605 domain = find_domain(pdev);
3606 if (!domain)
3607 return 0;
3608
a97590e5 3609 if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) {
99dcaded
FY
3610 domain_remove_one_dev_info(domain, pdev);
3611
a97590e5
AW
3612 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3613 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) &&
3614 list_empty(&domain->devices))
3615 domain_exit(domain);
3616 }
3617
99dcaded
FY
3618 return 0;
3619}
3620
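/* Registered against the PCI bus type in intel_iommu_init() below. */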
3621static struct notifier_block device_nb = {
3622 .notifier_call = device_notifier,
3623};
3624
ba395927
KA
3625int __init intel_iommu_init(void)
3626{
3627 int ret = 0;
3a93c841 3628 struct dmar_drhd_unit *drhd;
7c919779 3629 struct intel_iommu *iommu;
ba395927 3630
a59b50e9
JC
3631 /* VT-d is required for a TXT/tboot launch, so enforce that */
3632 force_on = tboot_force_iommu();
3633
3634 if (dmar_table_init()) {
3635 if (force_on)
3636 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3637 return -ENODEV;
a59b50e9 3638 }
ba395927 3639
3a93c841
TI
3640 /*
3641 * Disable translation if already enabled prior to OS handover.
3642 */
7c919779 3643 for_each_active_iommu(iommu, drhd)
3a93c841
TI
3644 if (iommu->gcmd & DMA_GCMD_TE)
3645 iommu_disable_translation(iommu);
3a93c841 3646
c2c7286a 3647 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
3648 if (force_on)
3649 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3650 return -ENODEV;
a59b50e9 3651 }
1886e8a9 3652
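	/*
	 * no_iommu and dmar_disabled are normally set from early boot
	 * parameters (e.g. "iommu=off" / "intel_iommu=off"); nothing to
	 * do in that case.
	 */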
75f1cdf1 3653 if (no_iommu || dmar_disabled)
2ae21010
SS
3654 return -ENODEV;
3655
51a63e67
JC
3656 if (iommu_init_mempool()) {
3657 if (force_on)
3658 panic("tboot: Failed to initialize iommu memory\n");
3659 return -ENODEV;
3660 }
3661
318fe7df
SS
3662 if (list_empty(&dmar_rmrr_units))
3663 printk(KERN_INFO "DMAR: No RMRR found\n");
3664
3665 if (list_empty(&dmar_atsr_units))
3666 printk(KERN_INFO "DMAR: No ATSR found\n");
3667
51a63e67
JC
3668 if (dmar_init_reserved_ranges()) {
3669 if (force_on)
3670 panic("tboot: Failed to reserve iommu ranges\n");
3671 return -ENODEV;
3672 }
ba395927
KA
3673
3674 init_no_remapping_devices();
3675
b779260b 3676 ret = init_dmars();
ba395927 3677 if (ret) {
a59b50e9
JC
3678 if (force_on)
3679 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3680 printk(KERN_ERR "IOMMU: dmar init failed\n");
3681 put_iova_domain(&reserved_iova_list);
3682 iommu_exit_mempool();
3683 return ret;
3684 }
3685 printk(KERN_INFO
3686 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3687
5e0d2a6f 3688 init_timer(&unmap_timer);
75f1cdf1
FT
3689#ifdef CONFIG_SWIOTLB
3690 swiotlb = 0;
3691#endif
19943b0e 3692 dma_ops = &intel_dma_ops;
4ed0d3e6 3693
134fac3f 3694 init_iommu_pm_ops();
a8bcbb0d 3695
4236d97d 3696 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
a8bcbb0d 3697
99dcaded
FY
3698 bus_register_notifier(&pci_bus_type, &device_nb);
3699
8bc1f85c
ED
3700 intel_iommu_enabled = 1;
3701
ba395927
KA
3702 return 0;
3703}
e820482c 3704
3199aa6b
HW
3705static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3706 struct pci_dev *pdev)
3707{
3708 struct pci_dev *tmp, *parent;
3709
3710 if (!iommu || !pdev)
3711 return;
3712
3713 /* dependent device detach */
3714 tmp = pci_find_upstream_pcie_bridge(pdev);
3715 /* Secondary interface's bus number and devfn 0 */
3716 if (tmp) {
3717 parent = pdev->bus->self;
3718 while (parent != tmp) {
3719 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3720 parent->devfn);
3199aa6b
HW
3721 parent = parent->bus->self;
3722 }
45e829ea 3723 if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */
3199aa6b
HW
3724 iommu_detach_dev(iommu,
3725 tmp->subordinate->number, 0);
3726 else /* this is a legacy PCI bridge */
276dbf99
DW
3727 iommu_detach_dev(iommu, tmp->bus->number,
3728 tmp->devfn);
3199aa6b
HW
3729 }
3730}
3731
2c2e2c38 3732static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3733 struct pci_dev *pdev)
3734{
bca2b916 3735 struct device_domain_info *info, *tmp;
c7151a8d
WH
3736 struct intel_iommu *iommu;
3737 unsigned long flags;
3738 int found = 0;
c7151a8d 3739
276dbf99
DW
3740 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3741 pdev->devfn);
c7151a8d
WH
3742 if (!iommu)
3743 return;
3744
3745 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 3746 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
8519dc44
MH
3747 if (info->segment == pci_domain_nr(pdev->bus) &&
3748 info->bus == pdev->bus->number &&
c7151a8d 3749 info->devfn == pdev->devfn) {
109b9b04 3750 unlink_domain_info(info);
c7151a8d
WH
3751 spin_unlock_irqrestore(&device_domain_lock, flags);
3752
93a23a72 3753 iommu_disable_dev_iotlb(info);
c7151a8d 3754 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3755 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3756 free_devinfo_mem(info);
3757
3758 spin_lock_irqsave(&device_domain_lock, flags);
3759
3760 if (found)
3761 break;
3762 else
3763 continue;
3764 }
3765
3766 /* If there are no other devices under the same iommu
3767  * owned by this domain, clear this iommu in iommu_bmp and
3768  * update the iommu count and coherency.
3769  */
276dbf99
DW
3770 if (iommu == device_to_iommu(info->segment, info->bus,
3771 info->devfn))
c7151a8d
WH
3772 found = 1;
3773 }
3774
3e7abe25
RD
3775 spin_unlock_irqrestore(&device_domain_lock, flags);
3776
c7151a8d
WH
3777 if (found == 0) {
3778 unsigned long tmp_flags;
3779 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
1b198bb0 3780 clear_bit(iommu->seq_id, domain->iommu_bmp);
c7151a8d 3781 domain->iommu_count--;
58c610bd 3782 domain_update_iommu_cap(domain);
c7151a8d 3783 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
a97590e5 3784
9b4554b2
AW
3785 if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) &&
3786 !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) {
3787 spin_lock_irqsave(&iommu->lock, tmp_flags);
3788 clear_bit(domain->id, iommu->domain_ids);
3789 iommu->domains[domain->id] = NULL;
3790 spin_unlock_irqrestore(&iommu->lock, tmp_flags);
3791 }
c7151a8d 3792 }
c7151a8d
WH
3793}
3794
3795static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3796{
3797 struct device_domain_info *info;
3798 struct intel_iommu *iommu;
3799 unsigned long flags1, flags2;
3800
3801 spin_lock_irqsave(&device_domain_lock, flags1);
3802 while (!list_empty(&domain->devices)) {
3803 info = list_entry(domain->devices.next,
3804 struct device_domain_info, link);
109b9b04 3805 unlink_domain_info(info);
c7151a8d
WH
3806 spin_unlock_irqrestore(&device_domain_lock, flags1);
3807
93a23a72 3808 iommu_disable_dev_iotlb(info);
276dbf99 3809 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3810 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3811 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3812
3813 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3814 * and capabilities
c7151a8d
WH
3815 */
3816 spin_lock_irqsave(&domain->iommu_lock, flags2);
3817 if (test_and_clear_bit(iommu->seq_id,
1b198bb0 3818 domain->iommu_bmp)) {
c7151a8d 3819 domain->iommu_count--;
58c610bd 3820 domain_update_iommu_cap(domain);
c7151a8d
WH
3821 }
3822 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3823
3824 free_devinfo_mem(info);
3825 spin_lock_irqsave(&device_domain_lock, flags1);
3826 }
3827 spin_unlock_irqrestore(&device_domain_lock, flags1);
3828}
3829
5e98c4b1 3830 /* Domain id for a virtual machine; it is never written into a context entry. */
18d99165 3831static atomic_t vm_domid = ATOMIC_INIT(0);
5e98c4b1
WH
3832
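/*
 * VM domains get a software-only id from vm_domid above; the hardware
 * domain id that ends up in context entries is allocated per IOMMU later,
 * when the domain is actually context-mapped to a device.
 */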
3833static struct dmar_domain *iommu_alloc_vm_domain(void)
3834{
3835 struct dmar_domain *domain;
3836
3837 domain = alloc_domain_mem();
3838 if (!domain)
3839 return NULL;
3840
18d99165 3841 domain->id = atomic_inc_return(&vm_domid);
4c923d47 3842 domain->nid = -1;
1b198bb0 3843 memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp));
5e98c4b1
WH
3844 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3845
3846 return domain;
3847}
3848
2c2e2c38 3849static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3850{
3851 int adjust_width;
3852
3853 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3854 spin_lock_init(&domain->iommu_lock);
3855
3856 domain_reserve_special_ranges(domain);
3857
3858 /* calculate AGAW */
3859 domain->gaw = guest_width;
3860 adjust_width = guestwidth_to_adjustwidth(guest_width);
3861 domain->agaw = width_to_agaw(adjust_width);
3862
3863 INIT_LIST_HEAD(&domain->devices);
3864
3865 domain->iommu_count = 0;
3866 domain->iommu_coherency = 0;
c5b15255 3867 domain->iommu_snooping = 0;
6dd9a7c7 3868 domain->iommu_superpage = 0;
fe40f1e0 3869 domain->max_addr = 0;
4c923d47 3870 domain->nid = -1;
5e98c4b1
WH
3871
3872 /* always allocate the top pgd */
4c923d47 3873 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3874 if (!domain->pgd)
3875 return -ENOMEM;
3876 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3877 return 0;
3878}
3879
3880static void iommu_free_vm_domain(struct dmar_domain *domain)
3881{
3882 unsigned long flags;
3883 struct dmar_drhd_unit *drhd;
3884 struct intel_iommu *iommu;
3885 unsigned long i;
3886 unsigned long ndomains;
3887
7c919779 3888 for_each_active_iommu(iommu, drhd) {
5e98c4b1 3889 ndomains = cap_ndoms(iommu->cap);
a45946ab 3890 for_each_set_bit(i, iommu->domain_ids, ndomains) {
5e98c4b1
WH
3891 if (iommu->domains[i] == domain) {
3892 spin_lock_irqsave(&iommu->lock, flags);
3893 clear_bit(i, iommu->domain_ids);
3894 iommu->domains[i] = NULL;
3895 spin_unlock_irqrestore(&iommu->lock, flags);
3896 break;
3897 }
5e98c4b1
WH
3898 }
3899 }
3900}
3901
3902static void vm_domain_exit(struct dmar_domain *domain)
3903{
5e98c4b1
WH
3904 /* Domain 0 is reserved, so don't process it */
3905 if (!domain)
3906 return;
3907
3908 vm_domain_remove_all_dev_info(domain);
3909 /* destroy iovas */
3910 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3911
3912 /* clear ptes */
595badf5 3913 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3914
3915 /* free page tables */
d794dc9b 3916 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3917
3918 iommu_free_vm_domain(domain);
3919 free_domain_mem(domain);
3920}
3921
5d450806 3922static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3923{
5d450806 3924 struct dmar_domain *dmar_domain;
38717946 3925
5d450806
JR
3926 dmar_domain = iommu_alloc_vm_domain();
3927 if (!dmar_domain) {
38717946 3928 printk(KERN_ERR
5d450806
JR
3929 "intel_iommu_domain_init: dmar_domain == NULL\n");
3930 return -ENOMEM;
38717946 3931 }
2c2e2c38 3932 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3933 printk(KERN_ERR
5d450806
JR
3934 "intel_iommu_domain_init() failed\n");
3935 vm_domain_exit(dmar_domain);
3936 return -ENOMEM;
38717946 3937 }
8140a95d 3938 domain_update_iommu_cap(dmar_domain);
5d450806 3939 domain->priv = dmar_domain;
faa3d6f5 3940
8a0e715b
JR
3941 domain->geometry.aperture_start = 0;
3942 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
3943 domain->geometry.force_aperture = true;
3944
5d450806 3945 return 0;
38717946 3946}
38717946 3947
5d450806 3948static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3949{
5d450806
JR
3950 struct dmar_domain *dmar_domain = domain->priv;
3951
3952 domain->priv = NULL;
3953 vm_domain_exit(dmar_domain);
38717946 3954}
38717946 3955
4c5478c9
JR
3956static int intel_iommu_attach_device(struct iommu_domain *domain,
3957 struct device *dev)
38717946 3958{
4c5478c9
JR
3959 struct dmar_domain *dmar_domain = domain->priv;
3960 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3961 struct intel_iommu *iommu;
3962 int addr_width;
faa3d6f5
WH
3963
3964 /* normally pdev is not mapped */
3965 if (unlikely(domain_context_mapped(pdev))) {
3966 struct dmar_domain *old_domain;
3967
3968 old_domain = find_domain(pdev);
3969 if (old_domain) {
2c2e2c38
FY
3970 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3971 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3972 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3973 else
3974 domain_remove_dev_info(old_domain);
3975 }
3976 }
3977
276dbf99
DW
3978 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3979 pdev->devfn);
fe40f1e0
WH
3980 if (!iommu)
3981 return -ENODEV;
3982
3983 /* check if this iommu agaw is sufficient for max mapped address */
3984 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
3985 if (addr_width > cap_mgaw(iommu->cap))
3986 addr_width = cap_mgaw(iommu->cap);
3987
3988 if (dmar_domain->max_addr > (1LL << addr_width)) {
3989 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 3990 "sufficient for the mapped address (%llx)\n",
a99c47a2 3991 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
3992 return -EFAULT;
3993 }
a99c47a2
TL
3994 dmar_domain->gaw = addr_width;
3995
3996 /*
3997 * Knock out extra levels of page tables if necessary
3998 */
3999 while (iommu->agaw < dmar_domain->agaw) {
4000 struct dma_pte *pte;
4001
4002 pte = dmar_domain->pgd;
4003 if (dma_pte_present(pte)) {
25cbff16
SY
4004 dmar_domain->pgd = (struct dma_pte *)
4005 phys_to_virt(dma_pte_addr(pte));
7a661013 4006 free_pgtable_page(pte);
a99c47a2
TL
4007 }
4008 dmar_domain->agaw--;
4009 }
fe40f1e0 4010
5fe60f4e 4011 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 4012}
38717946 4013
4c5478c9
JR
4014static void intel_iommu_detach_device(struct iommu_domain *domain,
4015 struct device *dev)
38717946 4016{
4c5478c9
JR
4017 struct dmar_domain *dmar_domain = domain->priv;
4018 struct pci_dev *pdev = to_pci_dev(dev);
4019
2c2e2c38 4020 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 4021}
c7151a8d 4022
b146a1c9
JR
4023static int intel_iommu_map(struct iommu_domain *domain,
4024 unsigned long iova, phys_addr_t hpa,
5009065d 4025 size_t size, int iommu_prot)
faa3d6f5 4026{
dde57a21 4027 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4028 u64 max_addr;
dde57a21 4029 int prot = 0;
faa3d6f5 4030 int ret;
fe40f1e0 4031
dde57a21
JR
4032 if (iommu_prot & IOMMU_READ)
4033 prot |= DMA_PTE_READ;
4034 if (iommu_prot & IOMMU_WRITE)
4035 prot |= DMA_PTE_WRITE;
9cf06697
SY
4036 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4037 prot |= DMA_PTE_SNP;
dde57a21 4038
163cc52c 4039 max_addr = iova + size;
dde57a21 4040 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4041 u64 end;
4042
4043 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4044 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4045 if (end < max_addr) {
8954da1f 4046 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4047 "sufficient for the mapped address (%llx)\n",
8954da1f 4048 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4049 return -EFAULT;
4050 }
dde57a21 4051 dmar_domain->max_addr = max_addr;
fe40f1e0 4052 }
ad051221
DW
4053 /* Round size up to the next multiple of PAGE_SIZE if it, together with
4054 the low bits of hpa, would take us onto the next page */
88cb6a74 4055 size = aligned_nrpages(hpa, size);
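	/*
	 * Example (4KiB VT-d pages, illustrative): hpa = 0x1ffc and
	 * size = 0x10 touch two pages, so "size" becomes a count of 2
	 * VT-d pages here before it is handed to domain_pfn_mapping()
	 * below.
	 */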
ad051221
DW
4056 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4057 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4058 return ret;
38717946 4059}
38717946 4060
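/*
 * intel_iommu_unmap() reports how much was actually torn down; the generic
 * iommu_unmap() of this kernel generation keeps calling back, advancing by
 * the returned size, until the whole requested range has been unmapped.
 */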
5009065d
OBC
4061static size_t intel_iommu_unmap(struct iommu_domain *domain,
4062 unsigned long iova, size_t size)
38717946 4063{
dde57a21 4064 struct dmar_domain *dmar_domain = domain->priv;
292827cb 4065 int order;
4b99d352 4066
292827cb 4067 order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
163cc52c 4068 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 4069
163cc52c
DW
4070 if (dmar_domain->max_addr == iova + size)
4071 dmar_domain->max_addr = iova;
b146a1c9 4072
5009065d 4073 return PAGE_SIZE << order;
38717946 4074}
38717946 4075
d14d6577 4076static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4077 dma_addr_t iova)
38717946 4078{
d14d6577 4079 struct dmar_domain *dmar_domain = domain->priv;
38717946 4080 struct dma_pte *pte;
faa3d6f5 4081 u64 phys = 0;
38717946 4082
6dd9a7c7 4083 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0);
38717946 4084 if (pte)
faa3d6f5 4085 phys = dma_pte_addr(pte);
38717946 4086
faa3d6f5 4087 return phys;
38717946 4088}
a8bcbb0d 4089
dbb9fd86
SY
4090static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4091 unsigned long cap)
4092{
4093 struct dmar_domain *dmar_domain = domain->priv;
4094
4095 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4096 return dmar_domain->iommu_snooping;
323f99cb 4097 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4098 return irq_remapping_enabled;
dbb9fd86
SY
4099
4100 return 0;
4101}
4102
783f157b 4103#define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
70ae6f0d 4104
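/*
 * Devices that cannot be isolated from one another (DMA aliases behind
 * legacy bridges, multifunction devices, or paths lacking the ACS
 * capabilities above) are placed into the same iommu_group below, so the
 * generic IOMMU layer treats them as a single unit for ownership purposes.
 */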
abdfdde2
AW
4105static int intel_iommu_add_device(struct device *dev)
4106{
4107 struct pci_dev *pdev = to_pci_dev(dev);
3da4af0a 4108 struct pci_dev *bridge, *dma_pdev = NULL;
abdfdde2
AW
4109 struct iommu_group *group;
4110 int ret;
70ae6f0d 4111
abdfdde2
AW
4112 if (!device_to_iommu(pci_domain_nr(pdev->bus),
4113 pdev->bus->number, pdev->devfn))
70ae6f0d
AW
4114 return -ENODEV;
4115
4116 bridge = pci_find_upstream_pcie_bridge(pdev);
4117 if (bridge) {
abdfdde2
AW
4118 if (pci_is_pcie(bridge))
4119 dma_pdev = pci_get_domain_bus_and_slot(
4120 pci_domain_nr(pdev->bus),
4121 bridge->subordinate->number, 0);
3da4af0a 4122 if (!dma_pdev)
abdfdde2
AW
4123 dma_pdev = pci_dev_get(bridge);
4124 } else
4125 dma_pdev = pci_dev_get(pdev);
4126
a4ff1fc2 4127 /* Account for quirked devices */
783f157b
AW
4128 swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
4129
a4ff1fc2
AW
4130 /*
4131 * If it's a multifunction device that does not support our
c14d2690
AW
4132 * required ACS flags, add it to the same group as the lowest-numbered
4133 * function that also does not support the required ACS flags.
a4ff1fc2 4134 */
783f157b 4135 if (dma_pdev->multifunction &&
c14d2690
AW
4136 !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
4137 u8 i, slot = PCI_SLOT(dma_pdev->devfn);
4138
4139 for (i = 0; i < 8; i++) {
4140 struct pci_dev *tmp;
4141
4142 tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
4143 if (!tmp)
4144 continue;
4145
4146 if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
4147 swap_pci_ref(&dma_pdev, tmp);
4148 break;
4149 }
4150 pci_dev_put(tmp);
4151 }
4152 }
783f157b 4153
a4ff1fc2
AW
4154 /*
4155 * Devices on the root bus go through the iommu. If that's not us,
4156 * find the next upstream device and test ACS up to the root bus.
4157 * Finding the next device may require skipping virtual buses.
4158 */
783f157b 4159 while (!pci_is_root_bus(dma_pdev->bus)) {
a4ff1fc2
AW
4160 struct pci_bus *bus = dma_pdev->bus;
4161
4162 while (!bus->self) {
4163 if (!pci_is_root_bus(bus))
4164 bus = bus->parent;
4165 else
4166 goto root_bus;
4167 }
4168
4169 if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
783f157b
AW
4170 break;
4171
a4ff1fc2 4172 swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
783f157b
AW
4173 }
4174
a4ff1fc2 4175root_bus:
abdfdde2
AW
4176 group = iommu_group_get(&dma_pdev->dev);
4177 pci_dev_put(dma_pdev);
4178 if (!group) {
4179 group = iommu_group_alloc();
4180 if (IS_ERR(group))
4181 return PTR_ERR(group);
70ae6f0d
AW
4182 }
4183
abdfdde2 4184 ret = iommu_group_add_device(group, dev);
bcb71abe 4185
abdfdde2
AW
4186 iommu_group_put(group);
4187 return ret;
4188}
70ae6f0d 4189
abdfdde2
AW
4190static void intel_iommu_remove_device(struct device *dev)
4191{
4192 iommu_group_remove_device(dev);
70ae6f0d
AW
4193}
4194
a8bcbb0d
JR
4195static struct iommu_ops intel_iommu_ops = {
4196 .domain_init = intel_iommu_domain_init,
4197 .domain_destroy = intel_iommu_domain_destroy,
4198 .attach_dev = intel_iommu_attach_device,
4199 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4200 .map = intel_iommu_map,
4201 .unmap = intel_iommu_unmap,
a8bcbb0d 4202 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4203 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4204 .add_device = intel_iommu_add_device,
4205 .remove_device = intel_iommu_remove_device,
6d1c56a9 4206 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4207};
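/*
 * Illustrative use of these ops through the generic IOMMU API of this
 * kernel generation (a sketch, not code from this driver; error handling
 * omitted, and "pdev", "iova" and "phys" are assumed to be provided by
 * the caller):
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	iommu_attach_device(dom, &pdev->dev);
 *	iommu_map(dom, iova, phys, PAGE_SIZE, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap(dom, iova, PAGE_SIZE);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 *
 * Each call lands in the corresponding intel_iommu_* callback above.
 */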
9af88143 4208
9452618e
DV
4209static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4210{
4211 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4212 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4213 dmar_map_gfx = 0;
4214}
4215
4216DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4217DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4218DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4219DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4220DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4221DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4222DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4223
d34d6517 4224static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4225{
4226 /*
4227 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4228 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4229 */
4230 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4231 rwbf_quirk = 1;
4232}
4233
4234DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4235DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4236DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4237DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4238DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4239DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4240DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4241
eecfd57f
AJ
4242#define GGC 0x52
4243#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4244#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4245#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4246#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4247#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4248#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4249#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4250#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4251
d34d6517 4252static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4253{
4254 unsigned short ggc;
4255
eecfd57f 4256 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4257 return;
4258
eecfd57f 4259 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4260 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4261 dmar_map_gfx = 0;
6fbcfb3e
DW
4262 } else if (dmar_map_gfx) {
4263 /* we have to ensure the gfx device is idle before we flush */
4264 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4265 intel_iommu_strict = 1;
4266 }
9eecabcb
DW
4267}
4268DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4270DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4272
e0fc7e0b
DW
4273/* On Tylersburg chipsets, some BIOSes have been known to enable the
4274 ISOCH DMAR unit for the Azalia sound device, but not give it any
4275 TLB entries, which causes it to deadlock. Check for that. We do
4276 this in a function called from init_dmars(), instead of in a PCI
4277 quirk, because we don't want to print the obnoxious "BIOS broken"
4278 message if VT-d is actually disabled.
4279*/
4280static void __init check_tylersburg_isoch(void)
4281{
4282 struct pci_dev *pdev;
4283 uint32_t vtisochctrl;
4284
4285 /* If there's no Azalia in the system anyway, forget it. */
4286 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4287 if (!pdev)
4288 return;
4289 pci_dev_put(pdev);
4290
4291 /* System Management Registers. Might be hidden, in which case
4292 we can't do the sanity check. But that's OK, because the
4293 known-broken BIOSes _don't_ actually hide it, so far. */
4294 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4295 if (!pdev)
4296 return;
4297
4298 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4299 pci_dev_put(pdev);
4300 return;
4301 }
4302
4303 pci_dev_put(pdev);
4304
4305 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4306 if (vtisochctrl & 1)
4307 return;
4308
4309 /* Drop all bits other than the number of TLB entries */
4310 vtisochctrl &= 0x1c;
4311
4312 /* If we have the recommended number of TLB entries (16), fine. */
4313 if (vtisochctrl == 0x10)
4314 return;
4315
4316 /* Zero TLB entries? You get to ride the short bus to school. */
4317 if (!vtisochctrl) {
4318 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4319 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4320 dmi_get_system_info(DMI_BIOS_VENDOR),
4321 dmi_get_system_info(DMI_BIOS_VERSION),
4322 dmi_get_system_info(DMI_PRODUCT_VERSION));
4323 iommu_identity_mapping |= IDENTMAP_AZALIA;
4324 return;
4325 }
4326
4327 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4328 vtisochctrl);
4329}