iommu/vt-d: Fix possible invalid memory access caused by free_dmar_iommu()
[deliverable/linux.git] / drivers/iommu/intel-iommu.c
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
18 */
19
20#include <linux/init.h>
21#include <linux/bitmap.h>
5e0d2a6f 22#include <linux/debugfs.h>
54485c30 23#include <linux/export.h>
ba395927
KA
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
ba395927
KA
27#include <linux/spinlock.h>
28#include <linux/pci.h>
29#include <linux/dmar.h>
30#include <linux/dma-mapping.h>
31#include <linux/mempool.h>
75f05569 32#include <linux/memory.h>
5e0d2a6f 33#include <linux/timer.h>
38717946 34#include <linux/iova.h>
5d450806 35#include <linux/iommu.h>
38717946 36#include <linux/intel-iommu.h>
134fac3f 37#include <linux/syscore_ops.h>
69575d38 38#include <linux/tboot.h>
adb2fe02 39#include <linux/dmi.h>
5cdede24 40#include <linux/pci-ats.h>
0ee332c1 41#include <linux/memblock.h>
36746436 42#include <linux/dma-contiguous.h>
8a8f422d 43#include <asm/irq_remapping.h>
ba395927 44#include <asm/cacheflush.h>
46a7fa27 45#include <asm/iommu.h>
ba395927 46
078e1ee2
JR
47#include "irq_remapping.h"
48
5b6985ce
FY
49#define ROOT_SIZE VTD_PAGE_SIZE
50#define CONTEXT_SIZE VTD_PAGE_SIZE
51
ba395927
KA
52#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
53#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 54#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
55
56#define IOAPIC_RANGE_START (0xfee00000)
57#define IOAPIC_RANGE_END (0xfeefffff)
58#define IOVA_START_ADDR (0x1000)
59
60#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
61
4ed0d3e6 62#define MAX_AGAW_WIDTH 64
5c645b35 63#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 64
2ebe3151
DW
65#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
66#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
67
68/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
69 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
70#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
71 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
72#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
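/*
 * Worked example (for illustration): with gaw = 48, __DOMAIN_MAX_PFN(48)
 * evaluates to 2^36 - 1. On a 32-bit kernel that does not fit in an
 * unsigned long, so DOMAIN_MAX_PFN(48) clamps to ULONG_MAX; on a 64-bit
 * kernel the full 2^36 - 1 value is kept.
 */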
ba395927 73
f27be03b 74#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 75#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 76#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 77
df08cdc7
AM
78/* page table handling */
79#define LEVEL_STRIDE (9)
80#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
81
6d1c56a9
OBC
82/*
 83 * This bitmap is used to advertise the page sizes our hardware supports
84 * to the IOMMU core, which will then use this information to split
85 * physically contiguous memory regions it is mapping into page sizes
86 * that we support.
87 *
88 * Traditionally the IOMMU core just handed us the mappings directly,
89 * after making sure the size is an order of a 4KiB page and that the
90 * mapping has natural alignment.
91 *
92 * To retain this behavior, we currently advertise that we support
93 * all page sizes that are an order of 4KiB.
94 *
95 * If at some point we'd like to utilize the IOMMU core's new behavior,
96 * we could change this to advertise the real page sizes we support.
97 */
98#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
99
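/*
 * Example (for illustration): ~0xFFFUL has every bit from bit 12 upwards
 * set, so 4KiB (bit 12), 8KiB (bit 13), 2MiB (bit 21), 1GiB (bit 30) and
 * every other power of two above 4KiB are advertised, and the IOMMU core
 * hands us any power-of-two sized, naturally aligned region unchanged.
 */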
df08cdc7
AM
100static inline int agaw_to_level(int agaw)
101{
102 return agaw + 2;
103}
104
105static inline int agaw_to_width(int agaw)
106{
5c645b35 107 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
108}
109
110static inline int width_to_agaw(int width)
111{
5c645b35 112 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
113}
114
115static inline unsigned int level_to_offset_bits(int level)
116{
117 return (level - 1) * LEVEL_STRIDE;
118}
119
120static inline int pfn_level_offset(unsigned long pfn, int level)
121{
122 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
123}
124
125static inline unsigned long level_mask(int level)
126{
127 return -1UL << level_to_offset_bits(level);
128}
129
130static inline unsigned long level_size(int level)
131{
132 return 1UL << level_to_offset_bits(level);
133}
134
135static inline unsigned long align_to_level(unsigned long pfn, int level)
136{
137 return (pfn + level_size(level) - 1) & level_mask(level);
138}
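/*
 * Worked example (for illustration): for the default 48-bit address
 * width, width_to_agaw(48) = DIV_ROUND_UP(48 - 30, 9) = 2, so
 * agaw_to_level(2) = 4 (a four-level page table) and agaw_to_width(2)
 * gives back 30 + 2 * 9 = 48 bits. At level 2, level_to_offset_bits(2)
 * is 9, pfn_level_offset(pfn, 2) selects bits 9-17 of the DMA pfn, and
 * level_size(2) is 512 pages, i.e. 2MiB worth of 4KiB pages.
 */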
fd18de50 139
6dd9a7c7
YS
140static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
141{
5c645b35 142 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
143}
144
dd4e8319
DW
 145/* VT-d pages must never be _larger_ than MM pages. Otherwise things
 146 are never going to work. */
147static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
148{
149 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
150}
151
152static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
153{
154 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
155}
156static inline unsigned long page_to_dma_pfn(struct page *pg)
157{
158 return mm_to_dma_pfn(page_to_pfn(pg));
159}
160static inline unsigned long virt_to_dma_pfn(void *p)
161{
162 return page_to_dma_pfn(virt_to_page(p));
163}
164
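/*
 * Example (for illustration): on x86, PAGE_SHIFT == VTD_PAGE_SHIFT == 12,
 * so the MM and DMA pfn spaces coincide and the conversions above shift
 * by zero; with a larger kernel page size each MM pfn would span several
 * DMA pfns, which is why VT-d pages may never be bigger than MM pages.
 */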
d9630fe9
WH
165/* global iommu list, set NULL for ignored DMAR units */
166static struct intel_iommu **g_iommus;
167
e0fc7e0b 168static void __init check_tylersburg_isoch(void);
9af88143
DW
169static int rwbf_quirk;
170
b779260b
JC
171/*
 172 * set to 1 to panic the kernel if VT-d can't be successfully enabled
173 * (used when kernel is launched w/ TXT)
174 */
175static int force_on = 0;
176
46b08e1a
MM
177/*
178 * 0: Present
179 * 1-11: Reserved
180 * 12-63: Context Ptr (12 - (haw-1))
181 * 64-127: Reserved
182 */
183struct root_entry {
184 u64 val;
185 u64 rsvd1;
186};
187#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
188static inline bool root_present(struct root_entry *root)
189{
190 return (root->val & 1);
191}
192static inline void set_root_present(struct root_entry *root)
193{
194 root->val |= 1;
195}
196static inline void set_root_value(struct root_entry *root, unsigned long value)
197{
198 root->val |= value & VTD_PAGE_MASK;
199}
200
201static inline struct context_entry *
202get_context_addr_from_root(struct root_entry *root)
203{
204 return (struct context_entry *)
205 (root_present(root)?phys_to_virt(
206 root->val & VTD_PAGE_MASK) :
207 NULL);
208}
209
7a8fc25e
MM
210/*
211 * low 64 bits:
212 * 0: present
213 * 1: fault processing disable
214 * 2-3: translation type
215 * 12-63: address space root
216 * high 64 bits:
217 * 0-2: address width
 218 * 3-6: avail
219 * 8-23: domain id
220 */
221struct context_entry {
222 u64 lo;
223 u64 hi;
224};
c07e7d21
MM
225
226static inline bool context_present(struct context_entry *context)
227{
228 return (context->lo & 1);
229}
230static inline void context_set_present(struct context_entry *context)
231{
232 context->lo |= 1;
233}
234
235static inline void context_set_fault_enable(struct context_entry *context)
236{
237 context->lo &= (((u64)-1) << 2) | 1;
238}
239
c07e7d21
MM
240static inline void context_set_translation_type(struct context_entry *context,
241 unsigned long value)
242{
243 context->lo &= (((u64)-1) << 4) | 3;
244 context->lo |= (value & 3) << 2;
245}
246
247static inline void context_set_address_root(struct context_entry *context,
248 unsigned long value)
249{
250 context->lo |= value & VTD_PAGE_MASK;
251}
252
253static inline void context_set_address_width(struct context_entry *context,
254 unsigned long value)
255{
256 context->hi |= value & 7;
257}
258
259static inline void context_set_domain_id(struct context_entry *context,
260 unsigned long value)
261{
262 context->hi |= (value & ((1 << 16) - 1)) << 8;
263}
264
265static inline void context_clear_entry(struct context_entry *context)
266{
267 context->lo = 0;
268 context->hi = 0;
269}
7a8fc25e 270
622ba12a
MM
271/*
272 * 0: readable
273 * 1: writable
274 * 2-6: reserved
275 * 7: super page
9cf06697
SY
276 * 8-10: available
277 * 11: snoop behavior
622ba12a
MM
 278 * 12-63: Host physical address
279 */
280struct dma_pte {
281 u64 val;
282};
622ba12a 283
19c239ce
MM
284static inline void dma_clear_pte(struct dma_pte *pte)
285{
286 pte->val = 0;
287}
288
19c239ce
MM
289static inline u64 dma_pte_addr(struct dma_pte *pte)
290{
c85994e4
DW
291#ifdef CONFIG_64BIT
292 return pte->val & VTD_PAGE_MASK;
293#else
294 /* Must have a full atomic 64-bit read */
1a8bd481 295 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 296#endif
19c239ce
MM
297}
298
19c239ce
MM
299static inline bool dma_pte_present(struct dma_pte *pte)
300{
301 return (pte->val & 3) != 0;
302}
622ba12a 303
4399c8bf
AK
304static inline bool dma_pte_superpage(struct dma_pte *pte)
305{
c3c75eb7 306 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
307}
308
75e6bf96
DW
309static inline int first_pte_in_page(struct dma_pte *pte)
310{
311 return !((unsigned long)pte & ~VTD_PAGE_MASK);
312}
313
2c2e2c38
FY
314/*
 315 * This domain is a static identity mapping domain.
 316 * 1. This domain creates a static 1:1 mapping to all usable memory.
 317 * 2. It maps to each iommu if successful.
 318 * 3. Each iommu maps to this domain if successful.
319 */
19943b0e
DW
320static struct dmar_domain *si_domain;
321static int hw_pass_through = 1;
2c2e2c38 322
1ce28feb
WH
 323/* domain represents a virtual machine; more than one device
 324 * across iommus may be owned by one domain, e.g. a kvm guest.
325 */
ab8dfe25 326#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 327
2c2e2c38 328/* si_domain contains mulitple devices */
ab8dfe25 329#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 330
1b198bb0
MT
331/* define the limit of IOMMUs supported in each domain */
332#ifdef CONFIG_X86
333# define IOMMU_UNITS_SUPPORTED MAX_IO_APICS
334#else
335# define IOMMU_UNITS_SUPPORTED 64
336#endif
337
99126f7c
MM
338struct dmar_domain {
339 int id; /* domain id */
4c923d47 340 int nid; /* node id */
1b198bb0
MT
341 DECLARE_BITMAP(iommu_bmp, IOMMU_UNITS_SUPPORTED);
342 /* bitmap of iommus this domain uses*/
99126f7c
MM
343
344 struct list_head devices; /* all devices' list */
345 struct iova_domain iovad; /* iova's that belong to this domain */
346
347 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
348 int gaw; /* max guest address width */
349
350 /* adjusted guest address width, 0 is level 2 30-bit */
351 int agaw;
352
3b5410e7 353 int flags; /* flags to find out type of domain */
8e604097
WH
354
355 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 356 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 357 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
358 int iommu_superpage;/* Level of superpages supported:
359 0 == 4KiB (no superpages), 1 == 2MiB,
360 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
c7151a8d 361 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 362 u64 max_addr; /* maximum mapped address */
99126f7c
MM
363};
364
a647dacb
MM
365/* PCI domain-device relationship */
366struct device_domain_info {
367 struct list_head link; /* link to domain siblings */
368 struct list_head global; /* link to global list */
276dbf99 369 u8 bus; /* PCI bus number */
a647dacb 370 u8 devfn; /* PCI devfn number */
0bcb3e28 371 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 372 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
373 struct dmar_domain *domain; /* pointer to domain */
374};
375
b94e4117
JL
376struct dmar_rmrr_unit {
377 struct list_head list; /* list of rmrr units */
378 struct acpi_dmar_header *hdr; /* ACPI header */
379 u64 base_address; /* reserved base address*/
380 u64 end_address; /* reserved end address */
832bd858 381 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
382 int devices_cnt; /* target device count */
383};
384
385struct dmar_atsr_unit {
386 struct list_head list; /* list of ATSR units */
387 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 388 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
389 int devices_cnt; /* target device count */
390 u8 include_all:1; /* include all ports */
391};
392
393static LIST_HEAD(dmar_atsr_units);
394static LIST_HEAD(dmar_rmrr_units);
395
396#define for_each_rmrr_units(rmrr) \
397 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
398
5e0d2a6f 399static void flush_unmaps_timeout(unsigned long data);
400
b707cb02 401static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 402
80b20dd8 403#define HIGH_WATER_MARK 250
404struct deferred_flush_tables {
405 int next;
406 struct iova *iova[HIGH_WATER_MARK];
407 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 408 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 409};
410
411static struct deferred_flush_tables *deferred_flush;
412
5e0d2a6f 413/* bitmap for indexing intel_iommus */
5e0d2a6f 414static int g_num_of_iommus;
415
416static DEFINE_SPINLOCK(async_umap_flush_lock);
417static LIST_HEAD(unmaps_to_do);
418
419static int timer_on;
420static long list_size;
5e0d2a6f 421
92d03cc8 422static void domain_exit(struct dmar_domain *domain);
ba395927 423static void domain_remove_dev_info(struct dmar_domain *domain);
b94e4117 424static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 425 struct device *dev);
92d03cc8 426static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 427 struct device *dev);
2a46ddf7
JL
428static int domain_detach_iommu(struct dmar_domain *domain,
429 struct intel_iommu *iommu);
ba395927 430
d3f13810 431#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
432int dmar_disabled = 0;
433#else
434int dmar_disabled = 1;
d3f13810 435#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 436
8bc1f85c
ED
437int intel_iommu_enabled = 0;
438EXPORT_SYMBOL_GPL(intel_iommu_enabled);
439
2d9e667e 440static int dmar_map_gfx = 1;
7d3b03ce 441static int dmar_forcedac;
5e0d2a6f 442static int intel_iommu_strict;
6dd9a7c7 443static int intel_iommu_superpage = 1;
ba395927 444
c0771df8
DW
445int intel_iommu_gfx_mapped;
446EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
447
ba395927
KA
448#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
449static DEFINE_SPINLOCK(device_domain_lock);
450static LIST_HEAD(device_domain_list);
451
b22f6434 452static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 453
ba395927
KA
454static int __init intel_iommu_setup(char *str)
455{
456 if (!str)
457 return -EINVAL;
458 while (*str) {
0cd5c3c8
KM
459 if (!strncmp(str, "on", 2)) {
460 dmar_disabled = 0;
461 printk(KERN_INFO "Intel-IOMMU: enabled\n");
462 } else if (!strncmp(str, "off", 3)) {
ba395927 463 dmar_disabled = 1;
0cd5c3c8 464 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
465 } else if (!strncmp(str, "igfx_off", 8)) {
466 dmar_map_gfx = 0;
467 printk(KERN_INFO
468 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 469 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 470 printk(KERN_INFO
7d3b03ce
KA
471 "Intel-IOMMU: Forcing DAC for PCI devices\n");
472 dmar_forcedac = 1;
5e0d2a6f 473 } else if (!strncmp(str, "strict", 6)) {
474 printk(KERN_INFO
475 "Intel-IOMMU: disable batched IOTLB flush\n");
476 intel_iommu_strict = 1;
6dd9a7c7
YS
477 } else if (!strncmp(str, "sp_off", 6)) {
478 printk(KERN_INFO
479 "Intel-IOMMU: disable supported super page\n");
480 intel_iommu_superpage = 0;
ba395927
KA
481 }
482
483 str += strcspn(str, ",");
484 while (*str == ',')
485 str++;
486 }
487 return 0;
488}
489__setup("intel_iommu=", intel_iommu_setup);
490
491static struct kmem_cache *iommu_domain_cache;
492static struct kmem_cache *iommu_devinfo_cache;
493static struct kmem_cache *iommu_iova_cache;
494
4c923d47 495static inline void *alloc_pgtable_page(int node)
eb3fa7cb 496{
4c923d47
SS
497 struct page *page;
498 void *vaddr = NULL;
eb3fa7cb 499
4c923d47
SS
500 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
501 if (page)
502 vaddr = page_address(page);
eb3fa7cb 503 return vaddr;
ba395927
KA
504}
505
506static inline void free_pgtable_page(void *vaddr)
507{
508 free_page((unsigned long)vaddr);
509}
510
511static inline void *alloc_domain_mem(void)
512{
354bb65e 513 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
514}
515
38717946 516static void free_domain_mem(void *vaddr)
ba395927
KA
517{
518 kmem_cache_free(iommu_domain_cache, vaddr);
519}
520
521static inline void * alloc_devinfo_mem(void)
522{
354bb65e 523 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
524}
525
526static inline void free_devinfo_mem(void *vaddr)
527{
528 kmem_cache_free(iommu_devinfo_cache, vaddr);
529}
530
531struct iova *alloc_iova_mem(void)
532{
354bb65e 533 return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
ba395927
KA
534}
535
536void free_iova_mem(struct iova *iova)
537{
538 kmem_cache_free(iommu_iova_cache, iova);
539}
540
ab8dfe25
JL
541static inline int domain_type_is_vm(struct dmar_domain *domain)
542{
543 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
544}
545
546static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
547{
548 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
549 DOMAIN_FLAG_STATIC_IDENTITY);
550}
1b573683 551
4ed0d3e6 552static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
553{
554 unsigned long sagaw;
555 int agaw = -1;
556
557 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 558 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
559 agaw >= 0; agaw--) {
560 if (test_bit(agaw, &sagaw))
561 break;
562 }
563
564 return agaw;
565}
566
4ed0d3e6
FY
567/*
568 * Calculate max SAGAW for each iommu.
569 */
570int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
571{
572 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
573}
574
575/*
576 * calculate agaw for each iommu.
 577 * "SAGAW" may be different across iommus; use a default agaw, and
 578 * fall back to a smaller supported agaw for iommus that don't support it.
579 */
580int iommu_calculate_agaw(struct intel_iommu *iommu)
581{
582 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
583}
584
2c2e2c38 585/* This functionin only returns single iommu in a domain */
8c11e798
WH
586static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
587{
588 int iommu_id;
589
2c2e2c38 590 /* si_domain and vm domain should not get here. */
ab8dfe25 591 BUG_ON(domain_type_is_vm_or_si(domain));
1b198bb0 592 iommu_id = find_first_bit(domain->iommu_bmp, g_num_of_iommus);
8c11e798
WH
593 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
594 return NULL;
595
596 return g_iommus[iommu_id];
597}
598
8e604097
WH
599static void domain_update_iommu_coherency(struct dmar_domain *domain)
600{
d0501960
DW
601 struct dmar_drhd_unit *drhd;
602 struct intel_iommu *iommu;
603 int i, found = 0;
2e12bc29 604
d0501960 605 domain->iommu_coherency = 1;
8e604097 606
1b198bb0 607 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
d0501960 608 found = 1;
8e604097
WH
609 if (!ecap_coherent(g_iommus[i]->ecap)) {
610 domain->iommu_coherency = 0;
611 break;
612 }
8e604097 613 }
d0501960
DW
614 if (found)
615 return;
616
617 /* No hardware attached; use lowest common denominator */
618 rcu_read_lock();
619 for_each_active_iommu(iommu, drhd) {
620 if (!ecap_coherent(iommu->ecap)) {
621 domain->iommu_coherency = 0;
622 break;
623 }
624 }
625 rcu_read_unlock();
8e604097
WH
626}
627
58c610bd
SY
628static void domain_update_iommu_snooping(struct dmar_domain *domain)
629{
630 int i;
631
632 domain->iommu_snooping = 1;
633
1b198bb0 634 for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) {
58c610bd
SY
635 if (!ecap_sc_support(g_iommus[i]->ecap)) {
636 domain->iommu_snooping = 0;
637 break;
638 }
58c610bd
SY
639 }
640}
641
6dd9a7c7
YS
642static void domain_update_iommu_superpage(struct dmar_domain *domain)
643{
8140a95d
AK
644 struct dmar_drhd_unit *drhd;
645 struct intel_iommu *iommu = NULL;
646 int mask = 0xf;
6dd9a7c7
YS
647
648 if (!intel_iommu_superpage) {
649 domain->iommu_superpage = 0;
650 return;
651 }
652
8140a95d 653 /* set iommu_superpage to the smallest common denominator */
0e242612 654 rcu_read_lock();
8140a95d
AK
655 for_each_active_iommu(iommu, drhd) {
656 mask &= cap_super_page_val(iommu->cap);
6dd9a7c7
YS
657 if (!mask) {
658 break;
659 }
660 }
0e242612
JL
661 rcu_read_unlock();
662
6dd9a7c7
YS
663 domain->iommu_superpage = fls(mask);
664}
665
58c610bd
SY
666/* Some capabilities may be different across iommus */
667static void domain_update_iommu_cap(struct dmar_domain *domain)
668{
669 domain_update_iommu_coherency(domain);
670 domain_update_iommu_snooping(domain);
6dd9a7c7 671 domain_update_iommu_superpage(domain);
58c610bd
SY
672}
673
156baca8 674static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
675{
676 struct dmar_drhd_unit *drhd = NULL;
b683b230 677 struct intel_iommu *iommu;
156baca8
DW
678 struct device *tmp;
679 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 680 u16 segment = 0;
c7151a8d
WH
681 int i;
682
156baca8
DW
683 if (dev_is_pci(dev)) {
684 pdev = to_pci_dev(dev);
685 segment = pci_domain_nr(pdev->bus);
686 } else if (ACPI_COMPANION(dev))
687 dev = &ACPI_COMPANION(dev)->dev;
688
0e242612 689 rcu_read_lock();
b683b230 690 for_each_active_iommu(iommu, drhd) {
156baca8 691 if (pdev && segment != drhd->segment)
276dbf99 692 continue;
c7151a8d 693
b683b230 694 for_each_active_dev_scope(drhd->devices,
156baca8
DW
695 drhd->devices_cnt, i, tmp) {
696 if (tmp == dev) {
697 *bus = drhd->devices[i].bus;
698 *devfn = drhd->devices[i].devfn;
b683b230 699 goto out;
156baca8
DW
700 }
701
702 if (!pdev || !dev_is_pci(tmp))
703 continue;
704
705 ptmp = to_pci_dev(tmp);
706 if (ptmp->subordinate &&
707 ptmp->subordinate->number <= pdev->bus->number &&
708 ptmp->subordinate->busn_res.end >= pdev->bus->number)
709 goto got_pdev;
924b6231 710 }
c7151a8d 711
156baca8
DW
712 if (pdev && drhd->include_all) {
713 got_pdev:
714 *bus = pdev->bus->number;
715 *devfn = pdev->devfn;
b683b230 716 goto out;
156baca8 717 }
c7151a8d 718 }
b683b230 719 iommu = NULL;
156baca8 720 out:
0e242612 721 rcu_read_unlock();
c7151a8d 722
b683b230 723 return iommu;
c7151a8d
WH
724}
725
5331fe6f
WH
726static void domain_flush_cache(struct dmar_domain *domain,
727 void *addr, int size)
728{
729 if (!domain->iommu_coherency)
730 clflush_cache_range(addr, size);
731}
732
ba395927
KA
733/* Gets context entry for a given bus and devfn */
734static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
735 u8 bus, u8 devfn)
736{
737 struct root_entry *root;
738 struct context_entry *context;
739 unsigned long phy_addr;
740 unsigned long flags;
741
742 spin_lock_irqsave(&iommu->lock, flags);
743 root = &iommu->root_entry[bus];
744 context = get_context_addr_from_root(root);
745 if (!context) {
4c923d47
SS
746 context = (struct context_entry *)
747 alloc_pgtable_page(iommu->node);
ba395927
KA
748 if (!context) {
749 spin_unlock_irqrestore(&iommu->lock, flags);
750 return NULL;
751 }
5b6985ce 752 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
753 phy_addr = virt_to_phys((void *)context);
754 set_root_value(root, phy_addr);
755 set_root_present(root);
756 __iommu_flush_cache(iommu, root, sizeof(*root));
757 }
758 spin_unlock_irqrestore(&iommu->lock, flags);
759 return &context[devfn];
760}
761
762static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
763{
764 struct root_entry *root;
765 struct context_entry *context;
766 int ret;
767 unsigned long flags;
768
769 spin_lock_irqsave(&iommu->lock, flags);
770 root = &iommu->root_entry[bus];
771 context = get_context_addr_from_root(root);
772 if (!context) {
773 ret = 0;
774 goto out;
775 }
c07e7d21 776 ret = context_present(&context[devfn]);
ba395927
KA
777out:
778 spin_unlock_irqrestore(&iommu->lock, flags);
779 return ret;
780}
781
782static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
783{
784 struct root_entry *root;
785 struct context_entry *context;
786 unsigned long flags;
787
788 spin_lock_irqsave(&iommu->lock, flags);
789 root = &iommu->root_entry[bus];
790 context = get_context_addr_from_root(root);
791 if (context) {
c07e7d21 792 context_clear_entry(&context[devfn]);
ba395927
KA
793 __iommu_flush_cache(iommu, &context[devfn], \
794 sizeof(*context));
795 }
796 spin_unlock_irqrestore(&iommu->lock, flags);
797}
798
799static void free_context_table(struct intel_iommu *iommu)
800{
801 struct root_entry *root;
802 int i;
803 unsigned long flags;
804 struct context_entry *context;
805
806 spin_lock_irqsave(&iommu->lock, flags);
807 if (!iommu->root_entry) {
808 goto out;
809 }
810 for (i = 0; i < ROOT_ENTRY_NR; i++) {
811 root = &iommu->root_entry[i];
812 context = get_context_addr_from_root(root);
813 if (context)
814 free_pgtable_page(context);
815 }
816 free_pgtable_page(iommu->root_entry);
817 iommu->root_entry = NULL;
818out:
819 spin_unlock_irqrestore(&iommu->lock, flags);
820}
821
b026fd28 822static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 823 unsigned long pfn, int *target_level)
ba395927 824{
b026fd28 825 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
826 struct dma_pte *parent, *pte = NULL;
827 int level = agaw_to_level(domain->agaw);
4399c8bf 828 int offset;
ba395927
KA
829
830 BUG_ON(!domain->pgd);
f9423606
JS
831
832 if (addr_width < BITS_PER_LONG && pfn >> addr_width)
833 /* Address beyond IOMMU's addressing capabilities. */
834 return NULL;
835
ba395927
KA
836 parent = domain->pgd;
837
5cf0a76f 838 while (1) {
ba395927
KA
839 void *tmp_page;
840
b026fd28 841 offset = pfn_level_offset(pfn, level);
ba395927 842 pte = &parent[offset];
5cf0a76f 843 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 844 break;
5cf0a76f 845 if (level == *target_level)
ba395927
KA
846 break;
847
19c239ce 848 if (!dma_pte_present(pte)) {
c85994e4
DW
849 uint64_t pteval;
850
4c923d47 851 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 852
206a73c1 853 if (!tmp_page)
ba395927 854 return NULL;
206a73c1 855
c85994e4 856 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 857 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 858 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
859 /* Someone else set it while we were thinking; use theirs. */
860 free_pgtable_page(tmp_page);
effad4b5 861 else
c85994e4 862 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 863 }
5cf0a76f
DW
864 if (level == 1)
865 break;
866
19c239ce 867 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
868 level--;
869 }
870
5cf0a76f
DW
871 if (!*target_level)
872 *target_level = level;
873
ba395927
KA
874 return pte;
875}
876
6dd9a7c7 877
ba395927 878/* return address's pte at specific level */
90dcfb5e
DW
879static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
880 unsigned long pfn,
6dd9a7c7 881 int level, int *large_page)
ba395927
KA
882{
883 struct dma_pte *parent, *pte = NULL;
884 int total = agaw_to_level(domain->agaw);
885 int offset;
886
887 parent = domain->pgd;
888 while (level <= total) {
90dcfb5e 889 offset = pfn_level_offset(pfn, total);
ba395927
KA
890 pte = &parent[offset];
891 if (level == total)
892 return pte;
893
6dd9a7c7
YS
894 if (!dma_pte_present(pte)) {
895 *large_page = total;
ba395927 896 break;
6dd9a7c7
YS
897 }
898
e16922af 899 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
900 *large_page = total;
901 return pte;
902 }
903
19c239ce 904 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
905 total--;
906 }
907 return NULL;
908}
909
ba395927 910/* clear last level pte; a tlb flush should follow */
5cf0a76f 911static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
912 unsigned long start_pfn,
913 unsigned long last_pfn)
ba395927 914{
04b18e65 915 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
6dd9a7c7 916 unsigned int large_page = 1;
310a5ab9 917 struct dma_pte *first_pte, *pte;
66eae846 918
04b18e65 919 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 920 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 921 BUG_ON(start_pfn > last_pfn);
ba395927 922
04b18e65 923 /* we don't need lock here; nobody else touches the iova range */
59c36286 924 do {
6dd9a7c7
YS
925 large_page = 1;
926 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 927 if (!pte) {
6dd9a7c7 928 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
929 continue;
930 }
6dd9a7c7 931 do {
310a5ab9 932 dma_clear_pte(pte);
6dd9a7c7 933 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 934 pte++;
75e6bf96
DW
935 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
936
310a5ab9
DW
937 domain_flush_cache(domain, first_pte,
938 (void *)pte - (void *)first_pte);
59c36286
DW
939
940 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
941}
942
3269ee0b
AW
943static void dma_pte_free_level(struct dmar_domain *domain, int level,
944 struct dma_pte *pte, unsigned long pfn,
945 unsigned long start_pfn, unsigned long last_pfn)
946{
947 pfn = max(start_pfn, pfn);
948 pte = &pte[pfn_level_offset(pfn, level)];
949
950 do {
951 unsigned long level_pfn;
952 struct dma_pte *level_pte;
953
954 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
955 goto next;
956
957 level_pfn = pfn & level_mask(level - 1);
958 level_pte = phys_to_virt(dma_pte_addr(pte));
959
960 if (level > 2)
961 dma_pte_free_level(domain, level - 1, level_pte,
962 level_pfn, start_pfn, last_pfn);
963
964 /* If range covers entire pagetable, free it */
965 if (!(start_pfn > level_pfn ||
08336fd2 966 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
967 dma_clear_pte(pte);
968 domain_flush_cache(domain, pte, sizeof(*pte));
969 free_pgtable_page(level_pte);
970 }
971next:
972 pfn += level_size(level);
973 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
974}
975
ba395927
KA
976/* free page table pages. last level pte should already be cleared */
977static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
978 unsigned long start_pfn,
979 unsigned long last_pfn)
ba395927 980{
6660c63a 981 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927 982
6660c63a
DW
983 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
984 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 985 BUG_ON(start_pfn > last_pfn);
ba395927 986
f3a0a52f 987 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
988 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
989 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 990
ba395927 991 /* free pgd */
d794dc9b 992 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
993 free_pgtable_page(domain->pgd);
994 domain->pgd = NULL;
995 }
996}
997
ea8ea460
DW
998/* When a page at a given level is being unlinked from its parent, we don't
999 need to *modify* it at all. All we need to do is make a list of all the
1000 pages which can be freed just as soon as we've flushed the IOTLB and we
1001 know the hardware page-walk will no longer touch them.
1002 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1003 be freed. */
1004static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1005 int level, struct dma_pte *pte,
1006 struct page *freelist)
1007{
1008 struct page *pg;
1009
1010 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1011 pg->freelist = freelist;
1012 freelist = pg;
1013
1014 if (level == 1)
1015 return freelist;
1016
adeb2590
JL
1017 pte = page_address(pg);
1018 do {
ea8ea460
DW
1019 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1020 freelist = dma_pte_list_pagetables(domain, level - 1,
1021 pte, freelist);
adeb2590
JL
1022 pte++;
1023 } while (!first_pte_in_page(pte));
ea8ea460
DW
1024
1025 return freelist;
1026}
1027
1028static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1029 struct dma_pte *pte, unsigned long pfn,
1030 unsigned long start_pfn,
1031 unsigned long last_pfn,
1032 struct page *freelist)
1033{
1034 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1035
1036 pfn = max(start_pfn, pfn);
1037 pte = &pte[pfn_level_offset(pfn, level)];
1038
1039 do {
1040 unsigned long level_pfn;
1041
1042 if (!dma_pte_present(pte))
1043 goto next;
1044
1045 level_pfn = pfn & level_mask(level);
1046
1047 /* If range covers entire pagetable, free it */
1048 if (start_pfn <= level_pfn &&
1049 last_pfn >= level_pfn + level_size(level) - 1) {
 1050 /* These subordinate page tables are going away entirely. Don't
1051 bother to clear them; we're just going to *free* them. */
1052 if (level > 1 && !dma_pte_superpage(pte))
1053 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1054
1055 dma_clear_pte(pte);
1056 if (!first_pte)
1057 first_pte = pte;
1058 last_pte = pte;
1059 } else if (level > 1) {
1060 /* Recurse down into a level that isn't *entirely* obsolete */
1061 freelist = dma_pte_clear_level(domain, level - 1,
1062 phys_to_virt(dma_pte_addr(pte)),
1063 level_pfn, start_pfn, last_pfn,
1064 freelist);
1065 }
1066next:
1067 pfn += level_size(level);
1068 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1069
1070 if (first_pte)
1071 domain_flush_cache(domain, first_pte,
1072 (void *)++last_pte - (void *)first_pte);
1073
1074 return freelist;
1075}
1076
1077/* We can't just free the pages because the IOMMU may still be walking
1078 the page tables, and may have cached the intermediate levels. The
1079 pages can only be freed after the IOTLB flush has been done. */
1080struct page *domain_unmap(struct dmar_domain *domain,
1081 unsigned long start_pfn,
1082 unsigned long last_pfn)
1083{
1084 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
1085 struct page *freelist = NULL;
1086
1087 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
1088 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
1089 BUG_ON(start_pfn > last_pfn);
1090
1091 /* we don't need lock here; nobody else touches the iova range */
1092 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1093 domain->pgd, 0, start_pfn, last_pfn, NULL);
1094
1095 /* free pgd */
1096 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1097 struct page *pgd_page = virt_to_page(domain->pgd);
1098 pgd_page->freelist = freelist;
1099 freelist = pgd_page;
1100
1101 domain->pgd = NULL;
1102 }
1103
1104 return freelist;
1105}
1106
1107void dma_free_pagelist(struct page *freelist)
1108{
1109 struct page *pg;
1110
1111 while ((pg = freelist)) {
1112 freelist = pg->freelist;
1113 free_pgtable_page(page_address(pg));
1114 }
1115}
1116
ba395927
KA
1117/* iommu handling */
1118static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1119{
1120 struct root_entry *root;
1121 unsigned long flags;
1122
4c923d47 1123 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
1124 if (!root)
1125 return -ENOMEM;
1126
5b6985ce 1127 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1128
1129 spin_lock_irqsave(&iommu->lock, flags);
1130 iommu->root_entry = root;
1131 spin_unlock_irqrestore(&iommu->lock, flags);
1132
1133 return 0;
1134}
1135
ba395927
KA
1136static void iommu_set_root_entry(struct intel_iommu *iommu)
1137{
1138 void *addr;
c416daa9 1139 u32 sts;
ba395927
KA
1140 unsigned long flag;
1141
1142 addr = iommu->root_entry;
1143
1f5b3c3f 1144 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1145 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
1146
c416daa9 1147 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1148
1149 /* Make sure hardware complete it */
1150 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1151 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1152
1f5b3c3f 1153 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1154}
1155
1156static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1157{
1158 u32 val;
1159 unsigned long flag;
1160
9af88143 1161 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1162 return;
ba395927 1163
1f5b3c3f 1164 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1165 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1166
1167 /* Make sure hardware complete it */
1168 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1169 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1170
1f5b3c3f 1171 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1172}
1173
 1174/* return value determines if we need a write buffer flush */
4c25a2c1
DW
1175static void __iommu_flush_context(struct intel_iommu *iommu,
1176 u16 did, u16 source_id, u8 function_mask,
1177 u64 type)
ba395927
KA
1178{
1179 u64 val = 0;
1180 unsigned long flag;
1181
ba395927
KA
1182 switch (type) {
1183 case DMA_CCMD_GLOBAL_INVL:
1184 val = DMA_CCMD_GLOBAL_INVL;
1185 break;
1186 case DMA_CCMD_DOMAIN_INVL:
1187 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1188 break;
1189 case DMA_CCMD_DEVICE_INVL:
1190 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1191 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1192 break;
1193 default:
1194 BUG();
1195 }
1196 val |= DMA_CCMD_ICC;
1197
1f5b3c3f 1198 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1199 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1200
1201 /* Make sure hardware complete it */
1202 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1203 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1204
1f5b3c3f 1205 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1206}
1207
ba395927 1208/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
1209static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1210 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1211{
1212 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1213 u64 val = 0, val_iva = 0;
1214 unsigned long flag;
1215
ba395927
KA
1216 switch (type) {
1217 case DMA_TLB_GLOBAL_FLUSH:
 1218 /* global flush doesn't need to set IVA_REG */
1219 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1220 break;
1221 case DMA_TLB_DSI_FLUSH:
1222 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1223 break;
1224 case DMA_TLB_PSI_FLUSH:
1225 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1226 /* IH bit is passed in as part of address */
ba395927
KA
1227 val_iva = size_order | addr;
1228 break;
1229 default:
1230 BUG();
1231 }
1232 /* Note: set drain read/write */
1233#if 0
1234 /*
 1235 * This is probably meant to be extra safe. Looks like we can
1236 * ignore it without any impact.
1237 */
1238 if (cap_read_drain(iommu->cap))
1239 val |= DMA_TLB_READ_DRAIN;
1240#endif
1241 if (cap_write_drain(iommu->cap))
1242 val |= DMA_TLB_WRITE_DRAIN;
1243
1f5b3c3f 1244 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1245 /* Note: Only uses first TLB reg currently */
1246 if (val_iva)
1247 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1248 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1249
1250 /* Make sure hardware complete it */
1251 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1252 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1253
1f5b3c3f 1254 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1255
1256 /* check IOTLB invalidation granularity */
1257 if (DMA_TLB_IAIG(val) == 0)
1258 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1259 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1260 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1261 (unsigned long long)DMA_TLB_IIRG(type),
1262 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1263}
1264
64ae892b
DW
1265static struct device_domain_info *
1266iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1267 u8 bus, u8 devfn)
93a23a72
YZ
1268{
1269 int found = 0;
1270 unsigned long flags;
1271 struct device_domain_info *info;
0bcb3e28 1272 struct pci_dev *pdev;
93a23a72
YZ
1273
1274 if (!ecap_dev_iotlb_support(iommu->ecap))
1275 return NULL;
1276
1277 if (!iommu->qi)
1278 return NULL;
1279
1280 spin_lock_irqsave(&device_domain_lock, flags);
1281 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1282 if (info->iommu == iommu && info->bus == bus &&
1283 info->devfn == devfn) {
93a23a72
YZ
1284 found = 1;
1285 break;
1286 }
1287 spin_unlock_irqrestore(&device_domain_lock, flags);
1288
0bcb3e28 1289 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1290 return NULL;
1291
0bcb3e28
DW
1292 pdev = to_pci_dev(info->dev);
1293
1294 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1295 return NULL;
1296
0bcb3e28 1297 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1298 return NULL;
1299
93a23a72
YZ
1300 return info;
1301}
1302
1303static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1304{
0bcb3e28 1305 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1306 return;
1307
0bcb3e28 1308 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1309}
1310
1311static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1312{
0bcb3e28
DW
1313 if (!info->dev || !dev_is_pci(info->dev) ||
1314 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1315 return;
1316
0bcb3e28 1317 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1318}
1319
1320static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1321 u64 addr, unsigned mask)
1322{
1323 u16 sid, qdep;
1324 unsigned long flags;
1325 struct device_domain_info *info;
1326
1327 spin_lock_irqsave(&device_domain_lock, flags);
1328 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1329 struct pci_dev *pdev;
1330 if (!info->dev || !dev_is_pci(info->dev))
1331 continue;
1332
1333 pdev = to_pci_dev(info->dev);
1334 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1335 continue;
1336
1337 sid = info->bus << 8 | info->devfn;
0bcb3e28 1338 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1339 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1340 }
1341 spin_unlock_irqrestore(&device_domain_lock, flags);
1342}
1343
1f0ef2aa 1344static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
ea8ea460 1345 unsigned long pfn, unsigned int pages, int ih, int map)
ba395927 1346{
9dd2fe89 1347 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1348 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1349
ba395927
KA
1350 BUG_ON(pages == 0);
1351
ea8ea460
DW
1352 if (ih)
1353 ih = 1 << 6;
ba395927 1354 /*
9dd2fe89
YZ
 1355 * Fall back to domain selective flush if no PSI support or the size is
1356 * too big.
ba395927
KA
1357 * PSI requires page size to be 2 ^ x, and the base address is naturally
1358 * aligned to the size
1359 */
9dd2fe89
YZ
1360 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1361 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1362 DMA_TLB_DSI_FLUSH);
9dd2fe89 1363 else
ea8ea460 1364 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1365 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1366
1367 /*
82653633
NA
1368 * In caching mode, changes of pages from non-present to present require
 1369 * a flush. However, the device IOTLB doesn't need to be flushed in this case.
bf92df30 1370 */
82653633 1371 if (!cap_caching_mode(iommu->cap) || !map)
93a23a72 1372 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1373}
1374
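/*
 * Worked example (for illustration): for pages = 3 the mask above is
 * ilog2(__roundup_pow_of_two(3)) = ilog2(4) = 2, i.e. a page-selective
 * invalidation covering 4 pages, which satisfies the "2 ^ x, naturally
 * aligned" requirement noted above; anything wider than
 * cap_max_amask_val() falls back to a domain-selective flush instead.
 */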
f8bab735 1375static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1376{
1377 u32 pmen;
1378 unsigned long flags;
1379
1f5b3c3f 1380 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1381 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1382 pmen &= ~DMA_PMEN_EPM;
1383 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1384
1385 /* wait for the protected region status bit to clear */
1386 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1387 readl, !(pmen & DMA_PMEN_PRS), pmen);
1388
1f5b3c3f 1389 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1390}
1391
ba395927
KA
1392static int iommu_enable_translation(struct intel_iommu *iommu)
1393{
1394 u32 sts;
1395 unsigned long flags;
1396
1f5b3c3f 1397 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1398 iommu->gcmd |= DMA_GCMD_TE;
1399 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1400
1401 /* Make sure hardware complete it */
1402 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1403 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1404
1f5b3c3f 1405 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1406 return 0;
1407}
1408
1409static int iommu_disable_translation(struct intel_iommu *iommu)
1410{
1411 u32 sts;
1412 unsigned long flag;
1413
1f5b3c3f 1414 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1415 iommu->gcmd &= ~DMA_GCMD_TE;
1416 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1417
1418 /* Make sure hardware complete it */
1419 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1420 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1421
1f5b3c3f 1422 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1423 return 0;
1424}
1425
3460a6d9 1426
ba395927
KA
1427static int iommu_init_domains(struct intel_iommu *iommu)
1428{
1429 unsigned long ndomains;
1430 unsigned long nlongs;
1431
1432 ndomains = cap_ndoms(iommu->cap);
852bdb04
JL
1433 pr_debug("IOMMU%d: Number of Domains supported <%ld>\n",
1434 iommu->seq_id, ndomains);
ba395927
KA
1435 nlongs = BITS_TO_LONGS(ndomains);
1436
94a91b50
DD
1437 spin_lock_init(&iommu->lock);
1438
ba395927
KA
 1439 /* TBD: there might be 64K domains;
 1440 * consider other allocation for future chips
 1441 */
1442 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1443 if (!iommu->domain_ids) {
852bdb04
JL
1444 pr_err("IOMMU%d: allocating domain id array failed\n",
1445 iommu->seq_id);
ba395927
KA
1446 return -ENOMEM;
1447 }
1448 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1449 GFP_KERNEL);
1450 if (!iommu->domains) {
852bdb04
JL
1451 pr_err("IOMMU%d: allocating domain array failed\n",
1452 iommu->seq_id);
1453 kfree(iommu->domain_ids);
1454 iommu->domain_ids = NULL;
ba395927
KA
1455 return -ENOMEM;
1456 }
1457
1458 /*
 1459 * If caching mode is set, then invalid translations are tagged
 1460 * with domain id 0. Hence we need to pre-allocate it.
1461 */
1462 if (cap_caching_mode(iommu->cap))
1463 set_bit(0, iommu->domain_ids);
1464 return 0;
1465}
ba395927 1466
a868e6b7 1467static void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1468{
1469 struct dmar_domain *domain;
2a46ddf7 1470 int i;
ba395927 1471
94a91b50 1472 if ((iommu->domains) && (iommu->domain_ids)) {
a45946ab 1473 for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) {
a4eaa86c
JL
1474 /*
1475 * Domain id 0 is reserved for invalid translation
1476 * if hardware supports caching mode.
1477 */
1478 if (cap_caching_mode(iommu->cap) && i == 0)
1479 continue;
1480
94a91b50
DD
1481 domain = iommu->domains[i];
1482 clear_bit(i, iommu->domain_ids);
2a46ddf7 1483 if (domain_detach_iommu(domain, iommu) == 0)
92d03cc8 1484 domain_exit(domain);
5e98c4b1 1485 }
ba395927
KA
1486 }
1487
1488 if (iommu->gcmd & DMA_GCMD_TE)
1489 iommu_disable_translation(iommu);
1490
ba395927
KA
1491 kfree(iommu->domains);
1492 kfree(iommu->domain_ids);
a868e6b7
JL
1493 iommu->domains = NULL;
1494 iommu->domain_ids = NULL;
ba395927 1495
d9630fe9
WH
1496 g_iommus[iommu->seq_id] = NULL;
1497
ba395927
KA
1498 /* free context mapping */
1499 free_context_table(iommu);
ba395927
KA
1500}
1501
ab8dfe25 1502static struct dmar_domain *alloc_domain(int flags)
ba395927 1503{
92d03cc8
JL
1504 /* domain id for virtual machine, it won't be set in context */
1505 static atomic_t vm_domid = ATOMIC_INIT(0);
ba395927 1506 struct dmar_domain *domain;
ba395927
KA
1507
1508 domain = alloc_domain_mem();
1509 if (!domain)
1510 return NULL;
1511
ab8dfe25 1512 memset(domain, 0, sizeof(*domain));
4c923d47 1513 domain->nid = -1;
ab8dfe25 1514 domain->flags = flags;
92d03cc8
JL
1515 spin_lock_init(&domain->iommu_lock);
1516 INIT_LIST_HEAD(&domain->devices);
ab8dfe25 1517 if (flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
92d03cc8 1518 domain->id = atomic_inc_return(&vm_domid);
2c2e2c38
FY
1519
1520 return domain;
1521}
1522
fb170fb4
JL
1523static int __iommu_attach_domain(struct dmar_domain *domain,
1524 struct intel_iommu *iommu)
2c2e2c38
FY
1525{
1526 int num;
1527 unsigned long ndomains;
2c2e2c38 1528
ba395927 1529 ndomains = cap_ndoms(iommu->cap);
ba395927 1530 num = find_first_zero_bit(iommu->domain_ids, ndomains);
fb170fb4
JL
1531 if (num < ndomains) {
1532 set_bit(num, iommu->domain_ids);
1533 iommu->domains[num] = domain;
1534 } else {
1535 num = -ENOSPC;
ba395927
KA
1536 }
1537
fb170fb4
JL
1538 return num;
1539}
1540
1541static int iommu_attach_domain(struct dmar_domain *domain,
1542 struct intel_iommu *iommu)
1543{
1544 int num;
1545 unsigned long flags;
1546
1547 spin_lock_irqsave(&iommu->lock, flags);
1548 num = __iommu_attach_domain(domain, iommu);
44bde614 1549 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4
JL
1550 if (num < 0)
1551 pr_err("IOMMU: no free domain ids\n");
ba395927 1552
fb170fb4 1553 return num;
ba395927
KA
1554}
1555
44bde614
JL
1556static int iommu_attach_vm_domain(struct dmar_domain *domain,
1557 struct intel_iommu *iommu)
1558{
1559 int num;
1560 unsigned long ndomains;
1561
1562 ndomains = cap_ndoms(iommu->cap);
1563 for_each_set_bit(num, iommu->domain_ids, ndomains)
1564 if (iommu->domains[num] == domain)
1565 return num;
1566
1567 return __iommu_attach_domain(domain, iommu);
1568}
1569
2c2e2c38
FY
1570static void iommu_detach_domain(struct dmar_domain *domain,
1571 struct intel_iommu *iommu)
ba395927
KA
1572{
1573 unsigned long flags;
2c2e2c38 1574 int num, ndomains;
ba395927 1575
8c11e798 1576 spin_lock_irqsave(&iommu->lock, flags);
fb170fb4
JL
1577 if (domain_type_is_vm_or_si(domain)) {
1578 ndomains = cap_ndoms(iommu->cap);
1579 for_each_set_bit(num, iommu->domain_ids, ndomains) {
1580 if (iommu->domains[num] == domain) {
1581 clear_bit(num, iommu->domain_ids);
1582 iommu->domains[num] = NULL;
1583 break;
1584 }
2c2e2c38 1585 }
fb170fb4
JL
1586 } else {
1587 clear_bit(domain->id, iommu->domain_ids);
1588 iommu->domains[domain->id] = NULL;
2c2e2c38 1589 }
8c11e798 1590 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1591}
1592
fb170fb4
JL
1593static void domain_attach_iommu(struct dmar_domain *domain,
1594 struct intel_iommu *iommu)
1595{
1596 unsigned long flags;
1597
1598 spin_lock_irqsave(&domain->iommu_lock, flags);
1599 if (!test_and_set_bit(iommu->seq_id, domain->iommu_bmp)) {
1600 domain->iommu_count++;
1601 if (domain->iommu_count == 1)
1602 domain->nid = iommu->node;
1603 domain_update_iommu_cap(domain);
1604 }
1605 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1606}
1607
1608static int domain_detach_iommu(struct dmar_domain *domain,
1609 struct intel_iommu *iommu)
1610{
1611 unsigned long flags;
1612 int count = INT_MAX;
1613
1614 spin_lock_irqsave(&domain->iommu_lock, flags);
1615 if (test_and_clear_bit(iommu->seq_id, domain->iommu_bmp)) {
1616 count = --domain->iommu_count;
1617 domain_update_iommu_cap(domain);
1618 }
1619 spin_unlock_irqrestore(&domain->iommu_lock, flags);
1620
1621 return count;
1622}
1623
ba395927 1624static struct iova_domain reserved_iova_list;
8a443df4 1625static struct lock_class_key reserved_rbtree_key;
ba395927 1626
51a63e67 1627static int dmar_init_reserved_ranges(void)
ba395927
KA
1628{
1629 struct pci_dev *pdev = NULL;
1630 struct iova *iova;
1631 int i;
ba395927 1632
f661197e 1633 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1634
8a443df4
MG
1635 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1636 &reserved_rbtree_key);
1637
ba395927
KA
1638 /* IOAPIC ranges shouldn't be accessed by DMA */
1639 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1640 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1641 if (!iova) {
ba395927 1642 printk(KERN_ERR "Reserve IOAPIC range failed\n");
51a63e67
JC
1643 return -ENODEV;
1644 }
ba395927
KA
1645
1646 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1647 for_each_pci_dev(pdev) {
1648 struct resource *r;
1649
1650 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1651 r = &pdev->resource[i];
1652 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1653 continue;
1a4a4551
DW
1654 iova = reserve_iova(&reserved_iova_list,
1655 IOVA_PFN(r->start),
1656 IOVA_PFN(r->end));
51a63e67 1657 if (!iova) {
ba395927 1658 printk(KERN_ERR "Reserve iova failed\n");
51a63e67
JC
1659 return -ENODEV;
1660 }
ba395927
KA
1661 }
1662 }
51a63e67 1663 return 0;
ba395927
KA
1664}
1665
1666static void domain_reserve_special_ranges(struct dmar_domain *domain)
1667{
1668 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1669}
1670
1671static inline int guestwidth_to_adjustwidth(int gaw)
1672{
1673 int agaw;
1674 int r = (gaw - 12) % 9;
1675
1676 if (r == 0)
1677 agaw = gaw;
1678 else
1679 agaw = gaw + 9 - r;
1680 if (agaw > 64)
1681 agaw = 64;
1682 return agaw;
1683}
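/*
 * Worked example (for illustration): guestwidth_to_adjustwidth(39) has
 * r = (39 - 12) % 9 = 0, so the width stays 39; for gaw = 40, r = 1 and
 * the result is 40 + 9 - 1 = 48. The guest width is rounded up to the
 * next value of the form 12 + 9 * n, then capped at 64.
 */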
1684
1685static int domain_init(struct dmar_domain *domain, int guest_width)
1686{
1687 struct intel_iommu *iommu;
1688 int adjust_width, agaw;
1689 unsigned long sagaw;
1690
f661197e 1691 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
ba395927
KA
1692 domain_reserve_special_ranges(domain);
1693
1694 /* calculate AGAW */
8c11e798 1695 iommu = domain_get_iommu(domain);
ba395927
KA
1696 if (guest_width > cap_mgaw(iommu->cap))
1697 guest_width = cap_mgaw(iommu->cap);
1698 domain->gaw = guest_width;
1699 adjust_width = guestwidth_to_adjustwidth(guest_width);
1700 agaw = width_to_agaw(adjust_width);
1701 sagaw = cap_sagaw(iommu->cap);
1702 if (!test_bit(agaw, &sagaw)) {
1703 /* hardware doesn't support it, choose a bigger one */
1704 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1705 agaw = find_next_bit(&sagaw, 5, agaw);
1706 if (agaw >= 5)
1707 return -ENODEV;
1708 }
1709 domain->agaw = agaw;
ba395927 1710
8e604097
WH
1711 if (ecap_coherent(iommu->ecap))
1712 domain->iommu_coherency = 1;
1713 else
1714 domain->iommu_coherency = 0;
1715
58c610bd
SY
1716 if (ecap_sc_support(iommu->ecap))
1717 domain->iommu_snooping = 1;
1718 else
1719 domain->iommu_snooping = 0;
1720
214e39aa
DW
1721 if (intel_iommu_superpage)
1722 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1723 else
1724 domain->iommu_superpage = 0;
1725
4c923d47 1726 domain->nid = iommu->node;
c7151a8d 1727
ba395927 1728 /* always allocate the top pgd */
4c923d47 1729 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1730 if (!domain->pgd)
1731 return -ENOMEM;
5b6985ce 1732 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1733 return 0;
1734}
1735
1736static void domain_exit(struct dmar_domain *domain)
1737{
2c2e2c38
FY
1738 struct dmar_drhd_unit *drhd;
1739 struct intel_iommu *iommu;
ea8ea460 1740 struct page *freelist = NULL;
ba395927
KA
1741
 1742 /* Domain 0 is reserved, so don't process it */
1743 if (!domain)
1744 return;
1745
7b668357
AW
1746 /* Flush any lazy unmaps that may reference this domain */
1747 if (!intel_iommu_strict)
1748 flush_unmaps_timeout(0);
1749
92d03cc8 1750 /* remove associated devices */
ba395927 1751 domain_remove_dev_info(domain);
92d03cc8 1752
ba395927
KA
1753 /* destroy iovas */
1754 put_iova_domain(&domain->iovad);
ba395927 1755
ea8ea460 1756 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1757
92d03cc8 1758 /* clear attached or cached domains */
0e242612 1759 rcu_read_lock();
2c2e2c38 1760 for_each_active_iommu(iommu, drhd)
fb170fb4 1761 iommu_detach_domain(domain, iommu);
0e242612 1762 rcu_read_unlock();
2c2e2c38 1763
ea8ea460
DW
1764 dma_free_pagelist(freelist);
1765
ba395927
KA
1766 free_domain_mem(domain);
1767}
1768
64ae892b
DW
1769static int domain_context_mapping_one(struct dmar_domain *domain,
1770 struct intel_iommu *iommu,
1771 u8 bus, u8 devfn, int translation)
ba395927
KA
1772{
1773 struct context_entry *context;
ba395927 1774 unsigned long flags;
ea6606b0 1775 struct dma_pte *pgd;
ea6606b0
WH
1776 int id;
1777 int agaw;
93a23a72 1778 struct device_domain_info *info = NULL;
ba395927
KA
1779
1780 pr_debug("Set context mapping for %02x:%02x.%d\n",
1781 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1782
ba395927 1783 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1784 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1785 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1786
ba395927
KA
1787 context = device_to_context_entry(iommu, bus, devfn);
1788 if (!context)
1789 return -ENOMEM;
1790 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1791 if (context_present(context)) {
ba395927
KA
1792 spin_unlock_irqrestore(&iommu->lock, flags);
1793 return 0;
1794 }
1795
ea6606b0
WH
1796 id = domain->id;
1797 pgd = domain->pgd;
1798
ab8dfe25 1799 if (domain_type_is_vm_or_si(domain)) {
44bde614
JL
1800 if (domain_type_is_vm(domain)) {
1801 id = iommu_attach_vm_domain(domain, iommu);
fb170fb4 1802 if (id < 0) {
ea6606b0 1803 spin_unlock_irqrestore(&iommu->lock, flags);
fb170fb4 1804 pr_err("IOMMU: no free domain ids\n");
ea6606b0
WH
1805 return -EFAULT;
1806 }
ea6606b0
WH
1807 }
1808
1809 /* Skip top levels of page tables for
1810 * an iommu which has less agaw than the default.
1672af11 1811 * Unnecessary for PT mode.
ea6606b0 1812 */
1672af11
CW
1813 if (translation != CONTEXT_TT_PASS_THROUGH) {
1814 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1815 pgd = phys_to_virt(dma_pte_addr(pgd));
1816 if (!dma_pte_present(pgd)) {
1817 spin_unlock_irqrestore(&iommu->lock, flags);
1818 return -ENOMEM;
1819 }
ea6606b0
WH
1820 }
1821 }
1822 }
1823
1824 context_set_domain_id(context, id);
4ed0d3e6 1825
93a23a72 1826 if (translation != CONTEXT_TT_PASS_THROUGH) {
64ae892b 1827 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1828 translation = info ? CONTEXT_TT_DEV_IOTLB :
1829 CONTEXT_TT_MULTI_LEVEL;
1830 }
4ed0d3e6
FY
1831 /*
1832 * In pass through mode, AW must be programmed to indicate the largest
1833 * AGAW value supported by hardware. And ASR is ignored by hardware.
1834 */
93a23a72 1835 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1836 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1837 else {
1838 context_set_address_root(context, virt_to_phys(pgd));
1839 context_set_address_width(context, iommu->agaw);
1840 }
4ed0d3e6
FY
1841
1842 context_set_translation_type(context, translation);
c07e7d21
MM
1843 context_set_fault_enable(context);
1844 context_set_present(context);
5331fe6f 1845 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1846
4c25a2c1
DW
1847 /*
1848 * It's a non-present to present mapping. If hardware doesn't cache
1849 * non-present entries we only need to flush the write-buffer. If it
1850 * _does_ cache non-present entries, then it does so in the special
1851 * domain #0, which we have to flush:
1852 */
1853 if (cap_caching_mode(iommu->cap)) {
1854 iommu->flush.flush_context(iommu, 0,
1855 (((u16)bus) << 8) | devfn,
1856 DMA_CCMD_MASK_NOBIT,
1857 DMA_CCMD_DEVICE_INVL);
18fd779a 1858 iommu->flush.flush_iotlb(iommu, id, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1859 } else {
ba395927 1860 iommu_flush_write_buffer(iommu);
4c25a2c1 1861 }
93a23a72 1862 iommu_enable_dev_iotlb(info);
ba395927 1863 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d 1864
fb170fb4
JL
1865 domain_attach_iommu(domain, iommu);
1866
ba395927
KA
1867 return 0;
1868}
1869
579305f7
AW
1870struct domain_context_mapping_data {
1871 struct dmar_domain *domain;
1872 struct intel_iommu *iommu;
1873 int translation;
1874};
1875
1876static int domain_context_mapping_cb(struct pci_dev *pdev,
1877 u16 alias, void *opaque)
1878{
1879 struct domain_context_mapping_data *data = opaque;
1880
1881 return domain_context_mapping_one(data->domain, data->iommu,
1882 PCI_BUS_NUM(alias), alias & 0xff,
1883 data->translation);
1884}
1885
ba395927 1886static int
e1f167f3
DW
1887domain_context_mapping(struct dmar_domain *domain, struct device *dev,
1888 int translation)
ba395927 1889{
64ae892b 1890 struct intel_iommu *iommu;
156baca8 1891 u8 bus, devfn;
579305f7 1892 struct domain_context_mapping_data data;
64ae892b 1893
e1f167f3 1894 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1895 if (!iommu)
1896 return -ENODEV;
ba395927 1897
579305f7
AW
1898 if (!dev_is_pci(dev))
1899 return domain_context_mapping_one(domain, iommu, bus, devfn,
4ed0d3e6 1900 translation);
579305f7
AW
1901
1902 data.domain = domain;
1903 data.iommu = iommu;
1904 data.translation = translation;
1905
1906 return pci_for_each_dma_alias(to_pci_dev(dev),
1907 &domain_context_mapping_cb, &data);
1908}
1909
1910static int domain_context_mapped_cb(struct pci_dev *pdev,
1911 u16 alias, void *opaque)
1912{
1913 struct intel_iommu *iommu = opaque;
1914
1915 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
1916}
1917
e1f167f3 1918static int domain_context_mapped(struct device *dev)
ba395927 1919{
5331fe6f 1920 struct intel_iommu *iommu;
156baca8 1921 u8 bus, devfn;
5331fe6f 1922
e1f167f3 1923 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
1924 if (!iommu)
1925 return -ENODEV;
ba395927 1926
579305f7
AW
1927 if (!dev_is_pci(dev))
1928 return device_context_mapped(iommu, bus, devfn);
e1f167f3 1929
579305f7
AW
1930 return !pci_for_each_dma_alias(to_pci_dev(dev),
1931 domain_context_mapped_cb, iommu);
ba395927
KA
1932}
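The two callbacks above lean on the convention that the alias walker stops at, and returns, the first non-zero callback result: domain_context_mapped_cb() returns !device_context_mapped(), so the walk aborts on the first alias that lacks a context entry, and the outer ! in domain_context_mapped() turns that into "not mapped". A generic sketch of this early-stop pattern, using a hypothetical walker in place of pci_for_each_dma_alias():

#include <stdio.h>

/* Hypothetical walker standing in for pci_for_each_dma_alias(): stop on non-zero. */
static int for_each_alias(const int *alias, int n,
			  int (*fn)(int alias, void *opaque), void *opaque)
{
	int i, ret;

	for (i = 0; i < n; i++) {
		ret = fn(alias[i], opaque);
		if (ret)
			return ret;
	}
	return 0;
}

/* "Is this alias mapped?" -- return non-zero (stop) on the first unmapped alias. */
static int check_mapped(int alias, void *opaque)
{
	int mapped_upto = *(int *)opaque;

	return !(alias <= mapped_upto);
}

int main(void)
{
	int aliases[] = { 1, 2, 3 }, all = 3, partial = 2;

	printf("%d\n", !for_each_alias(aliases, 3, check_mapped, &all));	/* 1: every alias mapped */
	printf("%d\n", !for_each_alias(aliases, 3, check_mapped, &partial));	/* 0: alias 3 unmapped   */
	return 0;
}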
1933
f532959b
FY
1934/* Returns a number of VTD pages, but aligned to MM page size */
1935static inline unsigned long aligned_nrpages(unsigned long host_addr,
1936 size_t size)
1937{
1938 host_addr &= ~PAGE_MASK;
1939 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1940}
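As a worked example of the helper above: a buffer of 0x2000 bytes that starts 3 bytes into a page spans three 4KiB pages, while the same size starting on a page boundary spans two. A small sketch of the same computation, assuming MM and VT-d pages are both 4KiB (the constants below are local stand-ins, not the kernel macros):

#include <stdio.h>

#define PG_SHIFT	12			/* assumed 4KiB pages */
#define PG_SIZE		(1UL << PG_SHIFT)
#define PG_MASK		(~(PG_SIZE - 1))
#define PG_ALIGN(x)	(((x) + PG_SIZE - 1) & PG_MASK)

static unsigned long nrpages(unsigned long host_addr, unsigned long size)
{
	host_addr &= ~PG_MASK;			/* keep only the offset within the page */
	return PG_ALIGN(host_addr + size) >> PG_SHIFT;
}

int main(void)
{
	printf("%lu\n", nrpages(0x1003, 0x2000));	/* 3: straddles three pages */
	printf("%lu\n", nrpages(0x1000, 0x2000));	/* 2: page-aligned          */
	return 0;
}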
1941
6dd9a7c7
YS
1942/* Return largest possible superpage level for a given mapping */
1943static inline int hardware_largepage_caps(struct dmar_domain *domain,
1944 unsigned long iov_pfn,
1945 unsigned long phy_pfn,
1946 unsigned long pages)
1947{
1948 int support, level = 1;
1949 unsigned long pfnmerge;
1950
1951 support = domain->iommu_superpage;
1952
1953 /* To use a large page, the virtual *and* physical addresses
1954 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
1955 of them will mean we have to use smaller pages. So just
1956 merge them and check both at once. */
1957 pfnmerge = iov_pfn | phy_pfn;
1958
1959 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
1960 pages >>= VTD_STRIDE_SHIFT;
1961 if (!pages)
1962 break;
1963 pfnmerge >>= VTD_STRIDE_SHIFT;
1964 level++;
1965 support--;
1966 }
1967 return level;
1968}
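The loop above promotes the mapping to the next superpage level only while both pfns stay 512-aligned at that level and at least a full stride of pages remains; an unaligned pfn or a short run keeps it at level 1 (4KiB). A standalone sketch of that decision, with the 9-bit stride assumed:

#include <stdio.h>

#define STRIDE_SHIFT	9				/* assumed: 512 entries per level */
#define STRIDE_LOW_BITS	((1UL << STRIDE_SHIFT) - 1)

static int largepage_level(int support, unsigned long iov_pfn,
			   unsigned long phy_pfn, unsigned long pages)
{
	unsigned long pfnmerge = iov_pfn | phy_pfn;
	int level = 1;

	while (support && !(pfnmerge & STRIDE_LOW_BITS)) {
		pages >>= STRIDE_SHIFT;
		if (!pages)
			break;
		pfnmerge >>= STRIDE_SHIFT;
		level++;
		support--;
	}
	return level;
}

int main(void)
{
	printf("%d\n", largepage_level(2, 0x200, 0x200, 0x400)); /* 2: 2MiB pages usable    */
	printf("%d\n", largepage_level(2, 0x201, 0x200, 0x400)); /* 1: iov_pfn not aligned  */
	printf("%d\n", largepage_level(2, 0x200, 0x200, 0x100)); /* 1: fewer than 512 pages */
	return 0;
}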
1969
9051aa02
DW
1970static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1971 struct scatterlist *sg, unsigned long phys_pfn,
1972 unsigned long nr_pages, int prot)
e1605495
DW
1973{
1974 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1975 phys_addr_t uninitialized_var(pteval);
e1605495 1976 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1977 unsigned long sg_res;
6dd9a7c7
YS
1978 unsigned int largepage_lvl = 0;
1979 unsigned long lvl_pages = 0;
e1605495
DW
1980
1981 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1982
1983 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1984 return -EINVAL;
1985
1986 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1987
9051aa02
DW
1988 if (sg)
1989 sg_res = 0;
1990 else {
1991 sg_res = nr_pages + 1;
1992 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1993 }
1994
6dd9a7c7 1995 while (nr_pages > 0) {
c85994e4
DW
1996 uint64_t tmp;
1997
e1605495 1998 if (!sg_res) {
f532959b 1999 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2000 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2001 sg->dma_length = sg->length;
2002 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2003 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2004 }
6dd9a7c7 2005
e1605495 2006 if (!pte) {
6dd9a7c7
YS
2007 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2008
5cf0a76f 2009 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2010 if (!pte)
2011 return -ENOMEM;
6dd9a7c7 2012 /* It is a large page */
6491d4d0 2013 if (largepage_lvl > 1) {
6dd9a7c7 2014 pteval |= DMA_PTE_LARGE_PAGE;
6491d4d0
WD
2015 /* Ensure that old small page tables are removed to make room
2016 for superpage, if they exist. */
2017 dma_pte_clear_range(domain, iov_pfn,
2018 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2019 dma_pte_free_pagetable(domain, iov_pfn,
2020 iov_pfn + lvl_to_nr_pages(largepage_lvl) - 1);
2021 } else {
6dd9a7c7 2022 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2023 }
6dd9a7c7 2024
e1605495
DW
2025 }
2026 /* We don't need a lock here, nobody else
2027 * touches the iova range
2028 */
7766a3fb 2029 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2030 if (tmp) {
1bf20f0d 2031 static int dumps = 5;
c85994e4
DW
2032 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2033 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2034 if (dumps) {
2035 dumps--;
2036 debug_dma_dump_mappings(NULL);
2037 }
2038 WARN_ON(1);
2039 }
6dd9a7c7
YS
2040
2041 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2042
2043 BUG_ON(nr_pages < lvl_pages);
2044 BUG_ON(sg_res < lvl_pages);
2045
2046 nr_pages -= lvl_pages;
2047 iov_pfn += lvl_pages;
2048 phys_pfn += lvl_pages;
2049 pteval += lvl_pages * VTD_PAGE_SIZE;
2050 sg_res -= lvl_pages;
2051
2052 /* If the next PTE would be the first in a new page, then we
2053 need to flush the cache on the entries we've just written.
2054 And then we'll need to recalculate 'pte', so clear it and
2055 let it get set again in the if (!pte) block above.
2056
2057 If we're done (!nr_pages) we need to flush the cache too.
2058
2059 Also if we've been setting superpages, we may need to
2060 recalculate 'pte' and switch back to smaller pages for the
2061 end of the mapping, if the trailing size is not enough to
2062 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2063 pte++;
6dd9a7c7
YS
2064 if (!nr_pages || first_pte_in_page(pte) ||
2065 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2066 domain_flush_cache(domain, first_pte,
2067 (void *)pte - (void *)first_pte);
2068 pte = NULL;
2069 }
6dd9a7c7
YS
2070
2071 if (!sg_res && nr_pages)
e1605495
DW
2072 sg = sg_next(sg);
2073 }
2074 return 0;
2075}
2076
9051aa02
DW
2077static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2078 struct scatterlist *sg, unsigned long nr_pages,
2079 int prot)
ba395927 2080{
9051aa02
DW
2081 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2082}
6f6a00e4 2083
9051aa02
DW
2084static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2085 unsigned long phys_pfn, unsigned long nr_pages,
2086 int prot)
2087{
2088 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2089}
2090
c7151a8d 2091static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2092{
c7151a8d
WH
2093 if (!iommu)
2094 return;
8c11e798
WH
2095
2096 clear_context_table(iommu, bus, devfn);
2097 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2098 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2099 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2100}
2101
109b9b04
DW
2102static inline void unlink_domain_info(struct device_domain_info *info)
2103{
2104 assert_spin_locked(&device_domain_lock);
2105 list_del(&info->link);
2106 list_del(&info->global);
2107 if (info->dev)
0bcb3e28 2108 info->dev->archdata.iommu = NULL;
109b9b04
DW
2109}
2110
ba395927
KA
2111static void domain_remove_dev_info(struct dmar_domain *domain)
2112{
3a74ca01 2113 struct device_domain_info *info, *tmp;
fb170fb4 2114 unsigned long flags;
ba395927
KA
2115
2116 spin_lock_irqsave(&device_domain_lock, flags);
3a74ca01 2117 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
109b9b04 2118 unlink_domain_info(info);
ba395927
KA
2119 spin_unlock_irqrestore(&device_domain_lock, flags);
2120
93a23a72 2121 iommu_disable_dev_iotlb(info);
7c7faa11 2122 iommu_detach_dev(info->iommu, info->bus, info->devfn);
ba395927 2123
ab8dfe25 2124 if (domain_type_is_vm(domain)) {
7c7faa11 2125 iommu_detach_dependent_devices(info->iommu, info->dev);
fb170fb4 2126 domain_detach_iommu(domain, info->iommu);
92d03cc8
JL
2127 }
2128
2129 free_devinfo_mem(info);
ba395927
KA
2130 spin_lock_irqsave(&device_domain_lock, flags);
2131 }
2132 spin_unlock_irqrestore(&device_domain_lock, flags);
2133}
2134
2135/*
2136 * find_domain
1525a29a 2137 * Note: we use struct device->archdata.iommu to store the info
ba395927 2138 */
1525a29a 2139static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2140{
2141 struct device_domain_info *info;
2142
2143 /* No lock here, assumes no domain exit in normal case */
1525a29a 2144 info = dev->archdata.iommu;
ba395927
KA
2145 if (info)
2146 return info->domain;
2147 return NULL;
2148}
2149
5a8f40e8 2150static inline struct device_domain_info *
745f2586
JL
2151dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2152{
2153 struct device_domain_info *info;
2154
2155 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2156 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2157 info->devfn == devfn)
5a8f40e8 2158 return info;
745f2586
JL
2159
2160 return NULL;
2161}
2162
5a8f40e8 2163static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu,
41e80dca 2164 int bus, int devfn,
b718cd3d
DW
2165 struct device *dev,
2166 struct dmar_domain *domain)
745f2586 2167{
5a8f40e8 2168 struct dmar_domain *found = NULL;
745f2586
JL
2169 struct device_domain_info *info;
2170 unsigned long flags;
2171
2172 info = alloc_devinfo_mem();
2173 if (!info)
b718cd3d 2174 return NULL;
745f2586 2175
745f2586
JL
2176 info->bus = bus;
2177 info->devfn = devfn;
2178 info->dev = dev;
2179 info->domain = domain;
5a8f40e8 2180 info->iommu = iommu;
745f2586
JL
2181
2182 spin_lock_irqsave(&device_domain_lock, flags);
2183 if (dev)
0bcb3e28 2184 found = find_domain(dev);
5a8f40e8
DW
2185 else {
2186 struct device_domain_info *info2;
41e80dca 2187 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2188 if (info2)
2189 found = info2->domain;
2190 }
745f2586
JL
2191 if (found) {
2192 spin_unlock_irqrestore(&device_domain_lock, flags);
2193 free_devinfo_mem(info);
b718cd3d
DW
2194 /* Caller must free the original domain */
2195 return found;
745f2586
JL
2196 }
2197
b718cd3d
DW
2198 list_add(&info->link, &domain->devices);
2199 list_add(&info->global, &device_domain_list);
2200 if (dev)
2201 dev->archdata.iommu = info;
2202 spin_unlock_irqrestore(&device_domain_lock, flags);
2203
2204 return domain;
745f2586
JL
2205}
2206
579305f7
AW
2207static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2208{
2209 *(u16 *)opaque = alias;
2210 return 0;
2211}
2212
ba395927 2213/* domain is initialized */
146922ec 2214static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2215{
579305f7
AW
2216 struct dmar_domain *domain, *tmp;
2217 struct intel_iommu *iommu;
5a8f40e8 2218 struct device_domain_info *info;
579305f7 2219 u16 dma_alias;
ba395927 2220 unsigned long flags;
aa4d066a 2221 u8 bus, devfn;
ba395927 2222
146922ec 2223 domain = find_domain(dev);
ba395927
KA
2224 if (domain)
2225 return domain;
2226
579305f7
AW
2227 iommu = device_to_iommu(dev, &bus, &devfn);
2228 if (!iommu)
2229 return NULL;
2230
146922ec
DW
2231 if (dev_is_pci(dev)) {
2232 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2233
579305f7
AW
2234 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2235
2236 spin_lock_irqsave(&device_domain_lock, flags);
2237 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2238 PCI_BUS_NUM(dma_alias),
2239 dma_alias & 0xff);
2240 if (info) {
2241 iommu = info->iommu;
2242 domain = info->domain;
5a8f40e8 2243 }
579305f7 2244 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2245
579305f7
AW
2246 /* DMA alias already has a domain, use it */
2247 if (info)
2248 goto found_domain;
2249 }
ba395927 2250
146922ec 2251 /* Allocate and initialize new domain for the device */
ab8dfe25 2252 domain = alloc_domain(0);
745f2586 2253 if (!domain)
579305f7 2254 return NULL;
44bde614
JL
2255 domain->id = iommu_attach_domain(domain, iommu);
2256 if (domain->id < 0) {
2fe9723d 2257 free_domain_mem(domain);
579305f7 2258 return NULL;
2c2e2c38 2259 }
fb170fb4 2260 domain_attach_iommu(domain, iommu);
579305f7
AW
2261 if (domain_init(domain, gaw)) {
2262 domain_exit(domain);
2263 return NULL;
2c2e2c38 2264 }
ba395927 2265
579305f7
AW
2266 /* register PCI DMA alias device */
2267 if (dev_is_pci(dev)) {
2268 tmp = dmar_insert_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2269 dma_alias & 0xff, NULL, domain);
2270
2271 if (!tmp || tmp != domain) {
2272 domain_exit(domain);
2273 domain = tmp;
2274 }
2275
b718cd3d 2276 if (!domain)
579305f7 2277 return NULL;
ba395927
KA
2278 }
2279
2280found_domain:
579305f7
AW
2281 tmp = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
2282
2283 if (!tmp || tmp != domain) {
2284 domain_exit(domain);
2285 domain = tmp;
2286 }
b718cd3d
DW
2287
2288 return domain;
ba395927
KA
2289}
2290
2c2e2c38 2291static int iommu_identity_mapping;
e0fc7e0b
DW
2292#define IDENTMAP_ALL 1
2293#define IDENTMAP_GFX 2
2294#define IDENTMAP_AZALIA 4
2c2e2c38 2295
b213203e
DW
2296static int iommu_domain_identity_map(struct dmar_domain *domain,
2297 unsigned long long start,
2298 unsigned long long end)
ba395927 2299{
c5395d5c
DW
2300 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2301 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2302
2303 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2304 dma_to_mm_pfn(last_vpfn))) {
ba395927 2305 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 2306 return -ENOMEM;
ba395927
KA
2307 }
2308
c5395d5c
DW
2309 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
2310 start, end, domain->id);
ba395927
KA
2311 /*
2312 * RMRR range might have overlap with physical memory range,
2313 * clear it first
2314 */
c5395d5c 2315 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2316
c5395d5c
DW
2317 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2318 last_vpfn - first_vpfn + 1,
61df7443 2319 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2320}
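The helper above just shifts the byte range down to 4KiB page frame numbers and maps each one onto itself; for instance the 0 to 16MiB-1 range requested by the ISA workaround further down becomes pfns 0x0 through 0xfff, i.e. 4096 identity-mapped pages. A quick check of that conversion (page shift assumed to be 12):

#include <stdio.h>

#define VTD_SHIFT 12	/* assumed 4KiB VT-d pages */

int main(void)
{
	unsigned long long start = 0, end = 16ULL * 1024 * 1024 - 1;
	unsigned long long first = start >> VTD_SHIFT, last = end >> VTD_SHIFT;

	printf("pfns 0x%llx..0x%llx, %llu pages\n", first, last, last - first + 1);
	/* prints: pfns 0x0..0xfff, 4096 pages */
	return 0;
}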
2321
0b9d9753 2322static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2323 unsigned long long start,
2324 unsigned long long end)
2325{
2326 struct dmar_domain *domain;
2327 int ret;
2328
0b9d9753 2329 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2330 if (!domain)
2331 return -ENOMEM;
2332
19943b0e
DW
2333 /* For _hardware_ passthrough, don't bother. But for software
2334 passthrough, we do it anyway -- it may indicate a memory
2335 range which is reserved in E820, and so didn't get set
2336 up to start with in si_domain */
2337 if (domain == si_domain && hw_pass_through) {
2338 printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2339 dev_name(dev), start, end);
19943b0e
DW
2340 return 0;
2341 }
2342
2343 printk(KERN_INFO
2344 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
0b9d9753 2345 dev_name(dev), start, end);
2ff729f5 2346
5595b528
DW
2347 if (end < start) {
2348 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2349 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2350 dmi_get_system_info(DMI_BIOS_VENDOR),
2351 dmi_get_system_info(DMI_BIOS_VERSION),
2352 dmi_get_system_info(DMI_PRODUCT_VERSION));
2353 ret = -EIO;
2354 goto error;
2355 }
2356
2ff729f5
DW
2357 if (end >> agaw_to_width(domain->agaw)) {
2358 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2359 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2360 agaw_to_width(domain->agaw),
2361 dmi_get_system_info(DMI_BIOS_VENDOR),
2362 dmi_get_system_info(DMI_BIOS_VERSION),
2363 dmi_get_system_info(DMI_PRODUCT_VERSION));
2364 ret = -EIO;
2365 goto error;
2366 }
19943b0e 2367
b213203e 2368 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2369 if (ret)
2370 goto error;
2371
2372 /* context entry init */
0b9d9753 2373 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2374 if (ret)
2375 goto error;
2376
2377 return 0;
2378
2379 error:
ba395927
KA
2380 domain_exit(domain);
2381 return ret;
ba395927
KA
2382}
2383
2384static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2385 struct device *dev)
ba395927 2386{
0b9d9753 2387 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2388 return 0;
0b9d9753
DW
2389 return iommu_prepare_identity_map(dev, rmrr->base_address,
2390 rmrr->end_address);
ba395927
KA
2391}
2392
d3f13810 2393#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2394static inline void iommu_prepare_isa(void)
2395{
2396 struct pci_dev *pdev;
2397 int ret;
2398
2399 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2400 if (!pdev)
2401 return;
2402
c7ab48d2 2403 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2404 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2405
2406 if (ret)
c7ab48d2
DW
2407 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2408 "floppy might not work\n");
49a0429e 2409
9b27e82d 2410 pci_dev_put(pdev);
49a0429e
KA
2411}
2412#else
2413static inline void iommu_prepare_isa(void)
2414{
2415 return;
2416}
d3f13810 2417#endif /* CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2418
2c2e2c38 2419static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2420
071e1374 2421static int __init si_domain_init(int hw)
2c2e2c38
FY
2422{
2423 struct dmar_drhd_unit *drhd;
2424 struct intel_iommu *iommu;
c7ab48d2 2425 int nid, ret = 0;
44bde614 2426 bool first = true;
2c2e2c38 2427
ab8dfe25 2428 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2429 if (!si_domain)
2430 return -EFAULT;
2431
2c2e2c38
FY
2432 for_each_active_iommu(iommu, drhd) {
2433 ret = iommu_attach_domain(si_domain, iommu);
fb170fb4 2434 if (ret < 0) {
2c2e2c38
FY
2435 domain_exit(si_domain);
2436 return -EFAULT;
44bde614
JL
2437 } else if (first) {
2438 si_domain->id = ret;
2439 first = false;
2440 } else if (si_domain->id != ret) {
2441 domain_exit(si_domain);
2442 return -EFAULT;
2c2e2c38 2443 }
fb170fb4 2444 domain_attach_iommu(si_domain, iommu);
2c2e2c38
FY
2445 }
2446
2447 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2448 domain_exit(si_domain);
2449 return -EFAULT;
2450 }
2451
9544c003
JL
2452 pr_debug("IOMMU: identity mapping domain is domain %d\n",
2453 si_domain->id);
2c2e2c38 2454
19943b0e
DW
2455 if (hw)
2456 return 0;
2457
c7ab48d2 2458 for_each_online_node(nid) {
5dfe8660
TH
2459 unsigned long start_pfn, end_pfn;
2460 int i;
2461
2462 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2463 ret = iommu_domain_identity_map(si_domain,
2464 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2465 if (ret)
2466 return ret;
2467 }
c7ab48d2
DW
2468 }
2469
2c2e2c38
FY
2470 return 0;
2471}
2472
9b226624 2473static int identity_mapping(struct device *dev)
2c2e2c38
FY
2474{
2475 struct device_domain_info *info;
2476
2477 if (likely(!iommu_identity_mapping))
2478 return 0;
2479
9b226624 2480 info = dev->archdata.iommu;
cb452a40
MT
2481 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2482 return (info->domain == si_domain);
2c2e2c38 2483
2c2e2c38
FY
2484 return 0;
2485}
2486
2487static int domain_add_dev_info(struct dmar_domain *domain,
5913c9bf 2488 struct device *dev, int translation)
2c2e2c38 2489{
0ac72664 2490 struct dmar_domain *ndomain;
5a8f40e8 2491 struct intel_iommu *iommu;
156baca8 2492 u8 bus, devfn;
5fe60f4e 2493 int ret;
2c2e2c38 2494
5913c9bf 2495 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2496 if (!iommu)
2497 return -ENODEV;
2498
5913c9bf 2499 ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2500 if (ndomain != domain)
2501 return -EBUSY;
2c2e2c38 2502
5913c9bf 2503 ret = domain_context_mapping(domain, dev, translation);
e2ad23d0 2504 if (ret) {
5913c9bf 2505 domain_remove_one_dev_info(domain, dev);
e2ad23d0
DW
2506 return ret;
2507 }
2508
2c2e2c38
FY
2509 return 0;
2510}
2511
0b9d9753 2512static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2513{
2514 struct dmar_rmrr_unit *rmrr;
832bd858 2515 struct device *tmp;
ea2447f7
TM
2516 int i;
2517
0e242612 2518 rcu_read_lock();
ea2447f7 2519 for_each_rmrr_units(rmrr) {
b683b230
JL
2520 /*
2521 * Return TRUE if this RMRR contains the device that
2522 * is passed in.
2523 */
2524 for_each_active_dev_scope(rmrr->devices,
2525 rmrr->devices_cnt, i, tmp)
0b9d9753 2526 if (tmp == dev) {
0e242612 2527 rcu_read_unlock();
ea2447f7 2528 return true;
b683b230 2529 }
ea2447f7 2530 }
0e242612 2531 rcu_read_unlock();
ea2447f7
TM
2532 return false;
2533}
2534
3bdb2591 2535static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2536{
ea2447f7 2537
3bdb2591
DW
2538 if (dev_is_pci(dev)) {
2539 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2540
3bdb2591
DW
2541 /*
2542 * We want to prevent any device associated with an RMRR from
2543 * getting placed into the SI Domain. This is done because
2544 * problems exist when devices are moved in and out of domains
2545 * and their respective RMRR info is lost. We exempt USB devices
2546 * from this process due to their usage of RMRRs that are known
2547 * to not be needed after BIOS hand-off to OS.
2548 */
2549 if (device_has_rmrr(dev) &&
2550 (pdev->class >> 8) != PCI_CLASS_SERIAL_USB)
2551 return 0;
e0fc7e0b 2552
3bdb2591
DW
2553 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2554 return 1;
e0fc7e0b 2555
3bdb2591
DW
2556 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2557 return 1;
6941af28 2558
3bdb2591 2559 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2560 return 0;
3bdb2591
DW
2561
2562 /*
2563 * We want to start off with all devices in the 1:1 domain, and
2564 * take them out later if we find they can't access all of memory.
2565 *
2566 * However, we can't do this for PCI devices behind bridges,
2567 * because all PCI devices behind the same bridge will end up
2568 * with the same source-id on their transactions.
2569 *
2570 * Practically speaking, we can't change things around for these
2571 * devices at run-time, because we can't be sure there'll be no
2572 * DMA transactions in flight for any of their siblings.
2573 *
2574 * So PCI devices (unless they're on the root bus) as well as
2575 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2576 * the 1:1 domain, just in _case_ one of their siblings turns out
2577 * not to be able to map all of memory.
2578 */
2579 if (!pci_is_pcie(pdev)) {
2580 if (!pci_is_root_bus(pdev->bus))
2581 return 0;
2582 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2583 return 0;
2584 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2585 return 0;
3bdb2591
DW
2586 } else {
2587 if (device_has_rmrr(dev))
2588 return 0;
2589 }
3dfc813d 2590
3bdb2591 2591 /*
3dfc813d 2592 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2593 * Assume that they will -- if they turn out not to be, then we can
3dfc813d
DW
2594 * take them out of the 1:1 domain later.
2595 */
8fcc5372
CW
2596 if (!startup) {
2597 /*
2598 * If the device's dma_mask is less than the system's memory
2599 * size then this is not a candidate for identity mapping.
2600 */
3bdb2591 2601 u64 dma_mask = *dev->dma_mask;
8fcc5372 2602
3bdb2591
DW
2603 if (dev->coherent_dma_mask &&
2604 dev->coherent_dma_mask < dma_mask)
2605 dma_mask = dev->coherent_dma_mask;
8fcc5372 2606
3bdb2591 2607 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2608 }
6941af28
DW
2609
2610 return 1;
2611}
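At run time (startup == 0) the final check above compares the device's effective mask (the smaller of its dma_mask and coherent_dma_mask, when the latter is set) against the mask dma_get_required_mask() reports for reaching all of memory, so a 32-bit-only device on a host with more than 4GiB drops out of the identity domain. A small sketch of that comparison, with the required mask passed in as a plain parameter (a hypothetical stand-in, not the kernel API):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MASK_BITS(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))

/* Hypothetical stand-in for the run-time dma_mask check in iommu_should_identity_map(). */
static bool can_stay_identity(uint64_t dma_mask, uint64_t coherent_mask,
			      uint64_t required_mask)
{
	if (coherent_mask && coherent_mask < dma_mask)
		dma_mask = coherent_mask;
	return dma_mask >= required_mask;
}

int main(void)
{
	uint64_t required = MASK_BITS(33);	/* e.g. a host with 8GiB of RAM */

	printf("%d\n", can_stay_identity(MASK_BITS(64), MASK_BITS(64), required)); /* 1 */
	printf("%d\n", can_stay_identity(MASK_BITS(32), MASK_BITS(32), required)); /* 0 */
	return 0;
}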
2612
cf04eee8
DW
2613static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2614{
2615 int ret;
2616
2617 if (!iommu_should_identity_map(dev, 1))
2618 return 0;
2619
2620 ret = domain_add_dev_info(si_domain, dev,
2621 hw ? CONTEXT_TT_PASS_THROUGH :
2622 CONTEXT_TT_MULTI_LEVEL);
2623 if (!ret)
2624 pr_info("IOMMU: %s identity mapping for device %s\n",
2625 hw ? "hardware" : "software", dev_name(dev));
2626 else if (ret == -ENODEV)
2627 /* device not associated with an iommu */
2628 ret = 0;
2629
2630 return ret;
2631}
2632
2633
071e1374 2634static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2635{
2c2e2c38 2636 struct pci_dev *pdev = NULL;
cf04eee8
DW
2637 struct dmar_drhd_unit *drhd;
2638 struct intel_iommu *iommu;
2639 struct device *dev;
2640 int i;
2641 int ret = 0;
2c2e2c38 2642
19943b0e 2643 ret = si_domain_init(hw);
2c2e2c38
FY
2644 if (ret)
2645 return -EFAULT;
2646
2c2e2c38 2647 for_each_pci_dev(pdev) {
cf04eee8
DW
2648 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2649 if (ret)
2650 return ret;
2651 }
2652
2653 for_each_active_iommu(iommu, drhd)
2654 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2655 struct acpi_device_physical_node *pn;
2656 struct acpi_device *adev;
2657
2658 if (dev->bus != &acpi_bus_type)
2659 continue;
2660
2661 adev= to_acpi_device(dev);
2662 mutex_lock(&adev->physical_node_lock);
2663 list_for_each_entry(pn, &adev->physical_node_list, node) {
2664 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2665 if (ret)
2666 break;
eae460b6 2667 }
cf04eee8
DW
2668 mutex_unlock(&adev->physical_node_lock);
2669 if (ret)
2670 return ret;
62edf5dc 2671 }
2c2e2c38
FY
2672
2673 return 0;
2674}
2675
b779260b 2676static int __init init_dmars(void)
ba395927
KA
2677{
2678 struct dmar_drhd_unit *drhd;
2679 struct dmar_rmrr_unit *rmrr;
832bd858 2680 struct device *dev;
ba395927 2681 struct intel_iommu *iommu;
9d783ba0 2682 int i, ret;
2c2e2c38 2683
ba395927
KA
2684 /*
2685 * for each drhd
2686 * allocate root
2687 * initialize and program root entry to not present
2688 * endfor
2689 */
2690 for_each_drhd_unit(drhd) {
5e0d2a6f 2691 /*
2692 * lock not needed as this is only incremented in the single
2693 * threaded kernel __init code path; all other accesses are read
2694 * only
2695 */
1b198bb0
MT
2696 if (g_num_of_iommus < IOMMU_UNITS_SUPPORTED) {
2697 g_num_of_iommus++;
2698 continue;
2699 }
2700 printk_once(KERN_ERR "intel-iommu: exceeded %d IOMMUs\n",
2701 IOMMU_UNITS_SUPPORTED);
5e0d2a6f 2702 }
2703
d9630fe9
WH
2704 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2705 GFP_KERNEL);
2706 if (!g_iommus) {
2707 printk(KERN_ERR "Allocating global iommu array failed\n");
2708 ret = -ENOMEM;
2709 goto error;
2710 }
2711
80b20dd8 2712 deferred_flush = kzalloc(g_num_of_iommus *
2713 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2714 if (!deferred_flush) {
5e0d2a6f 2715 ret = -ENOMEM;
989d51fc 2716 goto free_g_iommus;
5e0d2a6f 2717 }
2718
7c919779 2719 for_each_active_iommu(iommu, drhd) {
d9630fe9 2720 g_iommus[iommu->seq_id] = iommu;
ba395927 2721
e61d98d8
SS
2722 ret = iommu_init_domains(iommu);
2723 if (ret)
989d51fc 2724 goto free_iommu;
e61d98d8 2725
ba395927
KA
2726 /*
2727 * TBD:
2728 * we could share the same root & context tables
25985edc 2729 * among all IOMMUs. Need to split it later.
ba395927
KA
2730 */
2731 ret = iommu_alloc_root_entry(iommu);
2732 if (ret) {
2733 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
989d51fc 2734 goto free_iommu;
ba395927 2735 }
4ed0d3e6 2736 if (!ecap_pass_through(iommu->ecap))
19943b0e 2737 hw_pass_through = 0;
ba395927
KA
2738 }
2739
1531a6a6
SS
2740 /*
2741 * Start from the sane iommu hardware state.
2742 */
7c919779 2743 for_each_active_iommu(iommu, drhd) {
1531a6a6
SS
2744 /*
2745 * If the queued invalidation is already initialized by us
2746 * (for example, while enabling interrupt-remapping) then
2747 * we already have things rolling from a sane state.
2748 */
2749 if (iommu->qi)
2750 continue;
2751
2752 /*
2753 * Clear any previous faults.
2754 */
2755 dmar_fault(-1, iommu);
2756 /*
2757 * Disable queued invalidation if supported and already enabled
2758 * before OS handover.
2759 */
2760 dmar_disable_qi(iommu);
2761 }
2762
7c919779 2763 for_each_active_iommu(iommu, drhd) {
a77b67d4
YS
2764 if (dmar_enable_qi(iommu)) {
2765 /*
2766 * Queued Invalidate not enabled, use Register Based
2767 * Invalidate
2768 */
2769 iommu->flush.flush_context = __iommu_flush_context;
2770 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
680a7524 2771 printk(KERN_INFO "IOMMU %d 0x%Lx: using Register based "
b4e0f9eb 2772 "invalidation\n",
680a7524 2773 iommu->seq_id,
b4e0f9eb 2774 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2775 } else {
2776 iommu->flush.flush_context = qi_flush_context;
2777 iommu->flush.flush_iotlb = qi_flush_iotlb;
680a7524 2778 printk(KERN_INFO "IOMMU %d 0x%Lx: using Queued "
b4e0f9eb 2779 "invalidation\n",
680a7524 2780 iommu->seq_id,
b4e0f9eb 2781 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2782 }
2783 }
2784
19943b0e 2785 if (iommu_pass_through)
e0fc7e0b
DW
2786 iommu_identity_mapping |= IDENTMAP_ALL;
2787
d3f13810 2788#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 2789 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2790#endif
e0fc7e0b
DW
2791
2792 check_tylersburg_isoch();
2793
ba395927 2794 /*
19943b0e
DW
2795 * If pass through is not set or not enabled, set up context entries for
2796 * identity mappings for rmrr, gfx, and isa, and may fall back to static
2797 * identity mapping if iommu_identity_mapping is set.
ba395927 2798 */
19943b0e
DW
2799 if (iommu_identity_mapping) {
2800 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2801 if (ret) {
19943b0e 2802 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
989d51fc 2803 goto free_iommu;
ba395927
KA
2804 }
2805 }
ba395927 2806 /*
19943b0e
DW
2807 * For each rmrr
2808 * for each dev attached to rmrr
2809 * do
2810 * locate drhd for dev, alloc domain for dev
2811 * allocate free domain
2812 * allocate page table entries for rmrr
2813 * if context not allocated for bus
2814 * allocate and init context
2815 * set present in root table for this bus
2816 * init context with domain, translation etc
2817 * endfor
2818 * endfor
ba395927 2819 */
19943b0e
DW
2820 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2821 for_each_rmrr_units(rmrr) {
b683b230
JL
2822 /* some BIOSes list non-existent devices in the DMAR table. */
2823 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 2824 i, dev) {
0b9d9753 2825 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e
DW
2826 if (ret)
2827 printk(KERN_ERR
2828 "IOMMU: mapping reserved region failed\n");
ba395927 2829 }
4ed0d3e6 2830 }
49a0429e 2831
19943b0e
DW
2832 iommu_prepare_isa();
2833
ba395927
KA
2834 /*
2835 * for each drhd
2836 * enable fault log
2837 * global invalidate context cache
2838 * global invalidate iotlb
2839 * enable translation
2840 */
7c919779 2841 for_each_iommu(iommu, drhd) {
51a63e67
JC
2842 if (drhd->ignored) {
2843 /*
2844 * we always have to disable PMRs or DMA may fail on
2845 * this device
2846 */
2847 if (force_on)
7c919779 2848 iommu_disable_protect_mem_regions(iommu);
ba395927 2849 continue;
51a63e67 2850 }
ba395927
KA
2851
2852 iommu_flush_write_buffer(iommu);
2853
3460a6d9
KA
2854 ret = dmar_set_interrupt(iommu);
2855 if (ret)
989d51fc 2856 goto free_iommu;
3460a6d9 2857
ba395927
KA
2858 iommu_set_root_entry(iommu);
2859
4c25a2c1 2860 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2861 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2862
ba395927
KA
2863 ret = iommu_enable_translation(iommu);
2864 if (ret)
989d51fc 2865 goto free_iommu;
b94996c9
DW
2866
2867 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2868 }
2869
2870 return 0;
989d51fc
JL
2871
2872free_iommu:
7c919779 2873 for_each_active_iommu(iommu, drhd)
a868e6b7 2874 free_dmar_iommu(iommu);
9bdc531e 2875 kfree(deferred_flush);
989d51fc 2876free_g_iommus:
d9630fe9 2877 kfree(g_iommus);
989d51fc 2878error:
ba395927
KA
2879 return ret;
2880}
2881
5a5e02a6 2882/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2883static struct iova *intel_alloc_iova(struct device *dev,
2884 struct dmar_domain *domain,
2885 unsigned long nrpages, uint64_t dma_mask)
ba395927 2886{
ba395927 2887 struct iova *iova = NULL;
ba395927 2888
875764de
DW
2889 /* Restrict dma_mask to the width that the iommu can handle */
2890 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2891
2892 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2893 /*
2894 * First try to allocate an io virtual address in
284901a9 2895 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2896 * from higher range
ba395927 2897 */
875764de
DW
2898 iova = alloc_iova(&domain->iovad, nrpages,
2899 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2900 if (iova)
2901 return iova;
2902 }
2903 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2904 if (unlikely(!iova)) {
2905 printk(KERN_ERR "Allocating %ld-page iova for %s failed",
207e3592 2906 nrpages, dev_name(dev));
f76aec76
KA
2907 return NULL;
2908 }
2909
2910 return iova;
2911}
2912
d4b709f4 2913static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
2914{
2915 struct dmar_domain *domain;
2916 int ret;
2917
d4b709f4 2918 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 2919 if (!domain) {
d4b709f4
DW
2920 printk(KERN_ERR "Allocating domain for %s failed",
2921 dev_name(dev));
4fe05bbc 2922 return NULL;
ba395927
KA
2923 }
2924
2925 /* make sure context mapping is ok */
d4b709f4
DW
2926 if (unlikely(!domain_context_mapped(dev))) {
2927 ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL);
f76aec76 2928 if (ret) {
d4b709f4
DW
2929 printk(KERN_ERR "Domain context map for %s failed",
2930 dev_name(dev));
4fe05bbc 2931 return NULL;
f76aec76 2932 }
ba395927
KA
2933 }
2934
f76aec76
KA
2935 return domain;
2936}
2937
d4b709f4 2938static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
2939{
2940 struct device_domain_info *info;
2941
2942 /* No lock here, assumes no domain exit in normal case */
d4b709f4 2943 info = dev->archdata.iommu;
147202aa
DW
2944 if (likely(info))
2945 return info->domain;
2946
2947 return __get_valid_domain_for_dev(dev);
2948}
2949
3d89194a 2950static int iommu_dummy(struct device *dev)
2c2e2c38 2951{
3d89194a 2952 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2c2e2c38
FY
2953}
2954
ecb509ec 2955/* Check if the dev needs to go through the non-identity map and unmap process. */
73676832 2956static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
2957{
2958 int found;
2959
3d89194a 2960 if (iommu_dummy(dev))
1e4c64c4
DW
2961 return 1;
2962
2c2e2c38 2963 if (!iommu_identity_mapping)
1e4c64c4 2964 return 0;
2c2e2c38 2965
9b226624 2966 found = identity_mapping(dev);
2c2e2c38 2967 if (found) {
ecb509ec 2968 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
2969 return 1;
2970 else {
2971 /*
2972 * 32 bit DMA is removed from si_domain and fall back
2973 * to non-identity mapping.
2974 */
bf9c9eda 2975 domain_remove_one_dev_info(si_domain, dev);
2c2e2c38 2976 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
ecb509ec 2977 dev_name(dev));
2c2e2c38
FY
2978 return 0;
2979 }
2980 } else {
2981 /*
2982 * In case a 64 bit DMA device is detached from a vm, the device
2983 * is put into si_domain for identity mapping.
2984 */
ecb509ec 2985 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 2986 int ret;
5913c9bf 2987 ret = domain_add_dev_info(si_domain, dev,
5fe60f4e
DW
2988 hw_pass_through ?
2989 CONTEXT_TT_PASS_THROUGH :
2990 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2991 if (!ret) {
2992 printk(KERN_INFO "64bit %s uses identity mapping\n",
ecb509ec 2993 dev_name(dev));
2c2e2c38
FY
2994 return 1;
2995 }
2996 }
2997 }
2998
1e4c64c4 2999 return 0;
2c2e2c38
FY
3000}
3001
5040a918 3002static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3003 size_t size, int dir, u64 dma_mask)
f76aec76 3004{
f76aec76 3005 struct dmar_domain *domain;
5b6985ce 3006 phys_addr_t start_paddr;
f76aec76
KA
3007 struct iova *iova;
3008 int prot = 0;
6865f0d1 3009 int ret;
8c11e798 3010 struct intel_iommu *iommu;
33041ec0 3011 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3012
3013 BUG_ON(dir == DMA_NONE);
2c2e2c38 3014
5040a918 3015 if (iommu_no_mapping(dev))
6865f0d1 3016 return paddr;
f76aec76 3017
5040a918 3018 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3019 if (!domain)
3020 return 0;
3021
8c11e798 3022 iommu = domain_get_iommu(domain);
88cb6a74 3023 size = aligned_nrpages(paddr, size);
f76aec76 3024
5040a918 3025 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3026 if (!iova)
3027 goto error;
3028
ba395927
KA
3029 /*
3030 * Check if DMAR supports zero-length reads on write only
3031 * mappings..
3032 */
3033 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3034 !cap_zlr(iommu->cap))
ba395927
KA
3035 prot |= DMA_PTE_READ;
3036 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3037 prot |= DMA_PTE_WRITE;
3038 /*
6865f0d1 3039 * paddr - (paddr + size) might be a partial page, we should map the whole
ba395927 3040 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3041 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3042 * is not a big problem
3043 */
0ab36de2 3044 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3045 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3046 if (ret)
3047 goto error;
3048
1f0ef2aa
DW
3049 /* it's a non-present to present mapping. Only flush if caching mode */
3050 if (cap_caching_mode(iommu->cap))
ea8ea460 3051 iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1);
1f0ef2aa 3052 else
8c11e798 3053 iommu_flush_write_buffer(iommu);
f76aec76 3054
03d6a246
DW
3055 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3056 start_paddr += paddr & ~PAGE_MASK;
3057 return start_paddr;
ba395927 3058
ba395927 3059error:
f76aec76
KA
3060 if (iova)
3061 __free_iova(&domain->iovad, iova);
4cf2e75d 3062 printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3063 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3064 return 0;
3065}
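The prot bits chosen above give write permission whenever data flows from the device, and read permission either because the direction needs it or because the IOMMU cannot handle zero-length reads on write-only mappings (!cap_zlr). A compact sketch of the resulting combinations (the READ/WRITE bit values below are assumed for illustration):

#include <stdio.h>

#define PTE_READ	1	/* assumed encoding, bit 0 */
#define PTE_WRITE	2	/* assumed encoding, bit 1 */

enum dir { TO_DEVICE, FROM_DEVICE, BIDIRECTIONAL };

static int prot_bits(enum dir dir, int cap_zlr)
{
	int prot = 0;

	if (dir == TO_DEVICE || dir == BIDIRECTIONAL || !cap_zlr)
		prot |= PTE_READ;
	if (dir == FROM_DEVICE || dir == BIDIRECTIONAL)
		prot |= PTE_WRITE;
	return prot;
}

int main(void)
{
	printf("%d\n", prot_bits(FROM_DEVICE, 1));	/* 2: write-only, zlr supported */
	printf("%d\n", prot_bits(FROM_DEVICE, 0));	/* 3: read added because !zlr   */
	printf("%d\n", prot_bits(TO_DEVICE, 1));	/* 1: read-only                 */
	return 0;
}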
3066
ffbbef5c
FT
3067static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3068 unsigned long offset, size_t size,
3069 enum dma_data_direction dir,
3070 struct dma_attrs *attrs)
bb9e6d65 3071{
ffbbef5c 3072 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3073 dir, *dev->dma_mask);
bb9e6d65
FT
3074}
3075
5e0d2a6f 3076static void flush_unmaps(void)
3077{
80b20dd8 3078 int i, j;
5e0d2a6f 3079
5e0d2a6f 3080 timer_on = 0;
3081
3082 /* just flush them all */
3083 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3084 struct intel_iommu *iommu = g_iommus[i];
3085 if (!iommu)
3086 continue;
c42d9f32 3087
9dd2fe89
YZ
3088 if (!deferred_flush[i].next)
3089 continue;
3090
78d5f0f5
NA
3091 /* In caching mode, global flushes make emulation expensive */
3092 if (!cap_caching_mode(iommu->cap))
3093 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3094 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3095 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3096 unsigned long mask;
3097 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3098 struct dmar_domain *domain = deferred_flush[i].domain[j];
3099
3100 /* On real hardware multiple invalidations are expensive */
3101 if (cap_caching_mode(iommu->cap))
3102 iommu_flush_iotlb_psi(iommu, domain->id,
ea8ea460
DW
3103 iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1,
3104 !deferred_flush[i].freelist[j], 0);
78d5f0f5
NA
3105 else {
3106 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
3107 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3108 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3109 }
93a23a72 3110 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3111 if (deferred_flush[i].freelist[j])
3112 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3113 }
9dd2fe89 3114 deferred_flush[i].next = 0;
5e0d2a6f 3115 }
3116
5e0d2a6f 3117 list_size = 0;
5e0d2a6f 3118}
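For the non-caching-mode path above, the device-IOTLB invalidation size is expressed as an order: mask = ilog2(number of VT-d pages in the IOVA), so an entry covering 8 pages is flushed as one aligned 2^3-page region. A quick standalone check of that arithmetic, assuming MM and VT-d pfns coincide (4KiB pages):

#include <stdio.h>

/* integer (floor) log2, matching what the kernel's ilog2() computes */
static unsigned int ilog2u(unsigned long v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	unsigned long pfn_lo = 0x1000, pfn_hi = 0x1007;			/* an 8-page IOVA */
	unsigned long npages = pfn_hi - pfn_lo + 1;

	printf("npages %lu -> mask %u\n", npages, ilog2u(npages));	/* mask 3 */
	return 0;
}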
3119
3120static void flush_unmaps_timeout(unsigned long data)
3121{
80b20dd8 3122 unsigned long flags;
3123
3124 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3125 flush_unmaps();
80b20dd8 3126 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3127}
3128
ea8ea460 3129static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3130{
3131 unsigned long flags;
80b20dd8 3132 int next, iommu_id;
8c11e798 3133 struct intel_iommu *iommu;
5e0d2a6f 3134
3135 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3136 if (list_size == HIGH_WATER_MARK)
3137 flush_unmaps();
3138
8c11e798
WH
3139 iommu = domain_get_iommu(dom);
3140 iommu_id = iommu->seq_id;
c42d9f32 3141
80b20dd8 3142 next = deferred_flush[iommu_id].next;
3143 deferred_flush[iommu_id].domain[next] = dom;
3144 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3145 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3146 deferred_flush[iommu_id].next++;
5e0d2a6f 3147
3148 if (!timer_on) {
3149 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3150 timer_on = 1;
3151 }
3152 list_size++;
3153 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3154}
3155
ffbbef5c
FT
3156static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3157 size_t size, enum dma_data_direction dir,
3158 struct dma_attrs *attrs)
ba395927 3159{
f76aec76 3160 struct dmar_domain *domain;
d794dc9b 3161 unsigned long start_pfn, last_pfn;
ba395927 3162 struct iova *iova;
8c11e798 3163 struct intel_iommu *iommu;
ea8ea460 3164 struct page *freelist;
ba395927 3165
73676832 3166 if (iommu_no_mapping(dev))
f76aec76 3167 return;
2c2e2c38 3168
1525a29a 3169 domain = find_domain(dev);
ba395927
KA
3170 BUG_ON(!domain);
3171
8c11e798
WH
3172 iommu = domain_get_iommu(domain);
3173
ba395927 3174 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3175 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3176 (unsigned long long)dev_addr))
ba395927 3177 return;
ba395927 3178
d794dc9b
DW
3179 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3180 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3181
d794dc9b 3182 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3183 dev_name(dev), start_pfn, last_pfn);
ba395927 3184
ea8ea460 3185 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3186
5e0d2a6f 3187 if (intel_iommu_strict) {
03d6a246 3188 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3189 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3190 /* free iova */
3191 __free_iova(&domain->iovad, iova);
ea8ea460 3192 dma_free_pagelist(freelist);
5e0d2a6f 3193 } else {
ea8ea460 3194 add_unmap(domain, iova, freelist);
5e0d2a6f 3195 /*
3196 * queue up the release of the unmap to save the 1/6th of the
3197 * cpu used up by the iotlb flush operation...
3198 */
5e0d2a6f 3199 }
ba395927
KA
3200}
3201
5040a918 3202static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3203 dma_addr_t *dma_handle, gfp_t flags,
3204 struct dma_attrs *attrs)
ba395927 3205{
36746436 3206 struct page *page = NULL;
ba395927
KA
3207 int order;
3208
5b6985ce 3209 size = PAGE_ALIGN(size);
ba395927 3210 order = get_order(size);
e8bb910d 3211
5040a918 3212 if (!iommu_no_mapping(dev))
e8bb910d 3213 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3214 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3215 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3216 flags |= GFP_DMA;
3217 else
3218 flags |= GFP_DMA32;
3219 }
ba395927 3220
36746436
AM
3221 if (flags & __GFP_WAIT) {
3222 unsigned int count = size >> PAGE_SHIFT;
3223
3224 page = dma_alloc_from_contiguous(dev, count, order);
3225 if (page && iommu_no_mapping(dev) &&
3226 page_to_phys(page) + size > dev->coherent_dma_mask) {
3227 dma_release_from_contiguous(dev, page, count);
3228 page = NULL;
3229 }
3230 }
3231
3232 if (!page)
3233 page = alloc_pages(flags, order);
3234 if (!page)
ba395927 3235 return NULL;
36746436 3236 memset(page_address(page), 0, size);
ba395927 3237
36746436 3238 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3239 DMA_BIDIRECTIONAL,
5040a918 3240 dev->coherent_dma_mask);
ba395927 3241 if (*dma_handle)
36746436
AM
3242 return page_address(page);
3243 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3244 __free_pages(page, order);
3245
ba395927
KA
3246 return NULL;
3247}
3248
5040a918 3249static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3250 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3251{
3252 int order;
36746436 3253 struct page *page = virt_to_page(vaddr);
ba395927 3254
5b6985ce 3255 size = PAGE_ALIGN(size);
ba395927
KA
3256 order = get_order(size);
3257
5040a918 3258 intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
36746436
AM
3259 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3260 __free_pages(page, order);
ba395927
KA
3261}
3262
5040a918 3263static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3264 int nelems, enum dma_data_direction dir,
3265 struct dma_attrs *attrs)
ba395927 3266{
ba395927 3267 struct dmar_domain *domain;
d794dc9b 3268 unsigned long start_pfn, last_pfn;
f76aec76 3269 struct iova *iova;
8c11e798 3270 struct intel_iommu *iommu;
ea8ea460 3271 struct page *freelist;
ba395927 3272
5040a918 3273 if (iommu_no_mapping(dev))
ba395927
KA
3274 return;
3275
5040a918 3276 domain = find_domain(dev);
8c11e798
WH
3277 BUG_ON(!domain);
3278
3279 iommu = domain_get_iommu(domain);
ba395927 3280
c03ab37c 3281 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
3282 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
3283 (unsigned long long)sglist[0].dma_address))
f76aec76 3284 return;
f76aec76 3285
d794dc9b
DW
3286 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3287 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76 3288
ea8ea460 3289 freelist = domain_unmap(domain, start_pfn, last_pfn);
f76aec76 3290
acea0018
DW
3291 if (intel_iommu_strict) {
3292 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
ea8ea460 3293 last_pfn - start_pfn + 1, !freelist, 0);
acea0018
DW
3294 /* free iova */
3295 __free_iova(&domain->iovad, iova);
ea8ea460 3296 dma_free_pagelist(freelist);
acea0018 3297 } else {
ea8ea460 3298 add_unmap(domain, iova, freelist);
acea0018
DW
3299 /*
3300 * queue up the release of the unmap to save the 1/6th of the
3301 * cpu used up by the iotlb flush operation...
3302 */
3303 }
ba395927
KA
3304}
3305
ba395927 3306static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3307 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3308{
3309 int i;
c03ab37c 3310 struct scatterlist *sg;
ba395927 3311
c03ab37c 3312 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3313 BUG_ON(!sg_page(sg));
4cf2e75d 3314 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3315 sg->dma_length = sg->length;
ba395927
KA
3316 }
3317 return nelems;
3318}
3319
5040a918 3320static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3321 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3322{
ba395927 3323 int i;
ba395927 3324 struct dmar_domain *domain;
f76aec76
KA
3325 size_t size = 0;
3326 int prot = 0;
f76aec76
KA
3327 struct iova *iova = NULL;
3328 int ret;
c03ab37c 3329 struct scatterlist *sg;
b536d24d 3330 unsigned long start_vpfn;
8c11e798 3331 struct intel_iommu *iommu;
ba395927
KA
3332
3333 BUG_ON(dir == DMA_NONE);
5040a918
DW
3334 if (iommu_no_mapping(dev))
3335 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3336
5040a918 3337 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3338 if (!domain)
3339 return 0;
3340
8c11e798
WH
3341 iommu = domain_get_iommu(domain);
3342
b536d24d 3343 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3344 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3345
5040a918
DW
3346 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3347 *dev->dma_mask);
f76aec76 3348 if (!iova) {
c03ab37c 3349 sglist->dma_length = 0;
f76aec76
KA
3350 return 0;
3351 }
3352
3353 /*
3354 * Check if DMAR supports zero-length reads on write only
3355 * mappings..
3356 */
3357 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3358 !cap_zlr(iommu->cap))
f76aec76
KA
3359 prot |= DMA_PTE_READ;
3360 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3361 prot |= DMA_PTE_WRITE;
3362
b536d24d 3363 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3364
f532959b 3365 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
3366 if (unlikely(ret)) {
3367 /* clear the page */
3368 dma_pte_clear_range(domain, start_vpfn,
3369 start_vpfn + size - 1);
3370 /* free page tables */
3371 dma_pte_free_pagetable(domain, start_vpfn,
3372 start_vpfn + size - 1);
3373 /* free iova */
3374 __free_iova(&domain->iovad, iova);
3375 return 0;
ba395927
KA
3376 }
3377
1f0ef2aa
DW
3378 /* it's a non-present to present mapping. Only flush if caching mode */
3379 if (cap_caching_mode(iommu->cap))
ea8ea460 3380 iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1);
1f0ef2aa 3381 else
8c11e798 3382 iommu_flush_write_buffer(iommu);
1f0ef2aa 3383
ba395927
KA
3384 return nelems;
3385}
3386
dfb805e8
FT
3387static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3388{
3389 return !dma_addr;
3390}
3391
160c1d8e 3392struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3393 .alloc = intel_alloc_coherent,
3394 .free = intel_free_coherent,
ba395927
KA
3395 .map_sg = intel_map_sg,
3396 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3397 .map_page = intel_map_page,
3398 .unmap_page = intel_unmap_page,
dfb805e8 3399 .mapping_error = intel_mapping_error,
ba395927
KA
3400};
3401
3402static inline int iommu_domain_cache_init(void)
3403{
3404 int ret = 0;
3405
3406 iommu_domain_cache = kmem_cache_create("iommu_domain",
3407 sizeof(struct dmar_domain),
3408 0,
3409 SLAB_HWCACHE_ALIGN,
3410
3411 NULL);
3412 if (!iommu_domain_cache) {
3413 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
3414 ret = -ENOMEM;
3415 }
3416
3417 return ret;
3418}
3419
3420static inline int iommu_devinfo_cache_init(void)
3421{
3422 int ret = 0;
3423
3424 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3425 sizeof(struct device_domain_info),
3426 0,
3427 SLAB_HWCACHE_ALIGN,
ba395927
KA
3428 NULL);
3429 if (!iommu_devinfo_cache) {
3430 printk(KERN_ERR "Couldn't create devinfo cache\n");
3431 ret = -ENOMEM;
3432 }
3433
3434 return ret;
3435}
3436
3437static inline int iommu_iova_cache_init(void)
3438{
3439 int ret = 0;
3440
3441 iommu_iova_cache = kmem_cache_create("iommu_iova",
3442 sizeof(struct iova),
3443 0,
3444 SLAB_HWCACHE_ALIGN,
ba395927
KA
3445 NULL);
3446 if (!iommu_iova_cache) {
3447 printk(KERN_ERR "Couldn't create iova cache\n");
3448 ret = -ENOMEM;
3449 }
3450
3451 return ret;
3452}
3453
3454static int __init iommu_init_mempool(void)
3455{
3456 int ret;
3457 ret = iommu_iova_cache_init();
3458 if (ret)
3459 return ret;
3460
3461 ret = iommu_domain_cache_init();
3462 if (ret)
3463 goto domain_error;
3464
3465 ret = iommu_devinfo_cache_init();
3466 if (!ret)
3467 return ret;
3468
3469 kmem_cache_destroy(iommu_domain_cache);
3470domain_error:
3471 kmem_cache_destroy(iommu_iova_cache);
3472
3473 return -ENOMEM;
3474}
3475
3476static void __init iommu_exit_mempool(void)
3477{
3478 kmem_cache_destroy(iommu_devinfo_cache);
3479 kmem_cache_destroy(iommu_domain_cache);
3480 kmem_cache_destroy(iommu_iova_cache);
3481
3482}
3483
556ab45f
DW
3484static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3485{
3486 struct dmar_drhd_unit *drhd;
3487 u32 vtbar;
3488 int rc;
3489
3490 /* We know that this device on this chipset has its own IOMMU.
3491 * If we find it under a different IOMMU, then the BIOS is lying
3492 * to us. Hope that the IOMMU for this device is actually
3493 * disabled, and it needs no translation...
3494 */
3495 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3496 if (rc) {
3497 /* "can't" happen */
3498 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3499 return;
3500 }
3501 vtbar &= 0xffff0000;
3502
3503 /* we know that this iommu should be at offset 0xa000 from vtbar */
3504 drhd = dmar_find_matched_drhd_unit(pdev);
3505 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3506 TAINT_FIRMWARE_WORKAROUND,
3507 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3508 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3509}
3510DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3511
ba395927
KA
3512static void __init init_no_remapping_devices(void)
3513{
3514 struct dmar_drhd_unit *drhd;
832bd858 3515 struct device *dev;
b683b230 3516 int i;
ba395927
KA
3517
3518 for_each_drhd_unit(drhd) {
3519 if (!drhd->include_all) {
b683b230
JL
3520 for_each_active_dev_scope(drhd->devices,
3521 drhd->devices_cnt, i, dev)
3522 break;
832bd858 3523 /* ignore DMAR unit if no devices exist */
ba395927
KA
3524 if (i == drhd->devices_cnt)
3525 drhd->ignored = 1;
3526 }
3527 }
3528
7c919779 3529 for_each_active_drhd_unit(drhd) {
7c919779 3530 if (drhd->include_all)
ba395927
KA
3531 continue;
3532
b683b230
JL
3533 for_each_active_dev_scope(drhd->devices,
3534 drhd->devices_cnt, i, dev)
832bd858 3535 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3536 break;
ba395927
KA
3537 if (i < drhd->devices_cnt)
3538 continue;
3539
c0771df8
DW
3540 /* This IOMMU has *only* gfx devices. Either bypass it or
3541 set the gfx_mapped flag, as appropriate */
3542 if (dmar_map_gfx) {
3543 intel_iommu_gfx_mapped = 1;
3544 } else {
3545 drhd->ignored = 1;
b683b230
JL
3546 for_each_active_dev_scope(drhd->devices,
3547 drhd->devices_cnt, i, dev)
832bd858 3548 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3549 }
3550 }
3551}
3552
f59c7b69
FY
3553#ifdef CONFIG_SUSPEND
3554static int init_iommu_hw(void)
3555{
3556 struct dmar_drhd_unit *drhd;
3557 struct intel_iommu *iommu = NULL;
3558
3559 for_each_active_iommu(iommu, drhd)
3560 if (iommu->qi)
3561 dmar_reenable_qi(iommu);
3562
b779260b
JC
3563 for_each_iommu(iommu, drhd) {
3564 if (drhd->ignored) {
3565 /*
3566 * we always have to disable PMRs or DMA may fail on
3567 * this device
3568 */
3569 if (force_on)
3570 iommu_disable_protect_mem_regions(iommu);
3571 continue;
3572 }
3573
f59c7b69
FY
3574 iommu_flush_write_buffer(iommu);
3575
3576 iommu_set_root_entry(iommu);
3577
3578 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3579 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3580 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3581 DMA_TLB_GLOBAL_FLUSH);
b779260b
JC
3582 if (iommu_enable_translation(iommu))
3583 return 1;
b94996c9 3584 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3585 }
3586
3587 return 0;
3588}
3589
3590static void iommu_flush_all(void)
3591{
3592 struct dmar_drhd_unit *drhd;
3593 struct intel_iommu *iommu;
3594
3595 for_each_active_iommu(iommu, drhd) {
3596 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3597 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3598 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3599 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3600 }
3601}
3602
134fac3f 3603static int iommu_suspend(void)
f59c7b69
FY
3604{
3605 struct dmar_drhd_unit *drhd;
3606 struct intel_iommu *iommu = NULL;
3607 unsigned long flag;
3608
3609 for_each_active_iommu(iommu, drhd) {
3610 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3611 GFP_ATOMIC);
3612 if (!iommu->iommu_state)
3613 goto nomem;
3614 }
3615
3616 iommu_flush_all();
3617
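	/*
	 * Translation is disabled below; the fault-event registers
	 * (FECTL/FEDATA/FEADDR/FEUADDR) are saved so iommu_resume() can
	 * restore them once init_iommu_hw() has re-initialized the hardware.
	 */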
3618 for_each_active_iommu(iommu, drhd) {
3619 iommu_disable_translation(iommu);
3620
1f5b3c3f 3621 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3622
3623 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3624 readl(iommu->reg + DMAR_FECTL_REG);
3625 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3626 readl(iommu->reg + DMAR_FEDATA_REG);
3627 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3628 readl(iommu->reg + DMAR_FEADDR_REG);
3629 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3630 readl(iommu->reg + DMAR_FEUADDR_REG);
3631
1f5b3c3f 3632 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3633 }
3634 return 0;
3635
3636nomem:
3637 for_each_active_iommu(iommu, drhd)
3638 kfree(iommu->iommu_state);
3639
3640 return -ENOMEM;
3641}
3642
134fac3f 3643static void iommu_resume(void)
f59c7b69
FY
3644{
3645 struct dmar_drhd_unit *drhd;
3646 struct intel_iommu *iommu = NULL;
3647 unsigned long flag;
3648
3649 if (init_iommu_hw()) {
b779260b
JC
3650 if (force_on)
3651 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3652 else
3653 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3654 return;
f59c7b69
FY
3655 }
3656
3657 for_each_active_iommu(iommu, drhd) {
3658
1f5b3c3f 3659 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3660
3661 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3662 iommu->reg + DMAR_FECTL_REG);
3663 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3664 iommu->reg + DMAR_FEDATA_REG);
3665 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3666 iommu->reg + DMAR_FEADDR_REG);
3667 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3668 iommu->reg + DMAR_FEUADDR_REG);
3669
1f5b3c3f 3670 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3671 }
3672
3673 for_each_active_iommu(iommu, drhd)
3674 kfree(iommu->iommu_state);
f59c7b69
FY
3675}
3676
134fac3f 3677static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3678 .resume = iommu_resume,
3679 .suspend = iommu_suspend,
3680};
3681
134fac3f 3682static void __init init_iommu_pm_ops(void)
f59c7b69 3683{
134fac3f 3684 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3685}
3686
3687#else
99592ba4 3688static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
 3689#endif /* CONFIG_SUSPEND */
3690
318fe7df
SS
3691
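/*
 * Parse one RMRR (Reserved Memory Region Reporting) structure from the ACPI
 * DMAR table: record its address range and device scope in a dmar_rmrr_unit
 * and add it to the global dmar_rmrr_units list.
 */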
3692int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header)
3693{
3694 struct acpi_dmar_reserved_memory *rmrr;
3695 struct dmar_rmrr_unit *rmrru;
3696
3697 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3698 if (!rmrru)
3699 return -ENOMEM;
3700
3701 rmrru->hdr = header;
3702 rmrr = (struct acpi_dmar_reserved_memory *)header;
3703 rmrru->base_address = rmrr->base_address;
3704 rmrru->end_address = rmrr->end_address;
2e455289
JL
3705 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3706 ((void *)rmrr) + rmrr->header.length,
3707 &rmrru->devices_cnt);
3708 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3709 kfree(rmrru);
3710 return -ENOMEM;
3711 }
318fe7df 3712
2e455289 3713 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3714
2e455289 3715 return 0;
318fe7df
SS
3716}
3717
318fe7df
SS
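/*
 * Parse one ATSR structure, which reports the PCIe root ports under which
 * ATS (Address Translation Services) may be enabled; include_all means the
 * entry covers every root port on the segment.
 */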
3718int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
3719{
3720 struct acpi_dmar_atsr *atsr;
3721 struct dmar_atsr_unit *atsru;
3722
3723 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
3724 atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
3725 if (!atsru)
3726 return -ENOMEM;
3727
3728 atsru->hdr = hdr;
3729 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3730 if (!atsru->include_all) {
3731 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
3732 (void *)atsr + atsr->header.length,
3733 &atsru->devices_cnt);
3734 if (atsru->devices_cnt && atsru->devices == NULL) {
3735 kfree(atsru);
3736 return -ENOMEM;
3737 }
3738 }
318fe7df 3739
0e242612 3740 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
3741
3742 return 0;
3743}
3744
9bdc531e
JL
3745static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
3746{
3747 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
3748 kfree(atsru);
3749}
3750
3751static void intel_iommu_free_dmars(void)
3752{
3753 struct dmar_rmrr_unit *rmrru, *rmrr_n;
3754 struct dmar_atsr_unit *atsru, *atsr_n;
3755
3756 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
3757 list_del(&rmrru->list);
3758 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
3759 kfree(rmrru);
318fe7df
SS
3760 }
3761
9bdc531e
JL
3762 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
3763 list_del(&atsru->list);
3764 intel_iommu_free_atsr(atsru);
3765 }
318fe7df
SS
3766}
3767
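/*
 * Walk up from the device to its PCIe root port and return 1 if an ATSR on
 * the same PCI segment lists that root port (or is an include_all entry),
 * 0 otherwise.
 */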
3768int dmar_find_matched_atsr_unit(struct pci_dev *dev)
3769{
b683b230 3770 int i, ret = 1;
318fe7df 3771 struct pci_bus *bus;
832bd858
DW
3772 struct pci_dev *bridge = NULL;
3773 struct device *tmp;
318fe7df
SS
3774 struct acpi_dmar_atsr *atsr;
3775 struct dmar_atsr_unit *atsru;
3776
3777 dev = pci_physfn(dev);
318fe7df 3778 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 3779 bridge = bus->self;
318fe7df 3780 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 3781 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 3782 return 0;
b5f82ddf 3783 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 3784 break;
318fe7df 3785 }
b5f82ddf
JL
3786 if (!bridge)
3787 return 0;
318fe7df 3788
0e242612 3789 rcu_read_lock();
b5f82ddf
JL
3790 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3791 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3792 if (atsr->segment != pci_domain_nr(dev->bus))
3793 continue;
3794
b683b230 3795 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 3796 if (tmp == &bridge->dev)
b683b230 3797 goto out;
b5f82ddf
JL
3798
3799 if (atsru->include_all)
b683b230 3800 goto out;
b5f82ddf 3801 }
b683b230
JL
3802 ret = 0;
3803out:
0e242612 3804 rcu_read_unlock();
318fe7df 3805
b683b230 3806 return ret;
318fe7df
SS
3807}
3808
59ce0515
JL
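/*
 * Called from the DMAR PCI bus notifier: keep the cached RMRR and ATSR
 * device-scope arrays in sync as PCI devices are added to or removed from
 * the system.
 */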
3809int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
3810{
3811 int ret = 0;
3812 struct dmar_rmrr_unit *rmrru;
3813 struct dmar_atsr_unit *atsru;
3814 struct acpi_dmar_atsr *atsr;
3815 struct acpi_dmar_reserved_memory *rmrr;
3816
3817 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
3818 return 0;
3819
3820 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
3821 rmrr = container_of(rmrru->hdr,
3822 struct acpi_dmar_reserved_memory, header);
3823 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3824 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
3825 ((void *)rmrr) + rmrr->header.length,
3826 rmrr->segment, rmrru->devices,
3827 rmrru->devices_cnt);
27e24950 3828 if (ret < 0)
59ce0515
JL
3829 return ret;
3830 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
3831 dmar_remove_dev_scope(info, rmrr->segment,
3832 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
3833 }
3834 }
3835
3836 list_for_each_entry(atsru, &dmar_atsr_units, list) {
3837 if (atsru->include_all)
3838 continue;
3839
3840 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
3841 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
3842 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
3843 (void *)atsr + atsr->header.length,
3844 atsr->segment, atsru->devices,
3845 atsru->devices_cnt);
3846 if (ret > 0)
3847 break;
 3848 else if (ret < 0)
3849 return ret;
3850 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
3851 if (dmar_remove_dev_scope(info, atsr->segment,
3852 atsru->devices, atsru->devices_cnt))
3853 break;
3854 }
3855 }
3856
3857 return 0;
3858}
3859
99dcaded
FY
3860/*
 3861 * Here we only respond to a device being unbound from its driver or
 3862 * removed from the system.
 3863 *
 3864 * A newly added device is not attached to its DMAR domain here yet; that
 3865 * happens when the device is first mapped to an iova.
3865 */
3866static int device_notifier(struct notifier_block *nb,
3867 unsigned long action, void *data)
3868{
3869 struct device *dev = data;
99dcaded
FY
3870 struct dmar_domain *domain;
3871
3d89194a 3872 if (iommu_dummy(dev))
44cd613c
DW
3873 return 0;
3874
7e7dfab7
JL
3875 if (action != BUS_NOTIFY_UNBOUND_DRIVER &&
3876 action != BUS_NOTIFY_DEL_DEVICE)
3877 return 0;
3878
1525a29a 3879 domain = find_domain(dev);
99dcaded
FY
3880 if (!domain)
3881 return 0;
3882
3a5670e8 3883 down_read(&dmar_global_lock);
bf9c9eda 3884 domain_remove_one_dev_info(domain, dev);
ab8dfe25 3885 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 3886 domain_exit(domain);
3a5670e8 3887 up_read(&dmar_global_lock);
a97590e5 3888
99dcaded
FY
3889 return 0;
3890}
3891
3892static struct notifier_block device_nb = {
3893 .notifier_call = device_notifier,
3894};
3895
75f05569
JL
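/*
 * Memory hotplug: when a block goes online, extend the static identity
 * (si) domain's 1:1 map to cover it; when it goes offline, unmap it, flush
 * the IOTLB on every active IOMMU and release the IOVA range.
 */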
3896static int intel_iommu_memory_notifier(struct notifier_block *nb,
3897 unsigned long val, void *v)
3898{
3899 struct memory_notify *mhp = v;
3900 unsigned long long start, end;
3901 unsigned long start_vpfn, last_vpfn;
3902
3903 switch (val) {
3904 case MEM_GOING_ONLINE:
3905 start = mhp->start_pfn << PAGE_SHIFT;
3906 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
3907 if (iommu_domain_identity_map(si_domain, start, end)) {
3908 pr_warn("dmar: failed to build identity map for [%llx-%llx]\n",
3909 start, end);
3910 return NOTIFY_BAD;
3911 }
3912 break;
3913
3914 case MEM_OFFLINE:
3915 case MEM_CANCEL_ONLINE:
3916 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
3917 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
3918 while (start_vpfn <= last_vpfn) {
3919 struct iova *iova;
3920 struct dmar_drhd_unit *drhd;
3921 struct intel_iommu *iommu;
ea8ea460 3922 struct page *freelist;
75f05569
JL
3923
3924 iova = find_iova(&si_domain->iovad, start_vpfn);
3925 if (iova == NULL) {
 3926 pr_debug("dmar: failed to get IOVA for PFN %lx\n",
3927 start_vpfn);
3928 break;
3929 }
3930
3931 iova = split_and_remove_iova(&si_domain->iovad, iova,
3932 start_vpfn, last_vpfn);
3933 if (iova == NULL) {
3934 pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n",
3935 start_vpfn, last_vpfn);
3936 return NOTIFY_BAD;
3937 }
3938
ea8ea460
DW
3939 freelist = domain_unmap(si_domain, iova->pfn_lo,
3940 iova->pfn_hi);
3941
75f05569
JL
3942 rcu_read_lock();
3943 for_each_active_iommu(iommu, drhd)
3944 iommu_flush_iotlb_psi(iommu, si_domain->id,
3945 iova->pfn_lo,
ea8ea460
DW
3946 iova->pfn_hi - iova->pfn_lo + 1,
3947 !freelist, 0);
75f05569 3948 rcu_read_unlock();
ea8ea460 3949 dma_free_pagelist(freelist);
75f05569
JL
3950
3951 start_vpfn = iova->pfn_hi + 1;
3952 free_iova_mem(iova);
3953 }
3954 break;
3955 }
3956
3957 return NOTIFY_OK;
3958}
3959
3960static struct notifier_block intel_iommu_memory_nb = {
3961 .notifier_call = intel_iommu_memory_notifier,
3962 .priority = 0
3963};
3964
a5459cfe
AW
3965
3966static ssize_t intel_iommu_show_version(struct device *dev,
3967 struct device_attribute *attr,
3968 char *buf)
3969{
3970 struct intel_iommu *iommu = dev_get_drvdata(dev);
3971 u32 ver = readl(iommu->reg + DMAR_VER_REG);
3972 return sprintf(buf, "%d:%d\n",
3973 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
3974}
3975static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
3976
3977static ssize_t intel_iommu_show_address(struct device *dev,
3978 struct device_attribute *attr,
3979 char *buf)
3980{
3981 struct intel_iommu *iommu = dev_get_drvdata(dev);
3982 return sprintf(buf, "%llx\n", iommu->reg_phys);
3983}
3984static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
3985
3986static ssize_t intel_iommu_show_cap(struct device *dev,
3987 struct device_attribute *attr,
3988 char *buf)
3989{
3990 struct intel_iommu *iommu = dev_get_drvdata(dev);
3991 return sprintf(buf, "%llx\n", iommu->cap);
3992}
3993static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
3994
3995static ssize_t intel_iommu_show_ecap(struct device *dev,
3996 struct device_attribute *attr,
3997 char *buf)
3998{
3999 struct intel_iommu *iommu = dev_get_drvdata(dev);
4000 return sprintf(buf, "%llx\n", iommu->ecap);
4001}
4002static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4003
4004static struct attribute *intel_iommu_attrs[] = {
4005 &dev_attr_version.attr,
4006 &dev_attr_address.attr,
4007 &dev_attr_cap.attr,
4008 &dev_attr_ecap.attr,
4009 NULL,
4010};
4011
4012static struct attribute_group intel_iommu_group = {
4013 .name = "intel-iommu",
4014 .attrs = intel_iommu_attrs,
4015};
4016
4017const struct attribute_group *intel_iommu_groups[] = {
4018 &intel_iommu_group,
4019 NULL,
4020};
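/*
 * The attributes above are exported through the iommu device registered in
 * intel_iommu_init() via iommu_device_create(); the exact sysfs path is not
 * spelled out here, but it is typically of the form
 * /sys/class/iommu/dmar<N>/intel-iommu/<attr> (path is an assumption).
 */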
4021
ba395927
KA
4022int __init intel_iommu_init(void)
4023{
9bdc531e 4024 int ret = -ENODEV;
3a93c841 4025 struct dmar_drhd_unit *drhd;
7c919779 4026 struct intel_iommu *iommu;
ba395927 4027
a59b50e9
JC
4028 /* VT-d is required for a TXT/tboot launch, so enforce that */
4029 force_on = tboot_force_iommu();
4030
3a5670e8
JL
4031 if (iommu_init_mempool()) {
4032 if (force_on)
4033 panic("tboot: Failed to initialize iommu memory\n");
4034 return -ENOMEM;
4035 }
4036
4037 down_write(&dmar_global_lock);
a59b50e9
JC
4038 if (dmar_table_init()) {
4039 if (force_on)
4040 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4041 goto out_free_dmar;
a59b50e9 4042 }
ba395927 4043
3a93c841
TI
4044 /*
4045 * Disable translation if already enabled prior to OS handover.
4046 */
7c919779 4047 for_each_active_iommu(iommu, drhd)
3a93c841
TI
4048 if (iommu->gcmd & DMA_GCMD_TE)
4049 iommu_disable_translation(iommu);
3a93c841 4050
c2c7286a 4051 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4052 if (force_on)
4053 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4054 goto out_free_dmar;
a59b50e9 4055 }
1886e8a9 4056
75f1cdf1 4057 if (no_iommu || dmar_disabled)
9bdc531e 4058 goto out_free_dmar;
2ae21010 4059
318fe7df
SS
4060 if (list_empty(&dmar_rmrr_units))
4061 printk(KERN_INFO "DMAR: No RMRR found\n");
4062
4063 if (list_empty(&dmar_atsr_units))
4064 printk(KERN_INFO "DMAR: No ATSR found\n");
4065
51a63e67
JC
4066 if (dmar_init_reserved_ranges()) {
4067 if (force_on)
4068 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4069 goto out_free_reserved_range;
51a63e67 4070 }
ba395927
KA
4071
4072 init_no_remapping_devices();
4073
b779260b 4074 ret = init_dmars();
ba395927 4075 if (ret) {
a59b50e9
JC
4076 if (force_on)
4077 panic("tboot: Failed to initialize DMARs\n");
ba395927 4078 printk(KERN_ERR "IOMMU: dmar init failed\n");
9bdc531e 4079 goto out_free_reserved_range;
ba395927 4080 }
3a5670e8 4081 up_write(&dmar_global_lock);
ba395927
KA
4082 printk(KERN_INFO
4083 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
4084
5e0d2a6f 4085 init_timer(&unmap_timer);
75f1cdf1
FT
4086#ifdef CONFIG_SWIOTLB
4087 swiotlb = 0;
4088#endif
19943b0e 4089 dma_ops = &intel_dma_ops;
4ed0d3e6 4090
134fac3f 4091 init_iommu_pm_ops();
a8bcbb0d 4092
a5459cfe
AW
4093 for_each_active_iommu(iommu, drhd)
4094 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4095 intel_iommu_groups,
4096 iommu->name);
4097
4236d97d 4098 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4099 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4100 if (si_domain && !hw_pass_through)
4101 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4102
8bc1f85c
ED
4103 intel_iommu_enabled = 1;
4104
ba395927 4105 return 0;
9bdc531e
JL
4106
4107out_free_reserved_range:
4108 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4109out_free_dmar:
4110 intel_iommu_free_dmars();
3a5670e8
JL
4111 up_write(&dmar_global_lock);
4112 iommu_exit_mempool();
9bdc531e 4113 return ret;
ba395927 4114}
e820482c 4115
579305f7
AW
4116static int iommu_detach_dev_cb(struct pci_dev *pdev, u16 alias, void *opaque)
4117{
4118 struct intel_iommu *iommu = opaque;
4119
4120 iommu_detach_dev(iommu, PCI_BUS_NUM(alias), alias & 0xff);
4121 return 0;
4122}
4123
4124/*
4125 * NB - intel-iommu lacks any sort of reference counting for the users of
4126 * dependent devices. If multiple endpoints have intersecting dependent
 4127 * devices, unbinding the driver from any one of them may leave
4128 * the others unable to operate.
4129 */
3199aa6b 4130static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
0bcb3e28 4131 struct device *dev)
3199aa6b 4132{
0bcb3e28 4133 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4134 return;
4135
579305f7 4136 pci_for_each_dma_alias(to_pci_dev(dev), &iommu_detach_dev_cb, iommu);
3199aa6b
HW
4137}
4138
2c2e2c38 4139static void domain_remove_one_dev_info(struct dmar_domain *domain,
bf9c9eda 4140 struct device *dev)
c7151a8d 4141{
bca2b916 4142 struct device_domain_info *info, *tmp;
c7151a8d
WH
4143 struct intel_iommu *iommu;
4144 unsigned long flags;
4145 int found = 0;
156baca8 4146 u8 bus, devfn;
c7151a8d 4147
bf9c9eda 4148 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4149 if (!iommu)
4150 return;
4151
4152 spin_lock_irqsave(&device_domain_lock, flags);
bca2b916 4153 list_for_each_entry_safe(info, tmp, &domain->devices, link) {
bf9c9eda
DW
4154 if (info->iommu == iommu && info->bus == bus &&
4155 info->devfn == devfn) {
109b9b04 4156 unlink_domain_info(info);
c7151a8d
WH
4157 spin_unlock_irqrestore(&device_domain_lock, flags);
4158
93a23a72 4159 iommu_disable_dev_iotlb(info);
c7151a8d 4160 iommu_detach_dev(iommu, info->bus, info->devfn);
bf9c9eda 4161 iommu_detach_dependent_devices(iommu, dev);
c7151a8d
WH
4162 free_devinfo_mem(info);
4163
4164 spin_lock_irqsave(&device_domain_lock, flags);
4165
4166 if (found)
4167 break;
4168 else
4169 continue;
4170 }
4171
 4172 /* if there are no other devices under the same iommu
 4173 * owned by this domain, clear this iommu in iommu_bmp and
 4174 * update the iommu count and coherency
4175 */
8bbc4410 4176 if (info->iommu == iommu)
c7151a8d
WH
4177 found = 1;
4178 }
4179
3e7abe25
RD
4180 spin_unlock_irqrestore(&device_domain_lock, flags);
4181
c7151a8d 4182 if (found == 0) {
fb170fb4
JL
4183 domain_detach_iommu(domain, iommu);
4184 if (!domain_type_is_vm_or_si(domain))
4185 iommu_detach_domain(domain, iommu);
c7151a8d 4186 }
c7151a8d
WH
4187}
4188
2c2e2c38 4189static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4190{
4191 int adjust_width;
4192
4193 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
4194 domain_reserve_special_ranges(domain);
4195
4196 /* calculate AGAW */
4197 domain->gaw = guest_width;
4198 adjust_width = guestwidth_to_adjustwidth(guest_width);
4199 domain->agaw = width_to_agaw(adjust_width);
4200
5e98c4b1 4201 domain->iommu_coherency = 0;
c5b15255 4202 domain->iommu_snooping = 0;
6dd9a7c7 4203 domain->iommu_superpage = 0;
fe40f1e0 4204 domain->max_addr = 0;
5e98c4b1
WH
4205
4206 /* always allocate the top pgd */
4c923d47 4207 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4208 if (!domain->pgd)
4209 return -ENOMEM;
4210 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4211 return 0;
4212}
4213
5d450806 4214static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 4215{
5d450806 4216 struct dmar_domain *dmar_domain;
38717946 4217
ab8dfe25 4218 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4219 if (!dmar_domain) {
38717946 4220 printk(KERN_ERR
5d450806
JR
4221 "intel_iommu_domain_init: dmar_domain == NULL\n");
4222 return -ENOMEM;
38717946 4223 }
2c2e2c38 4224 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 4225 printk(KERN_ERR
5d450806 4226 "intel_iommu_domain_init() failed\n");
92d03cc8 4227 domain_exit(dmar_domain);
5d450806 4228 return -ENOMEM;
38717946 4229 }
8140a95d 4230 domain_update_iommu_cap(dmar_domain);
5d450806 4231 domain->priv = dmar_domain;
faa3d6f5 4232
8a0e715b
JR
4233 domain->geometry.aperture_start = 0;
4234 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4235 domain->geometry.force_aperture = true;
4236
5d450806 4237 return 0;
38717946 4238}
38717946 4239
5d450806 4240static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 4241{
5d450806
JR
4242 struct dmar_domain *dmar_domain = domain->priv;
4243
4244 domain->priv = NULL;
92d03cc8 4245 domain_exit(dmar_domain);
38717946 4246}
38717946 4247
4c5478c9
JR
4248static int intel_iommu_attach_device(struct iommu_domain *domain,
4249 struct device *dev)
38717946 4250{
4c5478c9 4251 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
4252 struct intel_iommu *iommu;
4253 int addr_width;
156baca8 4254 u8 bus, devfn;
faa3d6f5 4255
7207d8f9
DW
4256 /* normally dev is not mapped */
4257 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4258 struct dmar_domain *old_domain;
4259
1525a29a 4260 old_domain = find_domain(dev);
faa3d6f5 4261 if (old_domain) {
ab8dfe25 4262 if (domain_type_is_vm_or_si(dmar_domain))
bf9c9eda 4263 domain_remove_one_dev_info(old_domain, dev);
faa3d6f5
WH
4264 else
4265 domain_remove_dev_info(old_domain);
4266 }
4267 }
4268
156baca8 4269 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4270 if (!iommu)
4271 return -ENODEV;
4272
4273 /* check if this iommu agaw is sufficient for max mapped address */
4274 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4275 if (addr_width > cap_mgaw(iommu->cap))
4276 addr_width = cap_mgaw(iommu->cap);
4277
4278 if (dmar_domain->max_addr > (1LL << addr_width)) {
4279 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4280 "sufficient for the mapped address (%llx)\n",
a99c47a2 4281 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4282 return -EFAULT;
4283 }
a99c47a2
TL
4284 dmar_domain->gaw = addr_width;
4285
4286 /*
4287 * Knock out extra levels of page tables if necessary
4288 */
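	/*
	 * Only the first slot of each extra top-level table can be in use
	 * (max_addr was bounded against the clamped width above), so the
	 * old top level can be dropped safely.
	 */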
4289 while (iommu->agaw < dmar_domain->agaw) {
4290 struct dma_pte *pte;
4291
4292 pte = dmar_domain->pgd;
4293 if (dma_pte_present(pte)) {
25cbff16
SY
4294 dmar_domain->pgd = (struct dma_pte *)
4295 phys_to_virt(dma_pte_addr(pte));
7a661013 4296 free_pgtable_page(pte);
a99c47a2
TL
4297 }
4298 dmar_domain->agaw--;
4299 }
fe40f1e0 4300
5913c9bf 4301 return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL);
38717946 4302}
38717946 4303
4c5478c9
JR
4304static void intel_iommu_detach_device(struct iommu_domain *domain,
4305 struct device *dev)
38717946 4306{
4c5478c9 4307 struct dmar_domain *dmar_domain = domain->priv;
4c5478c9 4308
bf9c9eda 4309 domain_remove_one_dev_info(dmar_domain, dev);
faa3d6f5 4310}
c7151a8d 4311
b146a1c9
JR
4312static int intel_iommu_map(struct iommu_domain *domain,
4313 unsigned long iova, phys_addr_t hpa,
5009065d 4314 size_t size, int iommu_prot)
faa3d6f5 4315{
dde57a21 4316 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0 4317 u64 max_addr;
dde57a21 4318 int prot = 0;
faa3d6f5 4319 int ret;
fe40f1e0 4320
dde57a21
JR
4321 if (iommu_prot & IOMMU_READ)
4322 prot |= DMA_PTE_READ;
4323 if (iommu_prot & IOMMU_WRITE)
4324 prot |= DMA_PTE_WRITE;
9cf06697
SY
4325 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4326 prot |= DMA_PTE_SNP;
dde57a21 4327
163cc52c 4328 max_addr = iova + size;
dde57a21 4329 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4330 u64 end;
4331
4332 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4333 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4334 if (end < max_addr) {
8954da1f 4335 printk(KERN_ERR "%s: iommu width (%d) is not "
fe40f1e0 4336 "sufficient for the mapped address (%llx)\n",
8954da1f 4337 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4338 return -EFAULT;
4339 }
dde57a21 4340 dmar_domain->max_addr = max_addr;
fe40f1e0 4341 }
ad051221
DW
4342 /* Round up size to next multiple of PAGE_SIZE, if it and
4343 the low bits of hpa would take us onto the next page */
88cb6a74 4344 size = aligned_nrpages(hpa, size);
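	/*
	 * e.g. hpa == 0x1ff0 and size == 0x20 touch two 4KiB pages, so
	 * aligned_nrpages() returns 2; from here on "size" is a page count,
	 * not a byte count.
	 */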
ad051221
DW
4345 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4346 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4347 return ret;
38717946 4348}
38717946 4349
5009065d 4350static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4351 unsigned long iova, size_t size)
38717946 4352{
dde57a21 4353 struct dmar_domain *dmar_domain = domain->priv;
ea8ea460
DW
4354 struct page *freelist = NULL;
4355 struct intel_iommu *iommu;
4356 unsigned long start_pfn, last_pfn;
4357 unsigned int npages;
4358 int iommu_id, num, ndomains, level = 0;
5cf0a76f
DW
4359
4360 /* Cope with horrid API which requires us to unmap more than the
4361 size argument if it happens to be a large-page mapping. */
4362 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4363 BUG();
4364
4365 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4366 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4367
ea8ea460
DW
4368 start_pfn = iova >> VTD_PAGE_SHIFT;
4369 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4370
4371 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4372
4373 npages = last_pfn - start_pfn + 1;
4374
4375 for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) {
4376 iommu = g_iommus[iommu_id];
4377
4378 /*
 4379 * find the domain id used for dmar_domain on this iommu
4380 */
4381 ndomains = cap_ndoms(iommu->cap);
4382 for_each_set_bit(num, iommu->domain_ids, ndomains) {
4383 if (iommu->domains[num] == dmar_domain)
4384 iommu_flush_iotlb_psi(iommu, num, start_pfn,
4385 npages, !freelist, 0);
4386 }
4387
4388 }
4389
4390 dma_free_pagelist(freelist);
fe40f1e0 4391
163cc52c
DW
4392 if (dmar_domain->max_addr == iova + size)
4393 dmar_domain->max_addr = iova;
b146a1c9 4394
5cf0a76f 4395 return size;
38717946 4396}
38717946 4397
d14d6577 4398static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4399 dma_addr_t iova)
38717946 4400{
d14d6577 4401 struct dmar_domain *dmar_domain = domain->priv;
38717946 4402 struct dma_pte *pte;
5cf0a76f 4403 int level = 0;
faa3d6f5 4404 u64 phys = 0;
38717946 4405
5cf0a76f 4406 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4407 if (pte)
faa3d6f5 4408 phys = dma_pte_addr(pte);
38717946 4409
faa3d6f5 4410 return phys;
38717946 4411}
a8bcbb0d 4412
dbb9fd86
SY
4413static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
4414 unsigned long cap)
4415{
4416 struct dmar_domain *dmar_domain = domain->priv;
4417
4418 if (cap == IOMMU_CAP_CACHE_COHERENCY)
4419 return dmar_domain->iommu_snooping;
323f99cb 4420 if (cap == IOMMU_CAP_INTR_REMAP)
95a02e97 4421 return irq_remapping_enabled;
dbb9fd86
SY
4422
4423 return 0;
4424}
4425
abdfdde2
AW
4426static int intel_iommu_add_device(struct device *dev)
4427{
a5459cfe 4428 struct intel_iommu *iommu;
abdfdde2 4429 struct iommu_group *group;
156baca8 4430 u8 bus, devfn;
70ae6f0d 4431
a5459cfe
AW
4432 iommu = device_to_iommu(dev, &bus, &devfn);
4433 if (!iommu)
70ae6f0d
AW
4434 return -ENODEV;
4435
a5459cfe 4436 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4437
e17f9ff4 4438 group = iommu_group_get_for_dev(dev);
783f157b 4439
e17f9ff4
AW
4440 if (IS_ERR(group))
4441 return PTR_ERR(group);
bcb71abe 4442
abdfdde2 4443 iommu_group_put(group);
e17f9ff4 4444 return 0;
abdfdde2 4445}
70ae6f0d 4446
abdfdde2
AW
4447static void intel_iommu_remove_device(struct device *dev)
4448{
a5459cfe
AW
4449 struct intel_iommu *iommu;
4450 u8 bus, devfn;
4451
4452 iommu = device_to_iommu(dev, &bus, &devfn);
4453 if (!iommu)
4454 return;
4455
abdfdde2 4456 iommu_group_remove_device(dev);
a5459cfe
AW
4457
4458 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4459}
4460
b22f6434 4461static const struct iommu_ops intel_iommu_ops = {
a8bcbb0d
JR
4462 .domain_init = intel_iommu_domain_init,
4463 .domain_destroy = intel_iommu_domain_destroy,
4464 .attach_dev = intel_iommu_attach_device,
4465 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4466 .map = intel_iommu_map,
4467 .unmap = intel_iommu_unmap,
a8bcbb0d 4468 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 4469 .domain_has_cap = intel_iommu_domain_has_cap,
abdfdde2
AW
4470 .add_device = intel_iommu_add_device,
4471 .remove_device = intel_iommu_remove_device,
6d1c56a9 4472 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4473};
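/*
 * Rough usage sketch (not part of this file): once these ops are registered
 * through bus_set_iommu() in intel_iommu_init(), a user such as VFIO drives
 * them via the generic IOMMU API, roughly:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *	if (dom && !iommu_attach_device(dom, &pdev->dev))
 *		iommu_map(dom, iova, phys, SZ_4K, IOMMU_READ | IOMMU_WRITE);
 *
 * which ends up in intel_iommu_domain_init(), intel_iommu_attach_device()
 * and intel_iommu_map() above.
 */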
9af88143 4474
9452618e
DV
4475static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4476{
4477 /* G4x/GM45 integrated gfx dmar support is totally busted. */
4478 printk(KERN_INFO "DMAR: Disabling IOMMU for graphics on this chipset\n");
4479 dmar_map_gfx = 0;
4480}
4481
4482DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4483DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4484DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4485DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4486DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4487DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4488DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4489
d34d6517 4490static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4491{
4492 /*
4493 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4494 * but needs it. Same seems to hold for the desktop versions.
9af88143
DW
4495 */
4496 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
4497 rwbf_quirk = 1;
4498}
4499
4500DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4501DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4502DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4503DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4504DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4505DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4506DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4507
eecfd57f
AJ
4508#define GGC 0x52
4509#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4510#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4511#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4512#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4513#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4514#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4515#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4516#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4517
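/*
 * GGC is the graphics control register in the integrated graphics device's
 * PCI config space.  The values above encode how much memory the BIOS
 * reserved for the GTT; the _VT_ variants indicate space was also set aside
 * for the VT-d shadow GTT, and without it the quirk below disables the
 * IOMMU for graphics.
 */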
d34d6517 4518static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4519{
4520 unsigned short ggc;
4521
eecfd57f 4522 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4523 return;
4524
eecfd57f 4525 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9eecabcb
DW
4526 printk(KERN_INFO "DMAR: BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
4527 dmar_map_gfx = 0;
6fbcfb3e
DW
4528 } else if (dmar_map_gfx) {
4529 /* we have to ensure the gfx device is idle before we flush */
4530 printk(KERN_INFO "DMAR: Disabling batched IOTLB flush on Ironlake\n");
4531 intel_iommu_strict = 1;
4532 }
9eecabcb
DW
4533}
4534DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4535DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4536DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4537DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4538
e0fc7e0b
DW
4539/* On Tylersburg chipsets, some BIOSes have been known to enable the
4540 ISOCH DMAR unit for the Azalia sound device, but not give it any
4541 TLB entries, which causes it to deadlock. Check for that. We do
4542 this in a function called from init_dmars(), instead of in a PCI
4543 quirk, because we don't want to print the obnoxious "BIOS broken"
4544 message if VT-d is actually disabled.
4545*/
4546static void __init check_tylersburg_isoch(void)
4547{
4548 struct pci_dev *pdev;
4549 uint32_t vtisochctrl;
4550
4551 /* If there's no Azalia in the system anyway, forget it. */
4552 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4553 if (!pdev)
4554 return;
4555 pci_dev_put(pdev);
4556
4557 /* System Management Registers. Might be hidden, in which case
4558 we can't do the sanity check. But that's OK, because the
4559 known-broken BIOSes _don't_ actually hide it, so far. */
4560 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4561 if (!pdev)
4562 return;
4563
4564 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4565 pci_dev_put(pdev);
4566 return;
4567 }
4568
4569 pci_dev_put(pdev);
4570
4571 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4572 if (vtisochctrl & 1)
4573 return;
4574
4575 /* Drop all bits other than the number of TLB entries */
4576 vtisochctrl &= 0x1c;
4577
4578 /* If we have the recommended number of TLB entries (16), fine. */
4579 if (vtisochctrl == 0x10)
4580 return;
4581
4582 /* Zero TLB entries? You get to ride the short bus to school. */
4583 if (!vtisochctrl) {
4584 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4585 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4586 dmi_get_system_info(DMI_BIOS_VENDOR),
4587 dmi_get_system_info(DMI_BIOS_VERSION),
4588 dmi_get_system_info(DMI_PRODUCT_VERSION));
4589 iommu_identity_mapping |= IDENTMAP_AZALIA;
4590 return;
4591 }
4592
4593 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
4594 vtisochctrl);
4595}