iommu/vt-d: Remove dmar_global_lock from device_notifier
[deliverable/linux.git] drivers/iommu/intel-iommu.c
ba395927 1/*
ea8ea460 2 * Copyright © 2006-2014 Intel Corporation.
ba395927
KA
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
ea8ea460
DW
13 * Authors: David Woodhouse <dwmw2@infradead.org>,
14 * Ashok Raj <ashok.raj@intel.com>,
15 * Shaohua Li <shaohua.li@intel.com>,
16 * Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>,
17 * Fenghua Yu <fenghua.yu@intel.com>
9f10e5bf 18 * Joerg Roedel <jroedel@suse.de>
ba395927
KA
19 */
20
9f10e5bf
JR
21#define pr_fmt(fmt) "DMAR: " fmt
22
ba395927
KA
23#include <linux/init.h>
24#include <linux/bitmap.h>
5e0d2a6f 25#include <linux/debugfs.h>
54485c30 26#include <linux/export.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
75f05569 35#include <linux/memory.h>
5e0d2a6f 36#include <linux/timer.h>
38717946 37#include <linux/iova.h>
5d450806 38#include <linux/iommu.h>
38717946 39#include <linux/intel-iommu.h>
134fac3f 40#include <linux/syscore_ops.h>
69575d38 41#include <linux/tboot.h>
adb2fe02 42#include <linux/dmi.h>
5cdede24 43#include <linux/pci-ats.h>
0ee332c1 44#include <linux/memblock.h>
36746436 45#include <linux/dma-contiguous.h>
091d42e4 46#include <linux/crash_dump.h>
8a8f422d 47#include <asm/irq_remapping.h>
ba395927 48#include <asm/cacheflush.h>
46a7fa27 49#include <asm/iommu.h>
ba395927 50
078e1ee2
JR
51#include "irq_remapping.h"
52
5b6985ce
FY
53#define ROOT_SIZE VTD_PAGE_SIZE
54#define CONTEXT_SIZE VTD_PAGE_SIZE
55
ba395927 56#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
18436afd 57#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
ba395927 58#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 59#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
60
61#define IOAPIC_RANGE_START (0xfee00000)
62#define IOAPIC_RANGE_END (0xfeefffff)
63#define IOVA_START_ADDR (0x1000)
64
65#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
66
4ed0d3e6 67#define MAX_AGAW_WIDTH 64
5c645b35 68#define MAX_AGAW_PFN_WIDTH (MAX_AGAW_WIDTH - VTD_PAGE_SHIFT)
4ed0d3e6 69
2ebe3151
DW
70#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
71#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
72
73/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
74 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
75#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
76 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
77#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 78
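/*
 * Editor's illustration, not part of the original file: a quick check of the
 * limits above for the default 48-bit guest address width, assuming the usual
 * VTD_PAGE_SHIFT of 12 (4KiB VT-d pages).
 */
static inline void __maybe_unused example_domain_limits(void)
{
	/* 2^36 4KiB pages, i.e. 256TiB of IOVA space */
	BUILD_BUG_ON(__DOMAIN_MAX_PFN(48) != (((uint64_t)1 << 36) - 1));
	/* the matching top address is the base of the last 4KiB page below 2^48 */
	BUILD_BUG_ON(DOMAIN_MAX_ADDR(48) != (((uint64_t)1 << 48) - VTD_PAGE_SIZE));
}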
1b722500
RM
79/* IO virtual address start page frame number */
80#define IOVA_START_PFN (1)
81
f27be03b 82#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 83#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 84#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 85
df08cdc7
AM
86/* page table handling */
87#define LEVEL_STRIDE (9)
88#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
89
6d1c56a9
OBC
90/*
 91 * This bitmap is used to advertise the page sizes our hardware supports
 92 * to the IOMMU core, which will then use this information to split
 93 * physically contiguous memory regions it is mapping into page sizes
 94 * that we support.
 95 *
 96 * Traditionally the IOMMU core just handed us the mappings directly,
 97 * after making sure the size was a power-of-two multiple of 4KiB and
 98 * that the mapping had natural alignment.
 99 *
 100 * To retain this behavior, we currently advertise that we support
 101 * all page sizes that are a power-of-two multiple of 4KiB.
102 *
103 * If at some point we'd like to utilize the IOMMU core's new behavior,
104 * we could change this to advertise the real page sizes we support.
105 */
106#define INTEL_IOMMU_PGSIZES (~0xFFFUL)
107
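/*
 * Editor's illustration, not part of the original file: with the mask above
 * every power-of-two size from 4KiB upwards is advertised, so the core may
 * hand us 4KiB, 2MiB or 1GiB chunks and we only ever see sizes whose low
 * twelve bits are clear.
 */
static inline bool __maybe_unused example_pgsize_is_advertised(unsigned long size)
{
	/* true for 4KiB, 2MiB, 1GiB, ...; false for anything under 4KiB */
	return (size & (size - 1)) == 0 && (size & INTEL_IOMMU_PGSIZES) != 0;
}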
df08cdc7
AM
108static inline int agaw_to_level(int agaw)
109{
110 return agaw + 2;
111}
112
113static inline int agaw_to_width(int agaw)
114{
5c645b35 115 return min_t(int, 30 + agaw * LEVEL_STRIDE, MAX_AGAW_WIDTH);
df08cdc7
AM
116}
117
118static inline int width_to_agaw(int width)
119{
5c645b35 120 return DIV_ROUND_UP(width - 30, LEVEL_STRIDE);
df08cdc7
AM
121}
122
123static inline unsigned int level_to_offset_bits(int level)
124{
125 return (level - 1) * LEVEL_STRIDE;
126}
127
128static inline int pfn_level_offset(unsigned long pfn, int level)
129{
130 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
131}
132
133static inline unsigned long level_mask(int level)
134{
135 return -1UL << level_to_offset_bits(level);
136}
137
138static inline unsigned long level_size(int level)
139{
140 return 1UL << level_to_offset_bits(level);
141}
142
143static inline unsigned long align_to_level(unsigned long pfn, int level)
144{
145 return (pfn + level_size(level) - 1) & level_mask(level);
146}
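/*
 * Editor's sketch, not part of the original file: how the helpers above are
 * used for the default 48-bit width. width_to_agaw(48) == 2, which means a
 * four-level table (agaw_to_level(2) == 4); a level-2 entry then spans
 * level_size(2) == 512 VT-d pages, i.e. 2MiB.
 */
static inline void __maybe_unused example_walk_indices(unsigned long pfn, int idx[4])
{
	int level;

	/* top-down table indices a 4-level walk would use for this pfn */
	for (level = 4; level >= 1; level--)
		idx[4 - level] = pfn_level_offset(pfn, level);
}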
fd18de50 147
6dd9a7c7
YS
148static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
149{
5c645b35 150 return 1 << min_t(int, (lvl - 1) * LEVEL_STRIDE, MAX_AGAW_PFN_WIDTH);
6dd9a7c7
YS
151}
152
dd4e8319
DW
 153/* VT-d pages must never be larger than MM pages. Otherwise things
 154 are never going to work. */
155static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
156{
157 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
158}
159
160static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
161{
162 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
163}
164static inline unsigned long page_to_dma_pfn(struct page *pg)
165{
166 return mm_to_dma_pfn(page_to_pfn(pg));
167}
168static inline unsigned long virt_to_dma_pfn(void *p)
169{
170 return page_to_dma_pfn(virt_to_page(p));
171}
172
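/*
 * Editor's note, not part of the original file: with the usual 4KiB kernel
 * pages PAGE_SHIFT == VTD_PAGE_SHIFT == 12, so these conversions are an
 * identity; they only start shifting on a (hypothetical) kernel with larger
 * pages, and the mm -> dma -> mm round trip is always lossless.
 */
static inline bool __maybe_unused example_pfn_roundtrip(unsigned long mm_pfn)
{
	return dma_to_mm_pfn(mm_to_dma_pfn(mm_pfn)) == mm_pfn;
}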
d9630fe9
WH
173/* global iommu list, set NULL for ignored DMAR units */
174static struct intel_iommu **g_iommus;
175
e0fc7e0b 176static void __init check_tylersburg_isoch(void);
9af88143
DW
177static int rwbf_quirk;
178
b779260b
JC
179/*
180 * set to 1 to panic kernel if can't successfully enable VT-d
181 * (used when kernel is launched w/ TXT)
182 */
183static int force_on = 0;
184
46b08e1a
MM
185/*
186 * 0: Present
187 * 1-11: Reserved
188 * 12-63: Context Ptr (12 - (haw-1))
189 * 64-127: Reserved
190 */
191struct root_entry {
03ecc32c
DW
192 u64 lo;
193 u64 hi;
46b08e1a
MM
194};
195#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
46b08e1a 196
091d42e4
JR
197/*
198 * Take a root_entry and return the Lower Context Table Pointer (LCTP)
199 * if marked present.
200 */
201static phys_addr_t root_entry_lctp(struct root_entry *re)
202{
203 if (!(re->lo & 1))
204 return 0;
205
206 return re->lo & VTD_PAGE_MASK;
207}
208
209/*
210 * Take a root_entry and return the Upper Context Table Pointer (UCTP)
211 * if marked present.
212 */
213static phys_addr_t root_entry_uctp(struct root_entry *re)
214{
215 if (!(re->hi & 1))
216 return 0;
46b08e1a 217
091d42e4
JR
218 return re->hi & VTD_PAGE_MASK;
219}
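/*
 * Editor's illustration, not part of the original file: decoding a root entry
 * whose low half points a present lower context table at 0x12345000.
 */
static inline phys_addr_t __maybe_unused example_decode_root(void)
{
	struct root_entry re = { .lo = 0x12345000ULL | 1, .hi = 0 };

	/* lo has the present bit, so the lower table is at 0x12345000;
	 * hi does not, so root_entry_uctp(&re) would return 0 */
	return root_entry_lctp(&re);
}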
7a8fc25e
MM
220/*
221 * low 64 bits:
222 * 0: present
223 * 1: fault processing disable
224 * 2-3: translation type
225 * 12-63: address space root
226 * high 64 bits:
227 * 0-2: address width
 228 * 3-6: avail
229 * 8-23: domain id
230 */
231struct context_entry {
232 u64 lo;
233 u64 hi;
234};
c07e7d21 235
cf484d0e
JR
236static inline void context_clear_pasid_enable(struct context_entry *context)
237{
238 context->lo &= ~(1ULL << 11);
239}
240
241static inline bool context_pasid_enabled(struct context_entry *context)
242{
243 return !!(context->lo & (1ULL << 11));
244}
245
246static inline void context_set_copied(struct context_entry *context)
247{
248 context->hi |= (1ull << 3);
249}
250
251static inline bool context_copied(struct context_entry *context)
252{
253 return !!(context->hi & (1ULL << 3));
254}
255
256static inline bool __context_present(struct context_entry *context)
c07e7d21
MM
257{
258 return (context->lo & 1);
259}
cf484d0e
JR
260
261static inline bool context_present(struct context_entry *context)
262{
263 return context_pasid_enabled(context) ?
264 __context_present(context) :
265 __context_present(context) && !context_copied(context);
266}
267
c07e7d21
MM
268static inline void context_set_present(struct context_entry *context)
269{
270 context->lo |= 1;
271}
272
273static inline void context_set_fault_enable(struct context_entry *context)
274{
275 context->lo &= (((u64)-1) << 2) | 1;
276}
277
c07e7d21
MM
278static inline void context_set_translation_type(struct context_entry *context,
279 unsigned long value)
280{
281 context->lo &= (((u64)-1) << 4) | 3;
282 context->lo |= (value & 3) << 2;
283}
284
285static inline void context_set_address_root(struct context_entry *context,
286 unsigned long value)
287{
1a2262f9 288 context->lo &= ~VTD_PAGE_MASK;
c07e7d21
MM
289 context->lo |= value & VTD_PAGE_MASK;
290}
291
292static inline void context_set_address_width(struct context_entry *context,
293 unsigned long value)
294{
295 context->hi |= value & 7;
296}
297
298static inline void context_set_domain_id(struct context_entry *context,
299 unsigned long value)
300{
301 context->hi |= (value & ((1 << 16) - 1)) << 8;
302}
303
dbcd861f
JR
304static inline int context_domain_id(struct context_entry *c)
305{
306 return((c->hi >> 8) & 0xffff);
307}
308
c07e7d21
MM
309static inline void context_clear_entry(struct context_entry *context)
310{
311 context->lo = 0;
312 context->hi = 0;
313}
7a8fc25e 314
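/*
 * Editor's sketch, not part of the original file: how the setters above are
 * typically combined to describe a device attached to a 4-level (agaw 2)
 * page table in domain 42. The domain id, agaw value and pgd address are
 * made up for the example.
 */
static inline void __maybe_unused example_build_context(struct context_entry *ce,
							phys_addr_t pgd_phys)
{
	context_clear_entry(ce);		/* start from all-zero lo/hi   */
	context_set_domain_id(ce, 42);		/* hi[23:8]  = domain id       */
	context_set_address_width(ce, 2);	/* hi[2:0]   = agaw            */
	context_set_translation_type(ce, CONTEXT_TT_MULTI_LEVEL);
	context_set_address_root(ce, pgd_phys);	/* lo[63:12] = page-table root */
	context_set_fault_enable(ce);		/* clear the fault-disable bit */
	context_set_present(ce);		/* lo[0]     = present         */
}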
622ba12a
MM
315/*
316 * 0: readable
317 * 1: writable
318 * 2-6: reserved
319 * 7: super page
9cf06697
SY
320 * 8-10: available
321 * 11: snoop behavior
622ba12a
MM
 322 * 12-63: Host physical address
323 */
324struct dma_pte {
325 u64 val;
326};
622ba12a 327
19c239ce
MM
328static inline void dma_clear_pte(struct dma_pte *pte)
329{
330 pte->val = 0;
331}
332
19c239ce
MM
333static inline u64 dma_pte_addr(struct dma_pte *pte)
334{
c85994e4
DW
335#ifdef CONFIG_64BIT
336 return pte->val & VTD_PAGE_MASK;
337#else
338 /* Must have a full atomic 64-bit read */
1a8bd481 339 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
c85994e4 340#endif
19c239ce
MM
341}
342
19c239ce
MM
343static inline bool dma_pte_present(struct dma_pte *pte)
344{
345 return (pte->val & 3) != 0;
346}
622ba12a 347
4399c8bf
AK
348static inline bool dma_pte_superpage(struct dma_pte *pte)
349{
c3c75eb7 350 return (pte->val & DMA_PTE_LARGE_PAGE);
4399c8bf
AK
351}
352
75e6bf96
DW
353static inline int first_pte_in_page(struct dma_pte *pte)
354{
355 return !((unsigned long)pte & ~VTD_PAGE_MASK);
356}
357
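/*
 * Editor's illustration, not part of the original file: a leaf PTE is just the
 * 4KiB-aligned host physical address plus permission bits, e.g. for a
 * read-write mapping of host page 0xabcd000:
 *
 *   pte.val = 0xabcd000 | DMA_PTE_READ | DMA_PTE_WRITE;
 *   dma_pte_present(&pte)   -> true   (one of bits 0-1 set)
 *   dma_pte_addr(&pte)      -> 0xabcd000
 *   dma_pte_superpage(&pte) -> false  (bit 7, DMA_PTE_LARGE_PAGE, clear)
 */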
2c2e2c38
FY
358/*
 359 * This domain is a static identity mapping domain.
 360 * 1. This domain creates a static 1:1 mapping to all usable memory.
 361 * 2. It maps to each iommu if successful.
 362 * 3. Each iommu maps to this domain if successful.
363 */
19943b0e
DW
364static struct dmar_domain *si_domain;
365static int hw_pass_through = 1;
2c2e2c38 366
28ccce0d
JR
367/*
 368 * Domain represents a virtual machine; more than one device
1ce28feb
WH
 369 * across IOMMUs may be owned by one domain, e.g. a KVM guest.
370 */
ab8dfe25 371#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 0)
1ce28feb 372
2c2e2c38 373/* si_domain contains multiple devices */
ab8dfe25 374#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 1)
2c2e2c38 375
29a27719
JR
376#define for_each_domain_iommu(idx, domain) \
377 for (idx = 0; idx < g_num_of_iommus; idx++) \
378 if (domain->iommu_refcnt[idx])
379
99126f7c 380struct dmar_domain {
4c923d47 381 int nid; /* node id */
29a27719
JR
382
383 unsigned iommu_refcnt[DMAR_UNITS_SUPPORTED];
384 /* Refcount of devices per iommu */
385
99126f7c 386
c0e8a6c8
JR
387 u16 iommu_did[DMAR_UNITS_SUPPORTED];
388 /* Domain ids per IOMMU. Use u16 since
389 * domain ids are 16 bit wide according
390 * to VT-d spec, section 9.3 */
391
00a77deb 392 struct list_head devices; /* all devices' list */
99126f7c
MM
393 struct iova_domain iovad; /* iova's that belong to this domain */
394
395 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
396 int gaw; /* max guest address width */
397
398 /* adjusted guest address width, 0 is level 2 30-bit */
399 int agaw;
400
3b5410e7 401 int flags; /* flags to find out type of domain */
8e604097
WH
402
403 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 404 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d 405 int iommu_count; /* reference count of iommu */
6dd9a7c7
YS
406 int iommu_superpage;/* Level of superpages supported:
407 0 == 4KiB (no superpages), 1 == 2MiB,
408 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
fe40f1e0 409 u64 max_addr; /* maximum mapped address */
00a77deb
JR
410
411 struct iommu_domain domain; /* generic domain data structure for
412 iommu core */
99126f7c
MM
413};
414
a647dacb
MM
415/* PCI domain-device relationship */
416struct device_domain_info {
417 struct list_head link; /* link to domain siblings */
418 struct list_head global; /* link to global list */
276dbf99 419 u8 bus; /* PCI bus number */
a647dacb 420 u8 devfn; /* PCI devfn number */
0bcb3e28 421 struct device *dev; /* it's NULL for PCIe-to-PCI bridge */
93a23a72 422 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
423 struct dmar_domain *domain; /* pointer to domain */
424};
425
b94e4117
JL
426struct dmar_rmrr_unit {
427 struct list_head list; /* list of rmrr units */
428 struct acpi_dmar_header *hdr; /* ACPI header */
429 u64 base_address; /* reserved base address*/
430 u64 end_address; /* reserved end address */
832bd858 431 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
432 int devices_cnt; /* target device count */
433};
434
435struct dmar_atsr_unit {
436 struct list_head list; /* list of ATSR units */
437 struct acpi_dmar_header *hdr; /* ACPI header */
832bd858 438 struct dmar_dev_scope *devices; /* target devices */
b94e4117
JL
439 int devices_cnt; /* target device count */
440 u8 include_all:1; /* include all ports */
441};
442
443static LIST_HEAD(dmar_atsr_units);
444static LIST_HEAD(dmar_rmrr_units);
445
446#define for_each_rmrr_units(rmrr) \
447 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
448
5e0d2a6f 449static void flush_unmaps_timeout(unsigned long data);
450
b707cb02 451static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
5e0d2a6f 452
80b20dd8 453#define HIGH_WATER_MARK 250
454struct deferred_flush_tables {
455 int next;
456 struct iova *iova[HIGH_WATER_MARK];
457 struct dmar_domain *domain[HIGH_WATER_MARK];
ea8ea460 458 struct page *freelist[HIGH_WATER_MARK];
80b20dd8 459};
460
461static struct deferred_flush_tables *deferred_flush;
462
5e0d2a6f 463/* bitmap for indexing intel_iommus */
5e0d2a6f 464static int g_num_of_iommus;
465
466static DEFINE_SPINLOCK(async_umap_flush_lock);
467static LIST_HEAD(unmaps_to_do);
468
469static int timer_on;
470static long list_size;
5e0d2a6f 471
92d03cc8 472static void domain_exit(struct dmar_domain *domain);
ba395927 473static void domain_remove_dev_info(struct dmar_domain *domain);
e6de0f8d
JR
474static void dmar_remove_one_dev_info(struct dmar_domain *domain,
475 struct device *dev);
2452d9db
JR
476static void domain_context_clear(struct intel_iommu *iommu,
477 struct device *dev);
55d94043
JR
478static void __dmar_remove_one_dev_info(struct dmar_domain *domain,
479 struct device *dev);
2a46ddf7
JL
480static int domain_detach_iommu(struct dmar_domain *domain,
481 struct intel_iommu *iommu);
ba395927 482
d3f13810 483#ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON
0cd5c3c8
KM
484int dmar_disabled = 0;
485#else
486int dmar_disabled = 1;
d3f13810 487#endif /*CONFIG_INTEL_IOMMU_DEFAULT_ON*/
0cd5c3c8 488
8bc1f85c
ED
489int intel_iommu_enabled = 0;
490EXPORT_SYMBOL_GPL(intel_iommu_enabled);
491
2d9e667e 492static int dmar_map_gfx = 1;
7d3b03ce 493static int dmar_forcedac;
5e0d2a6f 494static int intel_iommu_strict;
6dd9a7c7 495static int intel_iommu_superpage = 1;
c83b2f20
DW
496static int intel_iommu_ecs = 1;
497
498/* We only actually use ECS when PASID support (on the new bit 40)
499 * is also advertised. Some early implementations — the ones with
500 * PASID support on bit 28 — have issues even when we *only* use
501 * extended root/context tables. */
502#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \
503 ecap_pasid(iommu->ecap))
ba395927 504
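/*
 * Editor's note, not part of the original file: with the check above, ECS
 * stays off unless the hardware advertises both the ECS and PASID capability
 * bits, and it can still be disabled by hand with "intel_iommu=ecs_off"
 * (parsed in intel_iommu_setup() below).
 */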
c0771df8
DW
505int intel_iommu_gfx_mapped;
506EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped);
507
ba395927
KA
508#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
509static DEFINE_SPINLOCK(device_domain_lock);
510static LIST_HEAD(device_domain_list);
511
b22f6434 512static const struct iommu_ops intel_iommu_ops;
a8bcbb0d 513
4158c2ec
JR
514static bool translation_pre_enabled(struct intel_iommu *iommu)
515{
516 return (iommu->flags & VTD_FLAG_TRANS_PRE_ENABLED);
517}
518
091d42e4
JR
519static void clear_translation_pre_enabled(struct intel_iommu *iommu)
520{
521 iommu->flags &= ~VTD_FLAG_TRANS_PRE_ENABLED;
522}
523
4158c2ec
JR
524static void init_translation_status(struct intel_iommu *iommu)
525{
526 u32 gsts;
527
528 gsts = readl(iommu->reg + DMAR_GSTS_REG);
529 if (gsts & DMA_GSTS_TES)
530 iommu->flags |= VTD_FLAG_TRANS_PRE_ENABLED;
531}
532
00a77deb
JR
 533/* Convert generic 'struct iommu_domain' to private struct dmar_domain */
534static struct dmar_domain *to_dmar_domain(struct iommu_domain *dom)
535{
536 return container_of(dom, struct dmar_domain, domain);
537}
538
ba395927
KA
539static int __init intel_iommu_setup(char *str)
540{
541 if (!str)
542 return -EINVAL;
543 while (*str) {
0cd5c3c8
KM
544 if (!strncmp(str, "on", 2)) {
545 dmar_disabled = 0;
9f10e5bf 546 pr_info("IOMMU enabled\n");
0cd5c3c8 547 } else if (!strncmp(str, "off", 3)) {
ba395927 548 dmar_disabled = 1;
9f10e5bf 549 pr_info("IOMMU disabled\n");
ba395927
KA
550 } else if (!strncmp(str, "igfx_off", 8)) {
551 dmar_map_gfx = 0;
9f10e5bf 552 pr_info("Disable GFX device mapping\n");
7d3b03ce 553 } else if (!strncmp(str, "forcedac", 8)) {
9f10e5bf 554 pr_info("Forcing DAC for PCI devices\n");
7d3b03ce 555 dmar_forcedac = 1;
5e0d2a6f 556 } else if (!strncmp(str, "strict", 6)) {
9f10e5bf 557 pr_info("Disable batched IOTLB flush\n");
5e0d2a6f 558 intel_iommu_strict = 1;
6dd9a7c7 559 } else if (!strncmp(str, "sp_off", 6)) {
9f10e5bf 560 pr_info("Disable supported super page\n");
6dd9a7c7 561 intel_iommu_superpage = 0;
c83b2f20
DW
562 } else if (!strncmp(str, "ecs_off", 7)) {
563 printk(KERN_INFO
564 "Intel-IOMMU: disable extended context table support\n");
565 intel_iommu_ecs = 0;
ba395927
KA
566 }
567
568 str += strcspn(str, ",");
569 while (*str == ',')
570 str++;
571 }
572 return 0;
573}
574__setup("intel_iommu=", intel_iommu_setup);
575
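/*
 * Editor's usage note, not part of the original file: the options parsed
 * above are comma-separated on the kernel command line, for example
 *
 *   intel_iommu=on
 *   intel_iommu=on,strict
 *   intel_iommu=igfx_off,sp_off
 *   intel_iommu=off
 */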
576static struct kmem_cache *iommu_domain_cache;
577static struct kmem_cache *iommu_devinfo_cache;
ba395927 578
9452d5bf
JR
579static struct dmar_domain* get_iommu_domain(struct intel_iommu *iommu, u16 did)
580{
8bf47816
JR
581 struct dmar_domain **domains;
582 int idx = did >> 8;
583
584 domains = iommu->domains[idx];
585 if (!domains)
586 return NULL;
587
588 return domains[did & 0xff];
9452d5bf
JR
589}
590
591static void set_iommu_domain(struct intel_iommu *iommu, u16 did,
592 struct dmar_domain *domain)
593{
8bf47816
JR
594 struct dmar_domain **domains;
595 int idx = did >> 8;
596
597 if (!iommu->domains[idx]) {
598 size_t size = 256 * sizeof(struct dmar_domain *);
599 iommu->domains[idx] = kzalloc(size, GFP_ATOMIC);
600 }
601
602 domains = iommu->domains[idx];
603 if (WARN_ON(!domains))
604 return;
605 else
606 domains[did & 0xff] = domain;
9452d5bf
JR
607}
608
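/*
 * Editor's note, not part of the original file: domain IDs are resolved
 * through a two-level table to avoid one huge flat array: did 0x1234 lives
 * at iommu->domains[0x12][0x34], and the 256-entry second-level block is
 * only allocated the first time a did in that range is set.
 */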
4c923d47 609static inline void *alloc_pgtable_page(int node)
eb3fa7cb 610{
4c923d47
SS
611 struct page *page;
612 void *vaddr = NULL;
eb3fa7cb 613
4c923d47
SS
614 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
615 if (page)
616 vaddr = page_address(page);
eb3fa7cb 617 return vaddr;
ba395927
KA
618}
619
620static inline void free_pgtable_page(void *vaddr)
621{
622 free_page((unsigned long)vaddr);
623}
624
625static inline void *alloc_domain_mem(void)
626{
354bb65e 627 return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
ba395927
KA
628}
629
38717946 630static void free_domain_mem(void *vaddr)
ba395927
KA
631{
632 kmem_cache_free(iommu_domain_cache, vaddr);
633}
634
635static inline void * alloc_devinfo_mem(void)
636{
354bb65e 637 return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
ba395927
KA
638}
639
640static inline void free_devinfo_mem(void *vaddr)
641{
642 kmem_cache_free(iommu_devinfo_cache, vaddr);
643}
644
ab8dfe25
JL
645static inline int domain_type_is_vm(struct dmar_domain *domain)
646{
647 return domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE;
648}
649
28ccce0d
JR
650static inline int domain_type_is_si(struct dmar_domain *domain)
651{
652 return domain->flags & DOMAIN_FLAG_STATIC_IDENTITY;
653}
654
ab8dfe25
JL
655static inline int domain_type_is_vm_or_si(struct dmar_domain *domain)
656{
657 return domain->flags & (DOMAIN_FLAG_VIRTUAL_MACHINE |
658 DOMAIN_FLAG_STATIC_IDENTITY);
659}
1b573683 660
162d1b10
JL
661static inline int domain_pfn_supported(struct dmar_domain *domain,
662 unsigned long pfn)
663{
664 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
665
666 return !(addr_width < BITS_PER_LONG && pfn >> addr_width);
667}
668
4ed0d3e6 669static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
670{
671 unsigned long sagaw;
672 int agaw = -1;
673
674 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 675 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
676 agaw >= 0; agaw--) {
677 if (test_bit(agaw, &sagaw))
678 break;
679 }
680
681 return agaw;
682}
683
4ed0d3e6
FY
684/*
685 * Calculate max SAGAW for each iommu.
686 */
687int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
688{
689 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
690}
691
692/*
 693 * Calculate agaw for each iommu.
 694 * "SAGAW" may be different across iommus; use a default agaw and fall
 695 * back to a smaller supported agaw for iommus that don't support the default.
696 */
697int iommu_calculate_agaw(struct intel_iommu *iommu)
698{
699 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
700}
701
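/*
 * Editor's worked example, not part of the original file: if cap_sagaw()
 * reports bits 1 and 2 (3- and 4-level tables), __iommu_calculate_agaw()
 * starts at width_to_agaw(48) == 2, finds bit 2 set and returns agaw 2, so
 * the domain gets a 4-level table. Hardware reporting only bit 1 would fall
 * back to agaw 1, i.e. a 3-level table covering 39 bits.
 */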
2c2e2c38 702/* This function only returns a single iommu in a domain */
8c11e798
WH
703static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
704{
705 int iommu_id;
706
2c2e2c38 707 /* si_domain and vm domain should not get here. */
ab8dfe25 708 BUG_ON(domain_type_is_vm_or_si(domain));
29a27719
JR
709 for_each_domain_iommu(iommu_id, domain)
710 break;
711
8c11e798
WH
712 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
713 return NULL;
714
715 return g_iommus[iommu_id];
716}
717
8e604097
WH
718static void domain_update_iommu_coherency(struct dmar_domain *domain)
719{
d0501960
DW
720 struct dmar_drhd_unit *drhd;
721 struct intel_iommu *iommu;
2f119c78
QL
722 bool found = false;
723 int i;
2e12bc29 724
d0501960 725 domain->iommu_coherency = 1;
8e604097 726
29a27719 727 for_each_domain_iommu(i, domain) {
2f119c78 728 found = true;
8e604097
WH
729 if (!ecap_coherent(g_iommus[i]->ecap)) {
730 domain->iommu_coherency = 0;
731 break;
732 }
8e604097 733 }
d0501960
DW
734 if (found)
735 return;
736
737 /* No hardware attached; use lowest common denominator */
738 rcu_read_lock();
739 for_each_active_iommu(iommu, drhd) {
740 if (!ecap_coherent(iommu->ecap)) {
741 domain->iommu_coherency = 0;
742 break;
743 }
744 }
745 rcu_read_unlock();
8e604097
WH
746}
747
161f6934 748static int domain_update_iommu_snooping(struct intel_iommu *skip)
58c610bd 749{
161f6934
JL
750 struct dmar_drhd_unit *drhd;
751 struct intel_iommu *iommu;
752 int ret = 1;
58c610bd 753
161f6934
JL
754 rcu_read_lock();
755 for_each_active_iommu(iommu, drhd) {
756 if (iommu != skip) {
757 if (!ecap_sc_support(iommu->ecap)) {
758 ret = 0;
759 break;
760 }
58c610bd 761 }
58c610bd 762 }
161f6934
JL
763 rcu_read_unlock();
764
765 return ret;
58c610bd
SY
766}
767
161f6934 768static int domain_update_iommu_superpage(struct intel_iommu *skip)
6dd9a7c7 769{
8140a95d 770 struct dmar_drhd_unit *drhd;
161f6934 771 struct intel_iommu *iommu;
8140a95d 772 int mask = 0xf;
6dd9a7c7
YS
773
774 if (!intel_iommu_superpage) {
161f6934 775 return 0;
6dd9a7c7
YS
776 }
777
8140a95d 778 /* set iommu_superpage to the smallest common denominator */
0e242612 779 rcu_read_lock();
8140a95d 780 for_each_active_iommu(iommu, drhd) {
161f6934
JL
781 if (iommu != skip) {
782 mask &= cap_super_page_val(iommu->cap);
783 if (!mask)
784 break;
6dd9a7c7
YS
785 }
786 }
0e242612
JL
787 rcu_read_unlock();
788
161f6934 789 return fls(mask);
6dd9a7c7
YS
790}
791
58c610bd
SY
792/* Some capabilities may be different across iommus */
793static void domain_update_iommu_cap(struct dmar_domain *domain)
794{
795 domain_update_iommu_coherency(domain);
161f6934
JL
796 domain->iommu_snooping = domain_update_iommu_snooping(NULL);
797 domain->iommu_superpage = domain_update_iommu_superpage(NULL);
58c610bd
SY
798}
799
03ecc32c
DW
800static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
801 u8 bus, u8 devfn, int alloc)
802{
803 struct root_entry *root = &iommu->root_entry[bus];
804 struct context_entry *context;
805 u64 *entry;
806
 807 entry = &root->lo;
c83b2f20 808 if (ecs_enabled(iommu)) {
03ecc32c
DW
 809 if (devfn >= 0x80) {
 810 devfn -= 0x80;
 811 entry = &root->hi;
 812 }
 813 devfn *= 2;
 814 }
815 if (*entry & 1)
816 context = phys_to_virt(*entry & VTD_PAGE_MASK);
817 else {
818 unsigned long phy_addr;
819 if (!alloc)
820 return NULL;
821
822 context = alloc_pgtable_page(iommu->node);
823 if (!context)
824 return NULL;
825
826 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
827 phy_addr = virt_to_phys((void *)context);
828 *entry = phy_addr | 1;
829 __iommu_flush_cache(iommu, entry, sizeof(*entry));
830 }
831 return &context[devfn];
832}
833
4ed6a540
DW
834static int iommu_dummy(struct device *dev)
835{
836 return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
837}
838
156baca8 839static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
c7151a8d
WH
840{
841 struct dmar_drhd_unit *drhd = NULL;
b683b230 842 struct intel_iommu *iommu;
156baca8
DW
843 struct device *tmp;
844 struct pci_dev *ptmp, *pdev = NULL;
aa4d066a 845 u16 segment = 0;
c7151a8d
WH
846 int i;
847
4ed6a540
DW
848 if (iommu_dummy(dev))
849 return NULL;
850
156baca8
DW
851 if (dev_is_pci(dev)) {
852 pdev = to_pci_dev(dev);
853 segment = pci_domain_nr(pdev->bus);
ca5b74d2 854 } else if (has_acpi_companion(dev))
156baca8
DW
855 dev = &ACPI_COMPANION(dev)->dev;
856
0e242612 857 rcu_read_lock();
b683b230 858 for_each_active_iommu(iommu, drhd) {
156baca8 859 if (pdev && segment != drhd->segment)
276dbf99 860 continue;
c7151a8d 861
b683b230 862 for_each_active_dev_scope(drhd->devices,
156baca8
DW
863 drhd->devices_cnt, i, tmp) {
864 if (tmp == dev) {
865 *bus = drhd->devices[i].bus;
866 *devfn = drhd->devices[i].devfn;
b683b230 867 goto out;
156baca8
DW
868 }
869
870 if (!pdev || !dev_is_pci(tmp))
871 continue;
872
873 ptmp = to_pci_dev(tmp);
874 if (ptmp->subordinate &&
875 ptmp->subordinate->number <= pdev->bus->number &&
876 ptmp->subordinate->busn_res.end >= pdev->bus->number)
877 goto got_pdev;
924b6231 878 }
c7151a8d 879
156baca8
DW
880 if (pdev && drhd->include_all) {
881 got_pdev:
882 *bus = pdev->bus->number;
883 *devfn = pdev->devfn;
b683b230 884 goto out;
156baca8 885 }
c7151a8d 886 }
b683b230 887 iommu = NULL;
156baca8 888 out:
0e242612 889 rcu_read_unlock();
c7151a8d 890
b683b230 891 return iommu;
c7151a8d
WH
892}
893
5331fe6f
WH
894static void domain_flush_cache(struct dmar_domain *domain,
895 void *addr, int size)
896{
897 if (!domain->iommu_coherency)
898 clflush_cache_range(addr, size);
899}
900
ba395927
KA
901static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
902{
ba395927 903 struct context_entry *context;
03ecc32c 904 int ret = 0;
ba395927
KA
905 unsigned long flags;
906
907 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c
DW
908 context = iommu_context_addr(iommu, bus, devfn, 0);
909 if (context)
910 ret = context_present(context);
ba395927
KA
911 spin_unlock_irqrestore(&iommu->lock, flags);
912 return ret;
913}
914
915static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
916{
ba395927
KA
917 struct context_entry *context;
918 unsigned long flags;
919
920 spin_lock_irqsave(&iommu->lock, flags);
03ecc32c 921 context = iommu_context_addr(iommu, bus, devfn, 0);
ba395927 922 if (context) {
03ecc32c
DW
923 context_clear_entry(context);
924 __iommu_flush_cache(iommu, context, sizeof(*context));
ba395927
KA
925 }
926 spin_unlock_irqrestore(&iommu->lock, flags);
927}
928
929static void free_context_table(struct intel_iommu *iommu)
930{
ba395927
KA
931 int i;
932 unsigned long flags;
933 struct context_entry *context;
934
935 spin_lock_irqsave(&iommu->lock, flags);
936 if (!iommu->root_entry) {
937 goto out;
938 }
939 for (i = 0; i < ROOT_ENTRY_NR; i++) {
03ecc32c 940 context = iommu_context_addr(iommu, i, 0, 0);
ba395927
KA
941 if (context)
942 free_pgtable_page(context);
03ecc32c 943
c83b2f20 944 if (!ecs_enabled(iommu))
03ecc32c
DW
945 continue;
946
947 context = iommu_context_addr(iommu, i, 0x80, 0);
948 if (context)
949 free_pgtable_page(context);
950
ba395927
KA
951 }
952 free_pgtable_page(iommu->root_entry);
953 iommu->root_entry = NULL;
954out:
955 spin_unlock_irqrestore(&iommu->lock, flags);
956}
957
b026fd28 958static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
5cf0a76f 959 unsigned long pfn, int *target_level)
ba395927 960{
ba395927
KA
961 struct dma_pte *parent, *pte = NULL;
962 int level = agaw_to_level(domain->agaw);
4399c8bf 963 int offset;
ba395927
KA
964
965 BUG_ON(!domain->pgd);
f9423606 966
162d1b10 967 if (!domain_pfn_supported(domain, pfn))
f9423606
JS
968 /* Address beyond IOMMU's addressing capabilities. */
969 return NULL;
970
ba395927
KA
971 parent = domain->pgd;
972
5cf0a76f 973 while (1) {
ba395927
KA
974 void *tmp_page;
975
b026fd28 976 offset = pfn_level_offset(pfn, level);
ba395927 977 pte = &parent[offset];
5cf0a76f 978 if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte)))
6dd9a7c7 979 break;
5cf0a76f 980 if (level == *target_level)
ba395927
KA
981 break;
982
19c239ce 983 if (!dma_pte_present(pte)) {
c85994e4
DW
984 uint64_t pteval;
985
4c923d47 986 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 987
206a73c1 988 if (!tmp_page)
ba395927 989 return NULL;
206a73c1 990
c85994e4 991 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 992 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
effad4b5 993 if (cmpxchg64(&pte->val, 0ULL, pteval))
c85994e4
DW
994 /* Someone else set it while we were thinking; use theirs. */
995 free_pgtable_page(tmp_page);
effad4b5 996 else
c85994e4 997 domain_flush_cache(domain, pte, sizeof(*pte));
ba395927 998 }
5cf0a76f
DW
999 if (level == 1)
1000 break;
1001
19c239ce 1002 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1003 level--;
1004 }
1005
5cf0a76f
DW
1006 if (!*target_level)
1007 *target_level = level;
1008
ba395927
KA
1009 return pte;
1010}
1011
6dd9a7c7 1012
ba395927 1013/* return address's pte at specific level */
90dcfb5e
DW
1014static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
1015 unsigned long pfn,
6dd9a7c7 1016 int level, int *large_page)
ba395927
KA
1017{
1018 struct dma_pte *parent, *pte = NULL;
1019 int total = agaw_to_level(domain->agaw);
1020 int offset;
1021
1022 parent = domain->pgd;
1023 while (level <= total) {
90dcfb5e 1024 offset = pfn_level_offset(pfn, total);
ba395927
KA
1025 pte = &parent[offset];
1026 if (level == total)
1027 return pte;
1028
6dd9a7c7
YS
1029 if (!dma_pte_present(pte)) {
1030 *large_page = total;
ba395927 1031 break;
6dd9a7c7
YS
1032 }
1033
e16922af 1034 if (dma_pte_superpage(pte)) {
6dd9a7c7
YS
1035 *large_page = total;
1036 return pte;
1037 }
1038
19c239ce 1039 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
1040 total--;
1041 }
1042 return NULL;
1043}
1044
ba395927 1045/* clear last level pte, a tlb flush should be followed */
5cf0a76f 1046static void dma_pte_clear_range(struct dmar_domain *domain,
595badf5
DW
1047 unsigned long start_pfn,
1048 unsigned long last_pfn)
ba395927 1049{
6dd9a7c7 1050 unsigned int large_page = 1;
310a5ab9 1051 struct dma_pte *first_pte, *pte;
66eae846 1052
162d1b10
JL
1053 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1054 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1055 BUG_ON(start_pfn > last_pfn);
ba395927 1056
04b18e65 1057 /* we don't need lock here; nobody else touches the iova range */
59c36286 1058 do {
6dd9a7c7
YS
1059 large_page = 1;
1060 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
310a5ab9 1061 if (!pte) {
6dd9a7c7 1062 start_pfn = align_to_level(start_pfn + 1, large_page + 1);
310a5ab9
DW
1063 continue;
1064 }
6dd9a7c7 1065 do {
310a5ab9 1066 dma_clear_pte(pte);
6dd9a7c7 1067 start_pfn += lvl_to_nr_pages(large_page);
310a5ab9 1068 pte++;
75e6bf96
DW
1069 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
1070
310a5ab9
DW
1071 domain_flush_cache(domain, first_pte,
1072 (void *)pte - (void *)first_pte);
59c36286
DW
1073
1074 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
1075}
1076
3269ee0b
AW
1077static void dma_pte_free_level(struct dmar_domain *domain, int level,
1078 struct dma_pte *pte, unsigned long pfn,
1079 unsigned long start_pfn, unsigned long last_pfn)
1080{
1081 pfn = max(start_pfn, pfn);
1082 pte = &pte[pfn_level_offset(pfn, level)];
1083
1084 do {
1085 unsigned long level_pfn;
1086 struct dma_pte *level_pte;
1087
1088 if (!dma_pte_present(pte) || dma_pte_superpage(pte))
1089 goto next;
1090
1091 level_pfn = pfn & level_mask(level - 1);
1092 level_pte = phys_to_virt(dma_pte_addr(pte));
1093
1094 if (level > 2)
1095 dma_pte_free_level(domain, level - 1, level_pte,
1096 level_pfn, start_pfn, last_pfn);
1097
1098 /* If range covers entire pagetable, free it */
1099 if (!(start_pfn > level_pfn ||
08336fd2 1100 last_pfn < level_pfn + level_size(level) - 1)) {
3269ee0b
AW
1101 dma_clear_pte(pte);
1102 domain_flush_cache(domain, pte, sizeof(*pte));
1103 free_pgtable_page(level_pte);
1104 }
1105next:
1106 pfn += level_size(level);
1107 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1108}
1109
ba395927
KA
1110/* free page table pages. last level pte should already be cleared */
1111static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
1112 unsigned long start_pfn,
1113 unsigned long last_pfn)
ba395927 1114{
162d1b10
JL
1115 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1116 BUG_ON(!domain_pfn_supported(domain, last_pfn));
59c36286 1117 BUG_ON(start_pfn > last_pfn);
ba395927 1118
d41a4adb
JL
1119 dma_pte_clear_range(domain, start_pfn, last_pfn);
1120
f3a0a52f 1121 /* We don't need lock here; nobody else touches the iova range */
3269ee0b
AW
1122 dma_pte_free_level(domain, agaw_to_level(domain->agaw),
1123 domain->pgd, 0, start_pfn, last_pfn);
6660c63a 1124
ba395927 1125 /* free pgd */
d794dc9b 1126 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
1127 free_pgtable_page(domain->pgd);
1128 domain->pgd = NULL;
1129 }
1130}
1131
ea8ea460
DW
1132/* When a page at a given level is being unlinked from its parent, we don't
1133 need to *modify* it at all. All we need to do is make a list of all the
1134 pages which can be freed just as soon as we've flushed the IOTLB and we
1135 know the hardware page-walk will no longer touch them.
1136 The 'pte' argument is the *parent* PTE, pointing to the page that is to
1137 be freed. */
1138static struct page *dma_pte_list_pagetables(struct dmar_domain *domain,
1139 int level, struct dma_pte *pte,
1140 struct page *freelist)
1141{
1142 struct page *pg;
1143
1144 pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT);
1145 pg->freelist = freelist;
1146 freelist = pg;
1147
1148 if (level == 1)
1149 return freelist;
1150
adeb2590
JL
1151 pte = page_address(pg);
1152 do {
ea8ea460
DW
1153 if (dma_pte_present(pte) && !dma_pte_superpage(pte))
1154 freelist = dma_pte_list_pagetables(domain, level - 1,
1155 pte, freelist);
adeb2590
JL
1156 pte++;
1157 } while (!first_pte_in_page(pte));
ea8ea460
DW
1158
1159 return freelist;
1160}
1161
1162static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level,
1163 struct dma_pte *pte, unsigned long pfn,
1164 unsigned long start_pfn,
1165 unsigned long last_pfn,
1166 struct page *freelist)
1167{
1168 struct dma_pte *first_pte = NULL, *last_pte = NULL;
1169
1170 pfn = max(start_pfn, pfn);
1171 pte = &pte[pfn_level_offset(pfn, level)];
1172
1173 do {
1174 unsigned long level_pfn;
1175
1176 if (!dma_pte_present(pte))
1177 goto next;
1178
1179 level_pfn = pfn & level_mask(level);
1180
1181 /* If range covers entire pagetable, free it */
1182 if (start_pfn <= level_pfn &&
1183 last_pfn >= level_pfn + level_size(level) - 1) {
 1184 /* These subordinate page tables are going away entirely. Don't
1185 bother to clear them; we're just going to *free* them. */
1186 if (level > 1 && !dma_pte_superpage(pte))
1187 freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist);
1188
1189 dma_clear_pte(pte);
1190 if (!first_pte)
1191 first_pte = pte;
1192 last_pte = pte;
1193 } else if (level > 1) {
1194 /* Recurse down into a level that isn't *entirely* obsolete */
1195 freelist = dma_pte_clear_level(domain, level - 1,
1196 phys_to_virt(dma_pte_addr(pte)),
1197 level_pfn, start_pfn, last_pfn,
1198 freelist);
1199 }
1200next:
1201 pfn += level_size(level);
1202 } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
1203
1204 if (first_pte)
1205 domain_flush_cache(domain, first_pte,
1206 (void *)++last_pte - (void *)first_pte);
1207
1208 return freelist;
1209}
1210
1211/* We can't just free the pages because the IOMMU may still be walking
1212 the page tables, and may have cached the intermediate levels. The
1213 pages can only be freed after the IOTLB flush has been done. */
1214struct page *domain_unmap(struct dmar_domain *domain,
1215 unsigned long start_pfn,
1216 unsigned long last_pfn)
1217{
ea8ea460
DW
1218 struct page *freelist = NULL;
1219
162d1b10
JL
1220 BUG_ON(!domain_pfn_supported(domain, start_pfn));
1221 BUG_ON(!domain_pfn_supported(domain, last_pfn));
ea8ea460
DW
1222 BUG_ON(start_pfn > last_pfn);
1223
1224 /* we don't need lock here; nobody else touches the iova range */
1225 freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
1226 domain->pgd, 0, start_pfn, last_pfn, NULL);
1227
1228 /* free pgd */
1229 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
1230 struct page *pgd_page = virt_to_page(domain->pgd);
1231 pgd_page->freelist = freelist;
1232 freelist = pgd_page;
1233
1234 domain->pgd = NULL;
1235 }
1236
1237 return freelist;
1238}
1239
1240void dma_free_pagelist(struct page *freelist)
1241{
1242 struct page *pg;
1243
1244 while ((pg = freelist)) {
1245 freelist = pg->freelist;
1246 free_pgtable_page(page_address(pg));
1247 }
1248}
1249
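/*
 * Editor's sketch of the intended calling pattern, not part of the original
 * file: gather the page-table pages, flush the IOTLB, and only then give the
 * pages back to the allocator (npages just stands for the unmapped count).
 *
 *   freelist = domain_unmap(domain, start_pfn, last_pfn);
 *   iommu_flush_iotlb_psi(iommu, domain, start_pfn, npages, 0, 0);
 *   dma_free_pagelist(freelist);
 */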
ba395927
KA
1250/* iommu handling */
1251static int iommu_alloc_root_entry(struct intel_iommu *iommu)
1252{
1253 struct root_entry *root;
1254 unsigned long flags;
1255
4c923d47 1256 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ffebeb46 1257 if (!root) {
9f10e5bf 1258 pr_err("Allocating root entry for %s failed\n",
ffebeb46 1259 iommu->name);
ba395927 1260 return -ENOMEM;
ffebeb46 1261 }
ba395927 1262
5b6985ce 1263 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
1264
1265 spin_lock_irqsave(&iommu->lock, flags);
1266 iommu->root_entry = root;
1267 spin_unlock_irqrestore(&iommu->lock, flags);
1268
1269 return 0;
1270}
1271
ba395927
KA
1272static void iommu_set_root_entry(struct intel_iommu *iommu)
1273{
03ecc32c 1274 u64 addr;
c416daa9 1275 u32 sts;
ba395927
KA
1276 unsigned long flag;
1277
03ecc32c 1278 addr = virt_to_phys(iommu->root_entry);
c83b2f20 1279 if (ecs_enabled(iommu))
03ecc32c 1280 addr |= DMA_RTADDR_RTT;
ba395927 1281
1f5b3c3f 1282 raw_spin_lock_irqsave(&iommu->register_lock, flag);
03ecc32c 1283 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
ba395927 1284
c416daa9 1285 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1286
1287 /* Make sure hardware complete it */
1288 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1289 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927 1290
1f5b3c3f 1291 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1292}
1293
1294static void iommu_flush_write_buffer(struct intel_iommu *iommu)
1295{
1296 u32 val;
1297 unsigned long flag;
1298
9af88143 1299 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 1300 return;
ba395927 1301
1f5b3c3f 1302 raw_spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 1303 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1304
1305 /* Make sure hardware complete it */
1306 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1307 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927 1308
1f5b3c3f 1309 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1310}
1311
1312/* return value determine if we need a write buffer flush */
4c25a2c1
DW
1313static void __iommu_flush_context(struct intel_iommu *iommu,
1314 u16 did, u16 source_id, u8 function_mask,
1315 u64 type)
ba395927
KA
1316{
1317 u64 val = 0;
1318 unsigned long flag;
1319
ba395927
KA
1320 switch (type) {
1321 case DMA_CCMD_GLOBAL_INVL:
1322 val = DMA_CCMD_GLOBAL_INVL;
1323 break;
1324 case DMA_CCMD_DOMAIN_INVL:
1325 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
1326 break;
1327 case DMA_CCMD_DEVICE_INVL:
1328 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
1329 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
1330 break;
1331 default:
1332 BUG();
1333 }
1334 val |= DMA_CCMD_ICC;
1335
1f5b3c3f 1336 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1337 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
1338
1339 /* Make sure hardware complete it */
1340 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
1341 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
1342
1f5b3c3f 1343 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1344}
1345
ba395927 1346/* return value determine if we need a write buffer flush */
1f0ef2aa
DW
1347static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
1348 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
1349{
1350 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
1351 u64 val = 0, val_iva = 0;
1352 unsigned long flag;
1353
ba395927
KA
1354 switch (type) {
1355 case DMA_TLB_GLOBAL_FLUSH:
1356 /* global flush doesn't need set IVA_REG */
1357 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
1358 break;
1359 case DMA_TLB_DSI_FLUSH:
1360 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
1361 break;
1362 case DMA_TLB_PSI_FLUSH:
1363 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
ea8ea460 1364 /* IH bit is passed in as part of address */
ba395927
KA
1365 val_iva = size_order | addr;
1366 break;
1367 default:
1368 BUG();
1369 }
1370 /* Note: set drain read/write */
1371#if 0
1372 /*
1373 * This is probably to be super secure.. Looks like we can
1374 * ignore it without any impact.
1375 */
1376 if (cap_read_drain(iommu->cap))
1377 val |= DMA_TLB_READ_DRAIN;
1378#endif
1379 if (cap_write_drain(iommu->cap))
1380 val |= DMA_TLB_WRITE_DRAIN;
1381
1f5b3c3f 1382 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1383 /* Note: Only uses first TLB reg currently */
1384 if (val_iva)
1385 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1386 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1387
1388 /* Make sure hardware complete it */
1389 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1390 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1391
1f5b3c3f 1392 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1393
1394 /* check IOTLB invalidation granularity */
1395 if (DMA_TLB_IAIG(val) == 0)
9f10e5bf 1396 pr_err("Flush IOTLB failed\n");
ba395927 1397 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
9f10e5bf 1398 pr_debug("TLB flush request %Lx, actual %Lx\n",
5b6985ce
FY
1399 (unsigned long long)DMA_TLB_IIRG(type),
1400 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1401}
1402
64ae892b
DW
1403static struct device_domain_info *
1404iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu,
1405 u8 bus, u8 devfn)
93a23a72 1406{
2f119c78 1407 bool found = false;
93a23a72 1408 struct device_domain_info *info;
0bcb3e28 1409 struct pci_dev *pdev;
93a23a72 1410
55d94043
JR
1411 assert_spin_locked(&device_domain_lock);
1412
93a23a72
YZ
1413 if (!ecap_dev_iotlb_support(iommu->ecap))
1414 return NULL;
1415
1416 if (!iommu->qi)
1417 return NULL;
1418
93a23a72 1419 list_for_each_entry(info, &domain->devices, link)
c3b497c6
JL
1420 if (info->iommu == iommu && info->bus == bus &&
1421 info->devfn == devfn) {
2f119c78 1422 found = true;
93a23a72
YZ
1423 break;
1424 }
93a23a72 1425
0bcb3e28 1426 if (!found || !info->dev || !dev_is_pci(info->dev))
93a23a72
YZ
1427 return NULL;
1428
0bcb3e28
DW
1429 pdev = to_pci_dev(info->dev);
1430
1431 if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS))
93a23a72
YZ
1432 return NULL;
1433
0bcb3e28 1434 if (!dmar_find_matched_atsr_unit(pdev))
93a23a72
YZ
1435 return NULL;
1436
93a23a72
YZ
1437 return info;
1438}
1439
1440static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1441{
0bcb3e28 1442 if (!info || !dev_is_pci(info->dev))
93a23a72
YZ
1443 return;
1444
0bcb3e28 1445 pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT);
93a23a72
YZ
1446}
1447
1448static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1449{
0bcb3e28
DW
1450 if (!info->dev || !dev_is_pci(info->dev) ||
1451 !pci_ats_enabled(to_pci_dev(info->dev)))
93a23a72
YZ
1452 return;
1453
0bcb3e28 1454 pci_disable_ats(to_pci_dev(info->dev));
93a23a72
YZ
1455}
1456
1457static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1458 u64 addr, unsigned mask)
1459{
1460 u16 sid, qdep;
1461 unsigned long flags;
1462 struct device_domain_info *info;
1463
1464 spin_lock_irqsave(&device_domain_lock, flags);
1465 list_for_each_entry(info, &domain->devices, link) {
0bcb3e28
DW
1466 struct pci_dev *pdev;
1467 if (!info->dev || !dev_is_pci(info->dev))
1468 continue;
1469
1470 pdev = to_pci_dev(info->dev);
1471 if (!pci_ats_enabled(pdev))
93a23a72
YZ
1472 continue;
1473
1474 sid = info->bus << 8 | info->devfn;
0bcb3e28 1475 qdep = pci_ats_queue_depth(pdev);
93a23a72
YZ
1476 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1477 }
1478 spin_unlock_irqrestore(&device_domain_lock, flags);
1479}
1480
a1ddcbe9
JR
1481static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
1482 struct dmar_domain *domain,
1483 unsigned long pfn, unsigned int pages,
1484 int ih, int map)
ba395927 1485{
9dd2fe89 1486 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1487 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
a1ddcbe9 1488 u16 did = domain->iommu_did[iommu->seq_id];
ba395927 1489
ba395927
KA
1490 BUG_ON(pages == 0);
1491
ea8ea460
DW
1492 if (ih)
1493 ih = 1 << 6;
ba395927 1494 /*
9dd2fe89
YZ
 1495 * Fall back to a domain-selective flush if there is no PSI support or
 1496 * the size is too big.
ba395927
KA
 1497 * PSI requires the number of pages to be a power of two, with the base
 1498 * address naturally aligned to the size.
1499 */
9dd2fe89
YZ
1500 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1501 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1502 DMA_TLB_DSI_FLUSH);
9dd2fe89 1503 else
ea8ea460 1504 iommu->flush.flush_iotlb(iommu, did, addr | ih, mask,
9dd2fe89 1505 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1506
1507 /*
82653633
NA
1508 * In caching mode, changes of pages from non-present to present require
1509 * flush. However, device IOTLB doesn't need to be flushed in this case.
bf92df30 1510 */
82653633 1511 if (!cap_caching_mode(iommu->cap) || !map)
9452d5bf
JR
1512 iommu_flush_dev_iotlb(get_iommu_domain(iommu, did),
1513 addr, mask);
ba395927
KA
1514}
1515
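/*
 * Editor's worked example, not part of the original file: a PSI flush of 3
 * pages at pfn 0x1234 becomes mask = ilog2(__roundup_pow_of_two(3)) = 2, an
 * aligned 4-page invalidation covering pfns 0x1234-0x1237; if that mask ever
 * exceeded cap_max_amask_val(), the code above falls back to a
 * domain-selective flush instead.
 */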
f8bab735 1516static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1517{
1518 u32 pmen;
1519 unsigned long flags;
1520
1f5b3c3f 1521 raw_spin_lock_irqsave(&iommu->register_lock, flags);
f8bab735 1522 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1523 pmen &= ~DMA_PMEN_EPM;
1524 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1525
1526 /* wait for the protected region status bit to clear */
1527 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1528 readl, !(pmen & DMA_PMEN_PRS), pmen);
1529
1f5b3c3f 1530 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
f8bab735 1531}
1532
2a41ccee 1533static void iommu_enable_translation(struct intel_iommu *iommu)
ba395927
KA
1534{
1535 u32 sts;
1536 unsigned long flags;
1537
1f5b3c3f 1538 raw_spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1539 iommu->gcmd |= DMA_GCMD_TE;
1540 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1541
1542 /* Make sure hardware complete it */
1543 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1544 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1545
1f5b3c3f 1546 raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
ba395927
KA
1547}
1548
2a41ccee 1549static void iommu_disable_translation(struct intel_iommu *iommu)
ba395927
KA
1550{
1551 u32 sts;
1552 unsigned long flag;
1553
1f5b3c3f 1554 raw_spin_lock_irqsave(&iommu->register_lock, flag);
ba395927
KA
1555 iommu->gcmd &= ~DMA_GCMD_TE;
1556 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1557
1558 /* Make sure hardware complete it */
1559 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1560 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927 1561
1f5b3c3f 1562 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
1563}
1564
3460a6d9 1565
ba395927
KA
1566static int iommu_init_domains(struct intel_iommu *iommu)
1567{
8bf47816
JR
1568 u32 ndomains, nlongs;
1569 size_t size;
ba395927
KA
1570
1571 ndomains = cap_ndoms(iommu->cap);
8bf47816 1572 pr_debug("%s: Number of Domains supported <%d>\n",
9f10e5bf 1573 iommu->name, ndomains);
ba395927
KA
1574 nlongs = BITS_TO_LONGS(ndomains);
1575
94a91b50
DD
1576 spin_lock_init(&iommu->lock);
1577
ba395927
KA
1578 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1579 if (!iommu->domain_ids) {
9f10e5bf
JR
1580 pr_err("%s: Allocating domain id array failed\n",
1581 iommu->name);
ba395927
KA
1582 return -ENOMEM;
1583 }
8bf47816
JR
1584
1585 size = ((ndomains >> 8) + 1) * sizeof(struct dmar_domain **);
1586 iommu->domains = kzalloc(size, GFP_KERNEL);
1587
1588 if (iommu->domains) {
1589 size = 256 * sizeof(struct dmar_domain *);
1590 iommu->domains[0] = kzalloc(size, GFP_KERNEL);
1591 }
1592
1593 if (!iommu->domains || !iommu->domains[0]) {
9f10e5bf
JR
1594 pr_err("%s: Allocating domain array failed\n",
1595 iommu->name);
852bdb04 1596 kfree(iommu->domain_ids);
8bf47816 1597 kfree(iommu->domains);
852bdb04 1598 iommu->domain_ids = NULL;
8bf47816 1599 iommu->domains = NULL;
ba395927
KA
1600 return -ENOMEM;
1601 }
1602
8bf47816
JR
1603
1604
ba395927 1605 /*
c0e8a6c8
JR
1606 * If Caching mode is set, then invalid translations are tagged
1607 * with domain-id 0, hence we need to pre-allocate it. We also
1608 * use domain-id 0 as a marker for non-allocated domain-id, so
1609 * make sure it is not used for a real domain.
ba395927 1610 */
c0e8a6c8
JR
1611 set_bit(0, iommu->domain_ids);
1612
ba395927
KA
1613 return 0;
1614}
ba395927 1615
ffebeb46 1616static void disable_dmar_iommu(struct intel_iommu *iommu)
ba395927 1617{
29a27719 1618 struct device_domain_info *info, *tmp;
55d94043 1619 unsigned long flags;
ba395927 1620
29a27719
JR
1621 if (!iommu->domains || !iommu->domain_ids)
1622 return;
a4eaa86c 1623
55d94043 1624 spin_lock_irqsave(&device_domain_lock, flags);
29a27719
JR
1625 list_for_each_entry_safe(info, tmp, &device_domain_list, global) {
1626 struct dmar_domain *domain;
1627
1628 if (info->iommu != iommu)
1629 continue;
1630
1631 if (!info->dev || !info->domain)
1632 continue;
1633
1634 domain = info->domain;
1635
e6de0f8d 1636 dmar_remove_one_dev_info(domain, info->dev);
29a27719
JR
1637
1638 if (!domain_type_is_vm_or_si(domain))
1639 domain_exit(domain);
ba395927 1640 }
55d94043 1641 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
1642
1643 if (iommu->gcmd & DMA_GCMD_TE)
1644 iommu_disable_translation(iommu);
ffebeb46 1645}
ba395927 1646
ffebeb46
JL
1647static void free_dmar_iommu(struct intel_iommu *iommu)
1648{
1649 if ((iommu->domains) && (iommu->domain_ids)) {
8bf47816
JR
1650 int elems = (cap_ndoms(iommu->cap) >> 8) + 1;
1651 int i;
1652
1653 for (i = 0; i < elems; i++)
1654 kfree(iommu->domains[i]);
ffebeb46
JL
1655 kfree(iommu->domains);
1656 kfree(iommu->domain_ids);
1657 iommu->domains = NULL;
1658 iommu->domain_ids = NULL;
1659 }
ba395927 1660
d9630fe9
WH
1661 g_iommus[iommu->seq_id] = NULL;
1662
ba395927
KA
1663 /* free context mapping */
1664 free_context_table(iommu);
ba395927
KA
1665}
1666
ab8dfe25 1667static struct dmar_domain *alloc_domain(int flags)
ba395927 1668{
ba395927 1669 struct dmar_domain *domain;
ba395927
KA
1670
1671 domain = alloc_domain_mem();
1672 if (!domain)
1673 return NULL;
1674
ab8dfe25 1675 memset(domain, 0, sizeof(*domain));
4c923d47 1676 domain->nid = -1;
ab8dfe25 1677 domain->flags = flags;
92d03cc8 1678 INIT_LIST_HEAD(&domain->devices);
2c2e2c38
FY
1679
1680 return domain;
1681}
1682
d160aca5
JR
1683/* Must be called with iommu->lock */
1684static int domain_attach_iommu(struct dmar_domain *domain,
fb170fb4
JL
1685 struct intel_iommu *iommu)
1686{
d160aca5 1687 unsigned long ndomains;
55d94043 1688 int num;
c0e8a6c8 1689
55d94043 1690 assert_spin_locked(&device_domain_lock);
d160aca5 1691 assert_spin_locked(&iommu->lock);
fb170fb4 1692
29a27719
JR
1693 domain->iommu_refcnt[iommu->seq_id] += 1;
1694 domain->iommu_count += 1;
1695 if (domain->iommu_refcnt[iommu->seq_id] == 1) {
d160aca5
JR
1696 ndomains = cap_ndoms(iommu->cap);
1697 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1698
1699 if (num >= ndomains) {
1700 pr_err("%s: No free domain ids\n", iommu->name);
1701 domain->iommu_refcnt[iommu->seq_id] -= 1;
1702 domain->iommu_count -= 1;
55d94043 1703 return -ENOSPC;
d160aca5
JR
1704 }
1705
1706 set_bit(num, iommu->domain_ids);
1707 set_iommu_domain(iommu, num, domain);
1708
1709 domain->iommu_did[iommu->seq_id] = num;
1710 domain->nid = iommu->node;
1711
fb170fb4
JL
1712 domain_update_iommu_cap(domain);
1713 }
d160aca5 1714
55d94043 1715 return 0;
fb170fb4
JL
1716}
1717
1718static int domain_detach_iommu(struct dmar_domain *domain,
1719 struct intel_iommu *iommu)
1720{
d160aca5 1721 int num, count = INT_MAX;
d160aca5 1722
55d94043 1723 assert_spin_locked(&device_domain_lock);
d160aca5 1724 assert_spin_locked(&iommu->lock);
fb170fb4 1725
29a27719
JR
1726 domain->iommu_refcnt[iommu->seq_id] -= 1;
1727 count = --domain->iommu_count;
1728 if (domain->iommu_refcnt[iommu->seq_id] == 0) {
d160aca5
JR
1729 num = domain->iommu_did[iommu->seq_id];
1730 clear_bit(num, iommu->domain_ids);
1731 set_iommu_domain(iommu, num, NULL);
1732
fb170fb4 1733 domain_update_iommu_cap(domain);
c0e8a6c8 1734 domain->iommu_did[iommu->seq_id] = 0;
fb170fb4 1735 }
fb170fb4
JL
1736
1737 return count;
1738}
1739
ba395927 1740static struct iova_domain reserved_iova_list;
8a443df4 1741static struct lock_class_key reserved_rbtree_key;
ba395927 1742
51a63e67 1743static int dmar_init_reserved_ranges(void)
ba395927
KA
1744{
1745 struct pci_dev *pdev = NULL;
1746 struct iova *iova;
1747 int i;
ba395927 1748
0fb5fe87
RM
1749 init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN,
1750 DMA_32BIT_PFN);
ba395927 1751
8a443df4
MG
1752 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1753 &reserved_rbtree_key);
1754
ba395927
KA
1755 /* IOAPIC ranges shouldn't be accessed by DMA */
1756 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1757 IOVA_PFN(IOAPIC_RANGE_END));
51a63e67 1758 if (!iova) {
9f10e5bf 1759 pr_err("Reserve IOAPIC range failed\n");
51a63e67
JC
1760 return -ENODEV;
1761 }
ba395927
KA
1762
1763 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1764 for_each_pci_dev(pdev) {
1765 struct resource *r;
1766
1767 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1768 r = &pdev->resource[i];
1769 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1770 continue;
1a4a4551
DW
1771 iova = reserve_iova(&reserved_iova_list,
1772 IOVA_PFN(r->start),
1773 IOVA_PFN(r->end));
51a63e67 1774 if (!iova) {
9f10e5bf 1775 pr_err("Reserve iova failed\n");
51a63e67
JC
1776 return -ENODEV;
1777 }
ba395927
KA
1778 }
1779 }
51a63e67 1780 return 0;
ba395927
KA
1781}
1782
1783static void domain_reserve_special_ranges(struct dmar_domain *domain)
1784{
1785 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1786}
1787
1788static inline int guestwidth_to_adjustwidth(int gaw)
1789{
1790 int agaw;
1791 int r = (gaw - 12) % 9;
1792
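	/*
	 * Informal example: the adjusted width is gaw rounded up to the next
	 * value of the form 12 + 9*n, matching the 9-bit page-table stride.
	 * gaw = 48 already fits (r == 0) and stays 48; gaw = 40 gives r = 1
	 * and becomes 40 + 9 - 1 = 48.
	 */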
1793 if (r == 0)
1794 agaw = gaw;
1795 else
1796 agaw = gaw + 9 - r;
1797 if (agaw > 64)
1798 agaw = 64;
1799 return agaw;
1800}
1801
dc534b25
JR
1802static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
1803 int guest_width)
ba395927 1804{
ba395927
KA
1805 int adjust_width, agaw;
1806 unsigned long sagaw;
1807
0fb5fe87
RM
1808 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
1809 DMA_32BIT_PFN);
ba395927
KA
1810 domain_reserve_special_ranges(domain);
1811
1812 /* calculate AGAW */
ba395927
KA
1813 if (guest_width > cap_mgaw(iommu->cap))
1814 guest_width = cap_mgaw(iommu->cap);
1815 domain->gaw = guest_width;
1816 adjust_width = guestwidth_to_adjustwidth(guest_width);
1817 agaw = width_to_agaw(adjust_width);
1818 sagaw = cap_sagaw(iommu->cap);
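	/*
	 * cap_sagaw() is a 5-bit bitmap indexed by AGAW; e.g. hardware that
	 * only implements 4-level (48-bit) tables sets bit 2, so a 39-bit
	 * request (agaw 1) is bumped up to the next supported width below.
	 */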
1819 if (!test_bit(agaw, &sagaw)) {
1820 /* hardware doesn't support it, choose a bigger one */
9f10e5bf 1821 pr_debug("Hardware doesn't support agaw %d\n", agaw);
ba395927
KA
1822 agaw = find_next_bit(&sagaw, 5, agaw);
1823 if (agaw >= 5)
1824 return -ENODEV;
1825 }
1826 domain->agaw = agaw;
ba395927 1827
8e604097
WH
1828 if (ecap_coherent(iommu->ecap))
1829 domain->iommu_coherency = 1;
1830 else
1831 domain->iommu_coherency = 0;
1832
58c610bd
SY
1833 if (ecap_sc_support(iommu->ecap))
1834 domain->iommu_snooping = 1;
1835 else
1836 domain->iommu_snooping = 0;
1837
214e39aa
DW
1838 if (intel_iommu_superpage)
1839 domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
1840 else
1841 domain->iommu_superpage = 0;
1842
4c923d47 1843 domain->nid = iommu->node;
c7151a8d 1844
ba395927 1845 /* always allocate the top pgd */
4c923d47 1846 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1847 if (!domain->pgd)
1848 return -ENOMEM;
5b6985ce 1849 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1850 return 0;
1851}
1852
1853static void domain_exit(struct dmar_domain *domain)
1854{
ea8ea460 1855 struct page *freelist = NULL;
ba395927
KA
1856
 1857	/* Domain 0 is reserved, so don't process it */
1858 if (!domain)
1859 return;
1860
7b668357
AW
1861 /* Flush any lazy unmaps that may reference this domain */
1862 if (!intel_iommu_strict)
1863 flush_unmaps_timeout(0);
1864
d160aca5
JR
1865 /* Remove associated devices and clear attached or cached domains */
1866 rcu_read_lock();
ba395927 1867 domain_remove_dev_info(domain);
d160aca5 1868 rcu_read_unlock();
92d03cc8 1869
ba395927
KA
1870 /* destroy iovas */
1871 put_iova_domain(&domain->iovad);
ba395927 1872
ea8ea460 1873 freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1874
ea8ea460
DW
1875 dma_free_pagelist(freelist);
1876
ba395927
KA
1877 free_domain_mem(domain);
1878}
1879
64ae892b
DW
1880static int domain_context_mapping_one(struct dmar_domain *domain,
1881 struct intel_iommu *iommu,
28ccce0d 1882 u8 bus, u8 devfn)
ba395927 1883{
c6c2cebd 1884 u16 did = domain->iommu_did[iommu->seq_id];
28ccce0d
JR
1885 int translation = CONTEXT_TT_MULTI_LEVEL;
1886 struct device_domain_info *info = NULL;
ba395927 1887 struct context_entry *context;
ba395927 1888 unsigned long flags;
ea6606b0 1889 struct dma_pte *pgd;
55d94043 1890 int ret, agaw;
28ccce0d 1891
c6c2cebd
JR
1892 WARN_ON(did == 0);
1893
28ccce0d
JR
1894 if (hw_pass_through && domain_type_is_si(domain))
1895 translation = CONTEXT_TT_PASS_THROUGH;
ba395927
KA
1896
1897 pr_debug("Set context mapping for %02x:%02x.%d\n",
1898 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1899
ba395927 1900 BUG_ON(!domain->pgd);
5331fe6f 1901
55d94043
JR
1902 spin_lock_irqsave(&device_domain_lock, flags);
1903 spin_lock(&iommu->lock);
1904
1905 ret = -ENOMEM;
03ecc32c 1906 context = iommu_context_addr(iommu, bus, devfn, 1);
ba395927 1907 if (!context)
55d94043
JR
1908 goto out_unlock;
1909
1910 ret = 0;
1911 if (context_present(context))
1912 goto out_unlock;
ba395927 1913
ea6606b0
WH
1914 pgd = domain->pgd;
1915
de24e553 1916 context_clear_entry(context);
c6c2cebd 1917 context_set_domain_id(context, did);
4ed0d3e6 1918
de24e553
JR
1919 /*
1920 * Skip top levels of page tables for iommu which has less agaw
1921 * than default. Unnecessary for PT mode.
1922 */
93a23a72 1923 if (translation != CONTEXT_TT_PASS_THROUGH) {
de24e553 1924 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
55d94043 1925 ret = -ENOMEM;
de24e553 1926 pgd = phys_to_virt(dma_pte_addr(pgd));
55d94043
JR
1927 if (!dma_pte_present(pgd))
1928 goto out_unlock;
de24e553
JR
1929 }
1930
64ae892b 1931 info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
93a23a72
YZ
1932 translation = info ? CONTEXT_TT_DEV_IOTLB :
1933 CONTEXT_TT_MULTI_LEVEL;
de24e553 1934
93a23a72
YZ
1935 context_set_address_root(context, virt_to_phys(pgd));
1936 context_set_address_width(context, iommu->agaw);
de24e553
JR
1937 } else {
1938 /*
1939 * In pass through mode, AW must be programmed to
1940 * indicate the largest AGAW value supported by
1941 * hardware. And ASR is ignored by hardware.
1942 */
1943 context_set_address_width(context, iommu->msagaw);
93a23a72 1944 }
4ed0d3e6
FY
1945
1946 context_set_translation_type(context, translation);
c07e7d21
MM
1947 context_set_fault_enable(context);
1948 context_set_present(context);
5331fe6f 1949 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1950
4c25a2c1
DW
1951 /*
1952 * It's a non-present to present mapping. If hardware doesn't cache
 1953	 * non-present entries we only need to flush the write-buffer. If it
 1954	 * _does_ cache non-present entries, then it does so in the special
1955 * domain #0, which we have to flush:
1956 */
1957 if (cap_caching_mode(iommu->cap)) {
1958 iommu->flush.flush_context(iommu, 0,
1959 (((u16)bus) << 8) | devfn,
1960 DMA_CCMD_MASK_NOBIT,
1961 DMA_CCMD_DEVICE_INVL);
c6c2cebd 1962 iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1963 } else {
ba395927 1964 iommu_flush_write_buffer(iommu);
4c25a2c1 1965 }
93a23a72 1966 iommu_enable_dev_iotlb(info);
55d94043
JR
1967
1968 ret = 0;
1969
1970out_unlock:
1971 spin_unlock(&iommu->lock);
1972 spin_unlock_irqrestore(&device_domain_lock, flags);
c7151a8d 1973
ba395927
KA
 1974	return ret;
1975}
1976
579305f7
AW
1977struct domain_context_mapping_data {
1978 struct dmar_domain *domain;
1979 struct intel_iommu *iommu;
579305f7
AW
1980};
1981
1982static int domain_context_mapping_cb(struct pci_dev *pdev,
1983 u16 alias, void *opaque)
1984{
1985 struct domain_context_mapping_data *data = opaque;
1986
1987 return domain_context_mapping_one(data->domain, data->iommu,
28ccce0d 1988 PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
1989}
1990
ba395927 1991static int
28ccce0d 1992domain_context_mapping(struct dmar_domain *domain, struct device *dev)
ba395927 1993{
64ae892b 1994 struct intel_iommu *iommu;
156baca8 1995 u8 bus, devfn;
579305f7 1996 struct domain_context_mapping_data data;
64ae892b 1997
e1f167f3 1998 iommu = device_to_iommu(dev, &bus, &devfn);
64ae892b
DW
1999 if (!iommu)
2000 return -ENODEV;
ba395927 2001
579305f7 2002 if (!dev_is_pci(dev))
28ccce0d 2003 return domain_context_mapping_one(domain, iommu, bus, devfn);
579305f7
AW
2004
2005 data.domain = domain;
2006 data.iommu = iommu;
579305f7
AW
2007
2008 return pci_for_each_dma_alias(to_pci_dev(dev),
2009 &domain_context_mapping_cb, &data);
2010}
2011
2012static int domain_context_mapped_cb(struct pci_dev *pdev,
2013 u16 alias, void *opaque)
2014{
2015 struct intel_iommu *iommu = opaque;
2016
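	/*
	 * pci_for_each_dma_alias() stops as soon as a callback returns
	 * non-zero, so report "not mapped" as non-zero here; the caller
	 * inverts the result again, making domain_context_mapped() true
	 * only when every alias has a present context entry.
	 */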
2017 return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
ba395927
KA
2018}
2019
e1f167f3 2020static int domain_context_mapped(struct device *dev)
ba395927 2021{
5331fe6f 2022 struct intel_iommu *iommu;
156baca8 2023 u8 bus, devfn;
5331fe6f 2024
e1f167f3 2025 iommu = device_to_iommu(dev, &bus, &devfn);
5331fe6f
WH
2026 if (!iommu)
2027 return -ENODEV;
ba395927 2028
579305f7
AW
2029 if (!dev_is_pci(dev))
2030 return device_context_mapped(iommu, bus, devfn);
e1f167f3 2031
579305f7
AW
2032 return !pci_for_each_dma_alias(to_pci_dev(dev),
2033 domain_context_mapped_cb, iommu);
ba395927
KA
2034}
2035
f532959b
FY
2036/* Returns a number of VTD pages, but aligned to MM page size */
2037static inline unsigned long aligned_nrpages(unsigned long host_addr,
2038 size_t size)
2039{
2040 host_addr &= ~PAGE_MASK;
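	/* e.g. with 4KiB pages, offset 0x800 and size 0x1000 span two
	   pages, so this returns 2 even though size is one page long. */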
2041 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
2042}
2043
6dd9a7c7
YS
2044/* Return largest possible superpage level for a given mapping */
2045static inline int hardware_largepage_caps(struct dmar_domain *domain,
2046 unsigned long iov_pfn,
2047 unsigned long phy_pfn,
2048 unsigned long pages)
2049{
2050 int support, level = 1;
2051 unsigned long pfnmerge;
2052
2053 support = domain->iommu_superpage;
2054
2055 /* To use a large page, the virtual *and* physical addresses
2056 must be aligned to 2MiB/1GiB/etc. Lower bits set in either
2057 of them will mean we have to use smaller pages. So just
2058 merge them and check both at once. */
2059 pfnmerge = iov_pfn | phy_pfn;
2060
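	/* e.g. a 2MiB superpage (level 2) needs the low 9 bits of both
	   pfns clear and at least 512 pages left in the mapping. */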
2061 while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) {
2062 pages >>= VTD_STRIDE_SHIFT;
2063 if (!pages)
2064 break;
2065 pfnmerge >>= VTD_STRIDE_SHIFT;
2066 level++;
2067 support--;
2068 }
2069 return level;
2070}
2071
9051aa02
DW
2072static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2073 struct scatterlist *sg, unsigned long phys_pfn,
2074 unsigned long nr_pages, int prot)
e1605495
DW
2075{
2076 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 2077 phys_addr_t uninitialized_var(pteval);
cc4f14aa 2078 unsigned long sg_res = 0;
6dd9a7c7
YS
2079 unsigned int largepage_lvl = 0;
2080 unsigned long lvl_pages = 0;
e1605495 2081
162d1b10 2082 BUG_ON(!domain_pfn_supported(domain, iov_pfn + nr_pages - 1));
e1605495
DW
2083
2084 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
2085 return -EINVAL;
2086
2087 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
2088
cc4f14aa
JL
2089 if (!sg) {
2090 sg_res = nr_pages;
9051aa02
DW
2091 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
2092 }
2093
6dd9a7c7 2094 while (nr_pages > 0) {
c85994e4
DW
2095 uint64_t tmp;
2096
e1605495 2097 if (!sg_res) {
f532959b 2098 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
2099 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
2100 sg->dma_length = sg->length;
2101 pteval = page_to_phys(sg_page(sg)) | prot;
6dd9a7c7 2102 phys_pfn = pteval >> VTD_PAGE_SHIFT;
e1605495 2103 }
6dd9a7c7 2104
e1605495 2105 if (!pte) {
6dd9a7c7
YS
2106 largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res);
2107
5cf0a76f 2108 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl);
e1605495
DW
2109 if (!pte)
2110 return -ENOMEM;
6dd9a7c7 2112			/* It is a large page */
6491d4d0 2112 if (largepage_lvl > 1) {
6dd9a7c7 2113 pteval |= DMA_PTE_LARGE_PAGE;
d41a4adb
JL
2114 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2115 /*
2116 * Ensure that old small page tables are
2117 * removed to make room for superpage,
2118 * if they exist.
2119 */
6491d4d0 2120 dma_pte_free_pagetable(domain, iov_pfn,
d41a4adb 2121 iov_pfn + lvl_pages - 1);
6491d4d0 2122 } else {
6dd9a7c7 2123 pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE;
6491d4d0 2124 }
6dd9a7c7 2125
e1605495
DW
2126 }
 2127		/* We don't need a lock here; nobody else
2128 * touches the iova range
2129 */
7766a3fb 2130 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 2131 if (tmp) {
1bf20f0d 2132 static int dumps = 5;
9f10e5bf
JR
2133 pr_crit("ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
2134 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
2135 if (dumps) {
2136 dumps--;
2137 debug_dma_dump_mappings(NULL);
2138 }
2139 WARN_ON(1);
2140 }
6dd9a7c7
YS
2141
2142 lvl_pages = lvl_to_nr_pages(largepage_lvl);
2143
2144 BUG_ON(nr_pages < lvl_pages);
2145 BUG_ON(sg_res < lvl_pages);
2146
2147 nr_pages -= lvl_pages;
2148 iov_pfn += lvl_pages;
2149 phys_pfn += lvl_pages;
2150 pteval += lvl_pages * VTD_PAGE_SIZE;
2151 sg_res -= lvl_pages;
2152
2153 /* If the next PTE would be the first in a new page, then we
2154 need to flush the cache on the entries we've just written.
2155 And then we'll need to recalculate 'pte', so clear it and
2156 let it get set again in the if (!pte) block above.
2157
2158 If we're done (!nr_pages) we need to flush the cache too.
2159
2160 Also if we've been setting superpages, we may need to
2161 recalculate 'pte' and switch back to smaller pages for the
2162 end of the mapping, if the trailing size is not enough to
2163 use another superpage (i.e. sg_res < lvl_pages). */
e1605495 2164 pte++;
6dd9a7c7
YS
2165 if (!nr_pages || first_pte_in_page(pte) ||
2166 (largepage_lvl > 1 && sg_res < lvl_pages)) {
e1605495
DW
2167 domain_flush_cache(domain, first_pte,
2168 (void *)pte - (void *)first_pte);
2169 pte = NULL;
2170 }
6dd9a7c7
YS
2171
2172 if (!sg_res && nr_pages)
e1605495
DW
2173 sg = sg_next(sg);
2174 }
2175 return 0;
2176}
2177
9051aa02
DW
2178static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2179 struct scatterlist *sg, unsigned long nr_pages,
2180 int prot)
ba395927 2181{
9051aa02
DW
2182 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
2183}
6f6a00e4 2184
9051aa02
DW
2185static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
2186 unsigned long phys_pfn, unsigned long nr_pages,
2187 int prot)
2188{
2189 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
2190}
2191
2452d9db 2192static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 2193{
c7151a8d
WH
2194 if (!iommu)
2195 return;
8c11e798
WH
2196
2197 clear_context_table(iommu, bus, devfn);
2198 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 2199 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2200 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
2201}
2202
109b9b04
DW
2203static inline void unlink_domain_info(struct device_domain_info *info)
2204{
2205 assert_spin_locked(&device_domain_lock);
2206 list_del(&info->link);
2207 list_del(&info->global);
2208 if (info->dev)
0bcb3e28 2209 info->dev->archdata.iommu = NULL;
109b9b04
DW
2210}
2211
ba395927
KA
2212static void domain_remove_dev_info(struct dmar_domain *domain)
2213{
3a74ca01 2214 struct device_domain_info *info, *tmp;
55d94043 2215 unsigned long flags;
ba395927 2216
55d94043 2217 spin_lock_irqsave(&device_domain_lock, flags);
76f45fe3 2218 list_for_each_entry_safe(info, tmp, &domain->devices, link)
55d94043
JR
2219 __dmar_remove_one_dev_info(domain, info->dev);
2220 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927
KA
2221}
2222
2223/*
2224 * find_domain
1525a29a 2225 * Note: struct device->archdata.iommu stores the info
ba395927 2226 */
1525a29a 2227static struct dmar_domain *find_domain(struct device *dev)
ba395927
KA
2228{
2229 struct device_domain_info *info;
2230
2231 /* No lock here, assumes no domain exit in normal case */
1525a29a 2232 info = dev->archdata.iommu;
ba395927
KA
2233 if (info)
2234 return info->domain;
2235 return NULL;
2236}
2237
5a8f40e8 2238static inline struct device_domain_info *
745f2586
JL
2239dmar_search_domain_by_dev_info(int segment, int bus, int devfn)
2240{
2241 struct device_domain_info *info;
2242
2243 list_for_each_entry(info, &device_domain_list, global)
41e80dca 2244 if (info->iommu->segment == segment && info->bus == bus &&
745f2586 2245 info->devfn == devfn)
5a8f40e8 2246 return info;
745f2586
JL
2247
2248 return NULL;
2249}
2250
5db31569
JR
2251static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu,
2252 int bus, int devfn,
2253 struct device *dev,
2254 struct dmar_domain *domain)
745f2586 2255{
5a8f40e8 2256 struct dmar_domain *found = NULL;
745f2586
JL
2257 struct device_domain_info *info;
2258 unsigned long flags;
d160aca5 2259 int ret;
745f2586
JL
2260
2261 info = alloc_devinfo_mem();
2262 if (!info)
b718cd3d 2263 return NULL;
745f2586 2264
745f2586
JL
2265 info->bus = bus;
2266 info->devfn = devfn;
2267 info->dev = dev;
2268 info->domain = domain;
5a8f40e8 2269 info->iommu = iommu;
745f2586
JL
2270
2271 spin_lock_irqsave(&device_domain_lock, flags);
2272 if (dev)
0bcb3e28 2273 found = find_domain(dev);
5a8f40e8
DW
2274 else {
2275 struct device_domain_info *info2;
41e80dca 2276 info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn);
5a8f40e8
DW
2277 if (info2)
2278 found = info2->domain;
2279 }
745f2586
JL
2280 if (found) {
2281 spin_unlock_irqrestore(&device_domain_lock, flags);
2282 free_devinfo_mem(info);
b718cd3d
DW
2283 /* Caller must free the original domain */
2284 return found;
745f2586
JL
2285 }
2286
d160aca5
JR
2287 spin_lock(&iommu->lock);
2288 ret = domain_attach_iommu(domain, iommu);
2289 spin_unlock(&iommu->lock);
2290
2291 if (ret) {
c6c2cebd
JR
2292 spin_unlock_irqrestore(&device_domain_lock, flags);
2293 return NULL;
2294 }
c6c2cebd 2295
b718cd3d
DW
2296 list_add(&info->link, &domain->devices);
2297 list_add(&info->global, &device_domain_list);
2298 if (dev)
2299 dev->archdata.iommu = info;
2300 spin_unlock_irqrestore(&device_domain_lock, flags);
2301
cc4e2575
JR
2302 if (dev && domain_context_mapping(domain, dev)) {
2303 pr_err("Domain context map for %s failed\n", dev_name(dev));
e6de0f8d 2304 dmar_remove_one_dev_info(domain, dev);
cc4e2575
JR
2305 return NULL;
2306 }
2307
b718cd3d 2308 return domain;
745f2586
JL
2309}
2310
579305f7
AW
2311static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque)
2312{
2313 *(u16 *)opaque = alias;
2314 return 0;
2315}
2316
ba395927 2317/* domain is initialized */
146922ec 2318static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw)
ba395927 2319{
cc4e2575 2320 struct device_domain_info *info = NULL;
579305f7
AW
2321 struct dmar_domain *domain, *tmp;
2322 struct intel_iommu *iommu;
ba395927 2323 unsigned long flags;
cc4e2575 2324 u16 dma_alias;
aa4d066a 2325 u8 bus, devfn;
ba395927 2326
146922ec 2327 domain = find_domain(dev);
ba395927
KA
2328 if (domain)
2329 return domain;
2330
579305f7
AW
2331 iommu = device_to_iommu(dev, &bus, &devfn);
2332 if (!iommu)
2333 return NULL;
2334
146922ec
DW
2335 if (dev_is_pci(dev)) {
2336 struct pci_dev *pdev = to_pci_dev(dev);
276dbf99 2337
579305f7
AW
2338 pci_for_each_dma_alias(pdev, get_last_alias, &dma_alias);
2339
2340 spin_lock_irqsave(&device_domain_lock, flags);
2341 info = dmar_search_domain_by_dev_info(pci_domain_nr(pdev->bus),
2342 PCI_BUS_NUM(dma_alias),
2343 dma_alias & 0xff);
2344 if (info) {
2345 iommu = info->iommu;
2346 domain = info->domain;
5a8f40e8 2347 }
579305f7 2348 spin_unlock_irqrestore(&device_domain_lock, flags);
ba395927 2349
579305f7
AW
 2350		/* DMA alias already has a domain, use it */
2351 if (info)
2352 goto found_domain;
2353 }
ba395927 2354
146922ec 2355 /* Allocate and initialize new domain for the device */
ab8dfe25 2356 domain = alloc_domain(0);
745f2586 2357 if (!domain)
579305f7 2358 return NULL;
dc534b25 2359 if (domain_init(domain, iommu, gaw)) {
579305f7
AW
2360 domain_exit(domain);
2361 return NULL;
2c2e2c38 2362 }
ba395927 2363
579305f7
AW
2364 /* register PCI DMA alias device */
2365 if (dev_is_pci(dev)) {
5db31569
JR
2366 tmp = dmar_insert_one_dev_info(iommu, PCI_BUS_NUM(dma_alias),
2367 dma_alias & 0xff, NULL, domain);
579305f7
AW
2368
2369 if (!tmp || tmp != domain) {
2370 domain_exit(domain);
2371 domain = tmp;
2372 }
2373
b718cd3d 2374 if (!domain)
579305f7 2375 return NULL;
ba395927
KA
2376 }
2377
2378found_domain:
5db31569 2379 tmp = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
579305f7
AW
2380
2381 if (!tmp || tmp != domain) {
2382 domain_exit(domain);
2383 domain = tmp;
2384 }
b718cd3d
DW
2385
2386 return domain;
ba395927
KA
2387}
2388
2c2e2c38 2389static int iommu_identity_mapping;
e0fc7e0b
DW
2390#define IDENTMAP_ALL 1
2391#define IDENTMAP_GFX 2
2392#define IDENTMAP_AZALIA 4
2c2e2c38 2393
b213203e
DW
2394static int iommu_domain_identity_map(struct dmar_domain *domain,
2395 unsigned long long start,
2396 unsigned long long end)
ba395927 2397{
c5395d5c
DW
2398 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
2399 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
2400
2401 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
2402 dma_to_mm_pfn(last_vpfn))) {
9f10e5bf 2403 pr_err("Reserving iova failed\n");
b213203e 2404 return -ENOMEM;
ba395927
KA
2405 }
2406
af1089ce 2407 pr_debug("Mapping reserved region %llx-%llx\n", start, end);
ba395927
KA
2408 /*
2409 * RMRR range might have overlap with physical memory range,
2410 * clear it first
2411 */
c5395d5c 2412 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 2413
c5395d5c
DW
2414 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
2415 last_vpfn - first_vpfn + 1,
61df7443 2416 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
2417}
2418
0b9d9753 2419static int iommu_prepare_identity_map(struct device *dev,
b213203e
DW
2420 unsigned long long start,
2421 unsigned long long end)
2422{
2423 struct dmar_domain *domain;
2424 int ret;
2425
0b9d9753 2426 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
2427 if (!domain)
2428 return -ENOMEM;
2429
19943b0e
DW
2430 /* For _hardware_ passthrough, don't bother. But for software
2431 passthrough, we do it anyway -- it may indicate a memory
 2432	   passthrough, we do it anyway -- it may indicate a memory
 2433	   range which is reserved in E820 and so didn't get set up
	   in si_domain to start with */
2434 if (domain == si_domain && hw_pass_through) {
9f10e5bf
JR
2435 pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
2436 dev_name(dev), start, end);
19943b0e
DW
2437 return 0;
2438 }
2439
9f10e5bf
JR
2440 pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2441 dev_name(dev), start, end);
2442
5595b528
DW
2443 if (end < start) {
2444 WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
2445 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2446 dmi_get_system_info(DMI_BIOS_VENDOR),
2447 dmi_get_system_info(DMI_BIOS_VERSION),
2448 dmi_get_system_info(DMI_PRODUCT_VERSION));
2449 ret = -EIO;
2450 goto error;
2451 }
2452
2ff729f5
DW
2453 if (end >> agaw_to_width(domain->agaw)) {
2454 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2455 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2456 agaw_to_width(domain->agaw),
2457 dmi_get_system_info(DMI_BIOS_VENDOR),
2458 dmi_get_system_info(DMI_BIOS_VERSION),
2459 dmi_get_system_info(DMI_PRODUCT_VERSION));
2460 ret = -EIO;
2461 goto error;
2462 }
19943b0e 2463
b213203e 2464 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2465 if (ret)
2466 goto error;
2467
b213203e
DW
2468 return 0;
2469
2470 error:
ba395927
KA
2471 domain_exit(domain);
2472 return ret;
ba395927
KA
2473}
2474
2475static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
0b9d9753 2476 struct device *dev)
ba395927 2477{
0b9d9753 2478 if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927 2479 return 0;
0b9d9753
DW
2480 return iommu_prepare_identity_map(dev, rmrr->base_address,
2481 rmrr->end_address);
ba395927
KA
2482}
2483
d3f13810 2484#ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA
49a0429e
KA
2485static inline void iommu_prepare_isa(void)
2486{
2487 struct pci_dev *pdev;
2488 int ret;
2489
2490 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2491 if (!pdev)
2492 return;
2493
9f10e5bf 2494 pr_info("Prepare 0-16MiB unity mapping for LPC\n");
0b9d9753 2495 ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1);
49a0429e
KA
2496
2497 if (ret)
9f10e5bf 2498 pr_err("Failed to create 0-16MiB identity map - floppy might not work\n");
49a0429e 2499
9b27e82d 2500 pci_dev_put(pdev);
49a0429e
KA
2501}
2502#else
2503static inline void iommu_prepare_isa(void)
2504{
2505 return;
2506}
d3f13810 2507#endif /* !CONFIG_INTEL_IOMMU_FLOPPY_WA */
49a0429e 2508
2c2e2c38 2509static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2 2510
071e1374 2511static int __init si_domain_init(int hw)
2c2e2c38 2512{
c7ab48d2 2513 int nid, ret = 0;
2c2e2c38 2514
ab8dfe25 2515 si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY);
2c2e2c38
FY
2516 if (!si_domain)
2517 return -EFAULT;
2518
2c2e2c38
FY
2519 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2520 domain_exit(si_domain);
2521 return -EFAULT;
2522 }
2523
0dc79715 2524 pr_debug("Identity mapping domain allocated\n");
2c2e2c38 2525
19943b0e
DW
2526 if (hw)
2527 return 0;
2528
c7ab48d2 2529 for_each_online_node(nid) {
5dfe8660
TH
2530 unsigned long start_pfn, end_pfn;
2531 int i;
2532
2533 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
2534 ret = iommu_domain_identity_map(si_domain,
2535 PFN_PHYS(start_pfn), PFN_PHYS(end_pfn));
2536 if (ret)
2537 return ret;
2538 }
c7ab48d2
DW
2539 }
2540
2c2e2c38
FY
2541 return 0;
2542}
2543
9b226624 2544static int identity_mapping(struct device *dev)
2c2e2c38
FY
2545{
2546 struct device_domain_info *info;
2547
2548 if (likely(!iommu_identity_mapping))
2549 return 0;
2550
9b226624 2551 info = dev->archdata.iommu;
cb452a40
MT
2552 if (info && info != DUMMY_DEVICE_DOMAIN_INFO)
2553 return (info->domain == si_domain);
2c2e2c38 2554
2c2e2c38
FY
2555 return 0;
2556}
2557
28ccce0d 2558static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
2c2e2c38 2559{
0ac72664 2560 struct dmar_domain *ndomain;
5a8f40e8 2561 struct intel_iommu *iommu;
156baca8 2562 u8 bus, devfn;
2c2e2c38 2563
5913c9bf 2564 iommu = device_to_iommu(dev, &bus, &devfn);
5a8f40e8
DW
2565 if (!iommu)
2566 return -ENODEV;
2567
5db31569 2568 ndomain = dmar_insert_one_dev_info(iommu, bus, devfn, dev, domain);
0ac72664
DW
2569 if (ndomain != domain)
2570 return -EBUSY;
2c2e2c38
FY
2571
2572 return 0;
2573}
2574
0b9d9753 2575static bool device_has_rmrr(struct device *dev)
ea2447f7
TM
2576{
2577 struct dmar_rmrr_unit *rmrr;
832bd858 2578 struct device *tmp;
ea2447f7
TM
2579 int i;
2580
0e242612 2581 rcu_read_lock();
ea2447f7 2582 for_each_rmrr_units(rmrr) {
b683b230
JL
2583 /*
2584 * Return TRUE if this RMRR contains the device that
2585 * is passed in.
2586 */
2587 for_each_active_dev_scope(rmrr->devices,
2588 rmrr->devices_cnt, i, tmp)
0b9d9753 2589 if (tmp == dev) {
0e242612 2590 rcu_read_unlock();
ea2447f7 2591 return true;
b683b230 2592 }
ea2447f7 2593 }
0e242612 2594 rcu_read_unlock();
ea2447f7
TM
2595 return false;
2596}
2597
c875d2c1
AW
2598/*
2599 * There are a couple cases where we need to restrict the functionality of
2600 * devices associated with RMRRs. The first is when evaluating a device for
2601 * identity mapping because problems exist when devices are moved in and out
2602 * of domains and their respective RMRR information is lost. This means that
2603 * a device with associated RMRRs will never be in a "passthrough" domain.
2604 * The second is use of the device through the IOMMU API. This interface
2605 * expects to have full control of the IOVA space for the device. We cannot
2606 * satisfy both the requirement that RMRR access is maintained and have an
2607 * unencumbered IOVA space. We also have no ability to quiesce the device's
2608 * use of the RMRR space or even inform the IOMMU API user of the restriction.
2609 * We therefore prevent devices associated with an RMRR from participating in
2610 * the IOMMU API, which eliminates them from device assignment.
2611 *
2612 * In both cases we assume that PCI USB devices with RMRRs have them largely
2613 * for historical reasons and that the RMRR space is not actively used post
2614 * boot. This exclusion may change if vendors begin to abuse it.
18436afd
DW
2615 *
2616 * The same exception is made for graphics devices, with the requirement that
2617 * any use of the RMRR regions will be torn down before assigning the device
2618 * to a guest.
c875d2c1
AW
2619 */
2620static bool device_is_rmrr_locked(struct device *dev)
2621{
2622 if (!device_has_rmrr(dev))
2623 return false;
2624
2625 if (dev_is_pci(dev)) {
2626 struct pci_dev *pdev = to_pci_dev(dev);
2627
18436afd 2628 if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
c875d2c1
AW
2629 return false;
2630 }
2631
2632 return true;
2633}
2634
3bdb2591 2635static int iommu_should_identity_map(struct device *dev, int startup)
6941af28 2636{
ea2447f7 2637
3bdb2591
DW
2638 if (dev_is_pci(dev)) {
2639 struct pci_dev *pdev = to_pci_dev(dev);
ea2447f7 2640
c875d2c1 2641 if (device_is_rmrr_locked(dev))
3bdb2591 2642 return 0;
e0fc7e0b 2643
3bdb2591
DW
2644 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2645 return 1;
e0fc7e0b 2646
3bdb2591
DW
2647 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2648 return 1;
6941af28 2649
3bdb2591 2650 if (!(iommu_identity_mapping & IDENTMAP_ALL))
3dfc813d 2651 return 0;
3bdb2591
DW
2652
2653 /*
2654 * We want to start off with all devices in the 1:1 domain, and
2655 * take them out later if we find they can't access all of memory.
2656 *
2657 * However, we can't do this for PCI devices behind bridges,
2658 * because all PCI devices behind the same bridge will end up
2659 * with the same source-id on their transactions.
2660 *
2661 * Practically speaking, we can't change things around for these
2662 * devices at run-time, because we can't be sure there'll be no
2663 * DMA transactions in flight for any of their siblings.
2664 *
2665 * So PCI devices (unless they're on the root bus) as well as
2666 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2667 * the 1:1 domain, just in _case_ one of their siblings turns out
2668 * not to be able to map all of memory.
2669 */
2670 if (!pci_is_pcie(pdev)) {
2671 if (!pci_is_root_bus(pdev->bus))
2672 return 0;
2673 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2674 return 0;
2675 } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE)
3dfc813d 2676 return 0;
3bdb2591
DW
2677 } else {
2678 if (device_has_rmrr(dev))
2679 return 0;
2680 }
3dfc813d 2681
3bdb2591 2682 /*
3dfc813d 2683 * At boot time, we don't yet know if devices will be 64-bit capable.
3bdb2591 2684 * Assume that they will — if they turn out not to be, then we can
3dfc813d
DW
2685 * take them out of the 1:1 domain later.
2686 */
8fcc5372
CW
2687 if (!startup) {
2688 /*
2689 * If the device's dma_mask is less than the system's memory
2690 * size then this is not a candidate for identity mapping.
2691 */
3bdb2591 2692 u64 dma_mask = *dev->dma_mask;
8fcc5372 2693
3bdb2591
DW
2694 if (dev->coherent_dma_mask &&
2695 dev->coherent_dma_mask < dma_mask)
2696 dma_mask = dev->coherent_dma_mask;
8fcc5372 2697
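		/* e.g. a 32-bit-only device on a machine with RAM above 4GiB
		   cannot reach all of memory, so it is left out of the
		   1:1 (identity) domain. */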
3bdb2591 2698 return dma_mask >= dma_get_required_mask(dev);
8fcc5372 2699 }
6941af28
DW
2700
2701 return 1;
2702}
2703
cf04eee8
DW
2704static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw)
2705{
2706 int ret;
2707
2708 if (!iommu_should_identity_map(dev, 1))
2709 return 0;
2710
28ccce0d 2711 ret = domain_add_dev_info(si_domain, dev);
cf04eee8 2712 if (!ret)
9f10e5bf
JR
2713 pr_info("%s identity mapping for device %s\n",
2714 hw ? "Hardware" : "Software", dev_name(dev));
cf04eee8
DW
2715 else if (ret == -ENODEV)
2716 /* device not associated with an iommu */
2717 ret = 0;
2718
2719 return ret;
2720}
2721
2722
071e1374 2723static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2724{
2c2e2c38 2725 struct pci_dev *pdev = NULL;
cf04eee8
DW
2726 struct dmar_drhd_unit *drhd;
2727 struct intel_iommu *iommu;
2728 struct device *dev;
2729 int i;
2730 int ret = 0;
2c2e2c38 2731
2c2e2c38 2732 for_each_pci_dev(pdev) {
cf04eee8
DW
2733 ret = dev_prepare_static_identity_mapping(&pdev->dev, hw);
2734 if (ret)
2735 return ret;
2736 }
2737
2738 for_each_active_iommu(iommu, drhd)
2739 for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) {
2740 struct acpi_device_physical_node *pn;
2741 struct acpi_device *adev;
2742
2743 if (dev->bus != &acpi_bus_type)
2744 continue;
86080ccc 2745
cf04eee8
DW
2746 adev= to_acpi_device(dev);
2747 mutex_lock(&adev->physical_node_lock);
2748 list_for_each_entry(pn, &adev->physical_node_list, node) {
2749 ret = dev_prepare_static_identity_mapping(pn->dev, hw);
2750 if (ret)
2751 break;
eae460b6 2752 }
cf04eee8
DW
2753 mutex_unlock(&adev->physical_node_lock);
2754 if (ret)
2755 return ret;
62edf5dc 2756 }
2c2e2c38
FY
2757
2758 return 0;
2759}
2760
ffebeb46
JL
2761static void intel_iommu_init_qi(struct intel_iommu *iommu)
2762{
2763 /*
2764 * Start from the sane iommu hardware state.
2765 * If the queued invalidation is already initialized by us
2766 * (for example, while enabling interrupt-remapping) then
2767 * we got the things already rolling from a sane state.
2768 */
2769 if (!iommu->qi) {
2770 /*
2771 * Clear any previous faults.
2772 */
2773 dmar_fault(-1, iommu);
2774 /*
2775 * Disable queued invalidation if supported and already enabled
2776 * before OS handover.
2777 */
2778 dmar_disable_qi(iommu);
2779 }
2780
2781 if (dmar_enable_qi(iommu)) {
2782 /*
2783 * Queued Invalidate not enabled, use Register Based Invalidate
2784 */
2785 iommu->flush.flush_context = __iommu_flush_context;
2786 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
9f10e5bf 2787 pr_info("%s: Using Register based invalidation\n",
ffebeb46
JL
2788 iommu->name);
2789 } else {
2790 iommu->flush.flush_context = qi_flush_context;
2791 iommu->flush.flush_iotlb = qi_flush_iotlb;
9f10e5bf 2792 pr_info("%s: Using Queued invalidation\n", iommu->name);
ffebeb46
JL
2793 }
2794}
2795
091d42e4
JR
2796static int copy_context_table(struct intel_iommu *iommu,
2797 struct root_entry *old_re,
2798 struct context_entry **tbl,
2799 int bus, bool ext)
2800{
2801 struct context_entry *old_ce = NULL, *new_ce = NULL, ce;
dbcd861f 2802 int tbl_idx, pos = 0, idx, devfn, ret = 0, did;
091d42e4
JR
2803 phys_addr_t old_ce_phys;
2804
2805 tbl_idx = ext ? bus * 2 : bus;
2806
2807 for (devfn = 0; devfn < 256; devfn++) {
2808 /* First calculate the correct index */
2809 idx = (ext ? devfn * 2 : devfn) % 256;
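		/* Extended context entries are twice the size of legacy ones,
		   so a 4KiB table holds only 128 of them; devfns 0-127 sit
		   behind the lower context-table pointer and 128-255 behind
		   the upper one, hence the doubled index. */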
2810
2811 if (idx == 0) {
2812 /* First save what we may have and clean up */
2813 if (new_ce) {
2814 tbl[tbl_idx] = new_ce;
2815 __iommu_flush_cache(iommu, new_ce,
2816 VTD_PAGE_SIZE);
2817 pos = 1;
2818 }
2819
2820 if (old_ce)
2821 iounmap(old_ce);
2822
2823 ret = 0;
2824 if (devfn < 0x80)
2825 old_ce_phys = root_entry_lctp(old_re);
2826 else
2827 old_ce_phys = root_entry_uctp(old_re);
2828
2829 if (!old_ce_phys) {
2830 if (ext && devfn == 0) {
2831 /* No LCTP, try UCTP */
2832 devfn = 0x7f;
2833 continue;
2834 } else {
2835 goto out;
2836 }
2837 }
2838
2839 ret = -ENOMEM;
2840 old_ce = ioremap_cache(old_ce_phys, PAGE_SIZE);
2841 if (!old_ce)
2842 goto out;
2843
2844 new_ce = alloc_pgtable_page(iommu->node);
2845 if (!new_ce)
2846 goto out_unmap;
2847
2848 ret = 0;
2849 }
2850
2851 /* Now copy the context entry */
2852 ce = old_ce[idx];
2853
cf484d0e 2854 if (!__context_present(&ce))
091d42e4
JR
2855 continue;
2856
dbcd861f
JR
2857 did = context_domain_id(&ce);
2858 if (did >= 0 && did < cap_ndoms(iommu->cap))
2859 set_bit(did, iommu->domain_ids);
2860
cf484d0e
JR
2861 /*
2862 * We need a marker for copied context entries. This
2863 * marker needs to work for the old format as well as
2864 * for extended context entries.
2865 *
2866 * Bit 67 of the context entry is used. In the old
2867 * format this bit is available to software, in the
2868 * extended format it is the PGE bit, but PGE is ignored
2869 * by HW if PASIDs are disabled (and thus still
2870 * available).
2871 *
2872 * So disable PASIDs first and then mark the entry
2873 * copied. This means that we don't copy PASID
2874 * translations from the old kernel, but this is fine as
2875 * faults there are not fatal.
2876 */
2877 context_clear_pasid_enable(&ce);
2878 context_set_copied(&ce);
2879
091d42e4
JR
2880 new_ce[idx] = ce;
2881 }
2882
2883 tbl[tbl_idx + pos] = new_ce;
2884
2885 __iommu_flush_cache(iommu, new_ce, VTD_PAGE_SIZE);
2886
2887out_unmap:
2888 iounmap(old_ce);
2889
2890out:
2891 return ret;
2892}
2893
2894static int copy_translation_tables(struct intel_iommu *iommu)
2895{
2896 struct context_entry **ctxt_tbls;
2897 struct root_entry *old_rt;
2898 phys_addr_t old_rt_phys;
2899 int ctxt_table_entries;
2900 unsigned long flags;
2901 u64 rtaddr_reg;
2902 int bus, ret;
c3361f2f 2903 bool new_ext, ext;
091d42e4
JR
2904
2905 rtaddr_reg = dmar_readq(iommu->reg + DMAR_RTADDR_REG);
2906 ext = !!(rtaddr_reg & DMA_RTADDR_RTT);
c3361f2f
JR
2907 new_ext = !!ecap_ecs(iommu->ecap);
2908
2909 /*
2910 * The RTT bit can only be changed when translation is disabled,
 2911	 * but disabling translation means opening a window for data
2912 * corruption. So bail out and don't copy anything if we would
2913 * have to change the bit.
2914 */
2915 if (new_ext != ext)
2916 return -EINVAL;
091d42e4
JR
2917
2918 old_rt_phys = rtaddr_reg & VTD_PAGE_MASK;
2919 if (!old_rt_phys)
2920 return -EINVAL;
2921
2922 old_rt = ioremap_cache(old_rt_phys, PAGE_SIZE);
2923 if (!old_rt)
2924 return -ENOMEM;
2925
2926 /* This is too big for the stack - allocate it from slab */
2927 ctxt_table_entries = ext ? 512 : 256;
2928 ret = -ENOMEM;
2929 ctxt_tbls = kzalloc(ctxt_table_entries * sizeof(void *), GFP_KERNEL);
2930 if (!ctxt_tbls)
2931 goto out_unmap;
2932
2933 for (bus = 0; bus < 256; bus++) {
2934 ret = copy_context_table(iommu, &old_rt[bus],
2935 ctxt_tbls, bus, ext);
2936 if (ret) {
2937 pr_err("%s: Failed to copy context table for bus %d\n",
2938 iommu->name, bus);
2939 continue;
2940 }
2941 }
2942
2943 spin_lock_irqsave(&iommu->lock, flags);
2944
2945 /* Context tables are copied, now write them to the root_entry table */
2946 for (bus = 0; bus < 256; bus++) {
2947 int idx = ext ? bus * 2 : bus;
2948 u64 val;
2949
2950 if (ctxt_tbls[idx]) {
2951 val = virt_to_phys(ctxt_tbls[idx]) | 1;
2952 iommu->root_entry[bus].lo = val;
2953 }
2954
2955 if (!ext || !ctxt_tbls[idx + 1])
2956 continue;
2957
2958 val = virt_to_phys(ctxt_tbls[idx + 1]) | 1;
2959 iommu->root_entry[bus].hi = val;
2960 }
2961
2962 spin_unlock_irqrestore(&iommu->lock, flags);
2963
2964 kfree(ctxt_tbls);
2965
2966 __iommu_flush_cache(iommu, iommu->root_entry, PAGE_SIZE);
2967
2968 ret = 0;
2969
2970out_unmap:
2971 iounmap(old_rt);
2972
2973 return ret;
2974}
2975
b779260b 2976static int __init init_dmars(void)
ba395927
KA
2977{
2978 struct dmar_drhd_unit *drhd;
2979 struct dmar_rmrr_unit *rmrr;
a87f4918 2980 bool copied_tables = false;
832bd858 2981 struct device *dev;
ba395927 2982 struct intel_iommu *iommu;
9d783ba0 2983 int i, ret;
2c2e2c38 2984
ba395927
KA
2985 /*
2986 * for each drhd
2987 * allocate root
2988 * initialize and program root entry to not present
2989 * endfor
2990 */
2991 for_each_drhd_unit(drhd) {
5e0d2a6f 2992 /*
 2993		 * lock not needed as this is only incremented in the single
 2994		 * threaded kernel __init code path; all other access is read
 2995		 * only
2996 */
78d8e704 2997 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED) {
1b198bb0
MT
2998 g_num_of_iommus++;
2999 continue;
3000 }
9f10e5bf 3001 pr_err_once("Exceeded %d IOMMUs\n", DMAR_UNITS_SUPPORTED);
5e0d2a6f 3002 }
3003
ffebeb46
JL
3004 /* Preallocate enough resources for IOMMU hot-addition */
3005 if (g_num_of_iommus < DMAR_UNITS_SUPPORTED)
3006 g_num_of_iommus = DMAR_UNITS_SUPPORTED;
3007
d9630fe9
WH
3008 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
3009 GFP_KERNEL);
3010 if (!g_iommus) {
9f10e5bf 3011 pr_err("Allocating global iommu array failed\n");
d9630fe9
WH
3012 ret = -ENOMEM;
3013 goto error;
3014 }
3015
80b20dd8 3016 deferred_flush = kzalloc(g_num_of_iommus *
3017 sizeof(struct deferred_flush_tables), GFP_KERNEL);
3018 if (!deferred_flush) {
5e0d2a6f 3019 ret = -ENOMEM;
989d51fc 3020 goto free_g_iommus;
5e0d2a6f 3021 }
3022
7c919779 3023 for_each_active_iommu(iommu, drhd) {
d9630fe9 3024 g_iommus[iommu->seq_id] = iommu;
ba395927 3025
b63d80d1
JR
3026 intel_iommu_init_qi(iommu);
3027
e61d98d8
SS
3028 ret = iommu_init_domains(iommu);
3029 if (ret)
989d51fc 3030 goto free_iommu;
e61d98d8 3031
4158c2ec
JR
3032 init_translation_status(iommu);
3033
091d42e4
JR
3034 if (translation_pre_enabled(iommu) && !is_kdump_kernel()) {
3035 iommu_disable_translation(iommu);
3036 clear_translation_pre_enabled(iommu);
3037 pr_warn("Translation was enabled for %s but we are not in kdump mode\n",
3038 iommu->name);
3039 }
4158c2ec 3040
ba395927
KA
3041 /*
3042 * TBD:
3043 * we could share the same root & context tables
25985edc 3044		 * among all IOMMUs. Need to split it later.
ba395927
KA
3045 */
3046 ret = iommu_alloc_root_entry(iommu);
ffebeb46 3047 if (ret)
989d51fc 3048 goto free_iommu;
5f0a7f76 3049
091d42e4
JR
3050 if (translation_pre_enabled(iommu)) {
3051 pr_info("Translation already enabled - trying to copy translation structures\n");
3052
3053 ret = copy_translation_tables(iommu);
3054 if (ret) {
3055 /*
3056 * We found the IOMMU with translation
3057 * enabled - but failed to copy over the
3058 * old root-entry table. Try to proceed
3059 * by disabling translation now and
3060 * allocating a clean root-entry table.
3061 * This might cause DMAR faults, but
3062 * probably the dump will still succeed.
3063 */
3064 pr_err("Failed to copy translation tables from previous kernel for %s\n",
3065 iommu->name);
3066 iommu_disable_translation(iommu);
3067 clear_translation_pre_enabled(iommu);
3068 } else {
3069 pr_info("Copied translation tables from previous kernel for %s\n",
3070 iommu->name);
a87f4918 3071 copied_tables = true;
091d42e4
JR
3072 }
3073 }
3074
5f0a7f76
JR
3075 iommu_flush_write_buffer(iommu);
3076 iommu_set_root_entry(iommu);
3077 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
3078 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3079
4ed0d3e6 3080 if (!ecap_pass_through(iommu->ecap))
19943b0e 3081 hw_pass_through = 0;
ba395927
KA
3082 }
3083
19943b0e 3084 if (iommu_pass_through)
e0fc7e0b
DW
3085 iommu_identity_mapping |= IDENTMAP_ALL;
3086
d3f13810 3087#ifdef CONFIG_INTEL_IOMMU_BROKEN_GFX_WA
e0fc7e0b 3088 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 3089#endif
e0fc7e0b 3090
86080ccc
JR
3091 if (iommu_identity_mapping) {
3092 ret = si_domain_init(hw_pass_through);
3093 if (ret)
3094 goto free_iommu;
3095 }
3096
e0fc7e0b
DW
3097 check_tylersburg_isoch();
3098
a87f4918
JR
3099 /*
3100 * If we copied translations from a previous kernel in the kdump
3101 * case, we can not assign the devices to domains now, as that
3102 * would eliminate the old mappings. So skip this part and defer
3103 * the assignment to device driver initialization time.
3104 */
3105 if (copied_tables)
3106 goto domains_done;
3107
ba395927 3108 /*
19943b0e
DW
 3109	 * If pass through is not set or not enabled, set up context entries for
 3110	 * identity mappings for rmrr, gfx, and isa, and may fall back to static
3111 * identity mapping if iommu_identity_mapping is set.
ba395927 3112 */
19943b0e
DW
3113 if (iommu_identity_mapping) {
3114 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 3115 if (ret) {
9f10e5bf 3116 pr_crit("Failed to setup IOMMU pass-through\n");
989d51fc 3117 goto free_iommu;
ba395927
KA
3118 }
3119 }
ba395927 3120 /*
19943b0e
DW
3121 * For each rmrr
3122 * for each dev attached to rmrr
3123 * do
3124 * locate drhd for dev, alloc domain for dev
3125 * allocate free domain
3126 * allocate page table entries for rmrr
3127 * if context not allocated for bus
3128 * allocate and init context
3129 * set present in root table for this bus
3130 * init context with domain, translation etc
3131 * endfor
3132 * endfor
ba395927 3133 */
9f10e5bf 3134 pr_info("Setting RMRR:\n");
19943b0e 3135 for_each_rmrr_units(rmrr) {
b683b230
JL
 3136		/* some BIOSes list non-existent devices in the DMAR table. */
3137 for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt,
832bd858 3138 i, dev) {
0b9d9753 3139 ret = iommu_prepare_rmrr_dev(rmrr, dev);
19943b0e 3140 if (ret)
9f10e5bf 3141 pr_err("Mapping reserved region failed\n");
ba395927 3142 }
4ed0d3e6 3143 }
49a0429e 3144
19943b0e
DW
3145 iommu_prepare_isa();
3146
a87f4918
JR
3147domains_done:
3148
ba395927
KA
3149 /*
3150 * for each drhd
3151 * enable fault log
3152 * global invalidate context cache
3153 * global invalidate iotlb
3154 * enable translation
3155 */
7c919779 3156 for_each_iommu(iommu, drhd) {
51a63e67
JC
3157 if (drhd->ignored) {
3158 /*
3159 * we always have to disable PMRs or DMA may fail on
3160 * this device
3161 */
3162 if (force_on)
7c919779 3163 iommu_disable_protect_mem_regions(iommu);
ba395927 3164 continue;
51a63e67 3165 }
ba395927
KA
3166
3167 iommu_flush_write_buffer(iommu);
3168
3460a6d9
KA
3169 ret = dmar_set_interrupt(iommu);
3170 if (ret)
989d51fc 3171 goto free_iommu;
3460a6d9 3172
8939ddf6
JR
3173 if (!translation_pre_enabled(iommu))
3174 iommu_enable_translation(iommu);
3175
b94996c9 3176 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
3177 }
3178
3179 return 0;
989d51fc
JL
3180
3181free_iommu:
ffebeb46
JL
3182 for_each_active_iommu(iommu, drhd) {
3183 disable_dmar_iommu(iommu);
a868e6b7 3184 free_dmar_iommu(iommu);
ffebeb46 3185 }
9bdc531e 3186 kfree(deferred_flush);
989d51fc 3187free_g_iommus:
d9630fe9 3188 kfree(g_iommus);
989d51fc 3189error:
ba395927
KA
3190 return ret;
3191}
3192
5a5e02a6 3193/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
3194static struct iova *intel_alloc_iova(struct device *dev,
3195 struct dmar_domain *domain,
3196 unsigned long nrpages, uint64_t dma_mask)
ba395927 3197{
ba395927 3198 struct iova *iova = NULL;
ba395927 3199
875764de
DW
3200 /* Restrict dma_mask to the width that the iommu can handle */
3201 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
3202
3203 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
3204 /*
 3205		 * First try to allocate an I/O virtual address in
284901a9 3206		 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 3207		 * from a higher range
ba395927 3208 */
875764de
DW
3209 iova = alloc_iova(&domain->iovad, nrpages,
3210 IOVA_PFN(DMA_BIT_MASK(32)), 1);
3211 if (iova)
3212 return iova;
3213 }
3214 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
3215 if (unlikely(!iova)) {
9f10e5bf 3216		pr_err("Allocating %ld-page iova for %s failed\n",
207e3592 3217 nrpages, dev_name(dev));
f76aec76
KA
3218 return NULL;
3219 }
3220
3221 return iova;
3222}
3223
d4b709f4 3224static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev)
f76aec76
KA
3225{
3226 struct dmar_domain *domain;
f76aec76 3227
d4b709f4 3228 domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
f76aec76 3229 if (!domain) {
9f10e5bf 3230 pr_err("Allocating domain for %s failed\n",
d4b709f4 3231 dev_name(dev));
4fe05bbc 3232 return NULL;
ba395927
KA
3233 }
3234
f76aec76
KA
3235 return domain;
3236}
3237
d4b709f4 3238static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev)
147202aa
DW
3239{
3240 struct device_domain_info *info;
3241
3242 /* No lock here, assumes no domain exit in normal case */
d4b709f4 3243 info = dev->archdata.iommu;
147202aa
DW
3244 if (likely(info))
3245 return info->domain;
3246
3247 return __get_valid_domain_for_dev(dev);
3248}
3249
ecb509ec 3250/* Check if the dev needs to go through non-identity map and unmap process. */
73676832 3251static int iommu_no_mapping(struct device *dev)
2c2e2c38
FY
3252{
3253 int found;
3254
3d89194a 3255 if (iommu_dummy(dev))
1e4c64c4
DW
3256 return 1;
3257
2c2e2c38 3258 if (!iommu_identity_mapping)
1e4c64c4 3259 return 0;
2c2e2c38 3260
9b226624 3261 found = identity_mapping(dev);
2c2e2c38 3262 if (found) {
ecb509ec 3263 if (iommu_should_identity_map(dev, 0))
2c2e2c38
FY
3264 return 1;
3265 else {
3266 /*
 3267			 * The 32-bit DMA device is removed from si_domain and falls back
3268 * to non-identity mapping.
3269 */
e6de0f8d 3270 dmar_remove_one_dev_info(si_domain, dev);
9f10e5bf
JR
3271 pr_info("32bit %s uses non-identity mapping\n",
3272 dev_name(dev));
2c2e2c38
FY
3273 return 0;
3274 }
3275 } else {
3276 /*
 3277		 * If a 64-bit DMA device was detached from a VM, the device
3278 * is put into si_domain for identity mapping.
3279 */
ecb509ec 3280 if (iommu_should_identity_map(dev, 0)) {
2c2e2c38 3281 int ret;
28ccce0d 3282 ret = domain_add_dev_info(si_domain, dev);
2c2e2c38 3283 if (!ret) {
9f10e5bf
JR
3284 pr_info("64bit %s uses identity mapping\n",
3285 dev_name(dev));
2c2e2c38
FY
3286 return 1;
3287 }
3288 }
3289 }
3290
1e4c64c4 3291 return 0;
2c2e2c38
FY
3292}
3293
5040a918 3294static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr,
bb9e6d65 3295 size_t size, int dir, u64 dma_mask)
f76aec76 3296{
f76aec76 3297 struct dmar_domain *domain;
5b6985ce 3298 phys_addr_t start_paddr;
f76aec76
KA
3299 struct iova *iova;
3300 int prot = 0;
6865f0d1 3301 int ret;
8c11e798 3302 struct intel_iommu *iommu;
33041ec0 3303 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
3304
3305 BUG_ON(dir == DMA_NONE);
2c2e2c38 3306
5040a918 3307 if (iommu_no_mapping(dev))
6865f0d1 3308 return paddr;
f76aec76 3309
5040a918 3310 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3311 if (!domain)
3312 return 0;
3313
8c11e798 3314 iommu = domain_get_iommu(domain);
88cb6a74 3315 size = aligned_nrpages(paddr, size);
f76aec76 3316
5040a918 3317 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask);
f76aec76
KA
3318 if (!iova)
3319 goto error;
3320
ba395927
KA
3321 /*
 3322	 * Check if DMAR supports zero-length reads on write-only
 3323	 * mappings.
3324 */
3325 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3326 !cap_zlr(iommu->cap))
ba395927
KA
3327 prot |= DMA_PTE_READ;
3328 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3329 prot |= DMA_PTE_WRITE;
3330 /*
6865f0d1 3331	 * paddr - (paddr + size) might be a partial page, we should map the whole
ba395927 3332	 * page. Note: if two parts of one page are separately mapped, we
6865f0d1 3333	 * might have two guest_addrs mapping to the same host paddr, but this
ba395927
KA
3334 * is not a big problem
3335 */
0ab36de2 3336 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 3337 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
3338 if (ret)
3339 goto error;
3340
1f0ef2aa
DW
3341 /* it's a non-present to present mapping. Only flush if caching mode */
3342 if (cap_caching_mode(iommu->cap))
a1ddcbe9
JR
3343 iommu_flush_iotlb_psi(iommu, domain,
3344 mm_to_dma_pfn(iova->pfn_lo),
3345 size, 0, 1);
1f0ef2aa 3346 else
8c11e798 3347 iommu_flush_write_buffer(iommu);
f76aec76 3348
03d6a246
DW
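	/* Hand back the bus address: the first pfn of the allocated IOVA
	   range plus the sub-page offset of the original paddr. */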
3349 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
3350 start_paddr += paddr & ~PAGE_MASK;
3351 return start_paddr;
ba395927 3352
ba395927 3353error:
f76aec76
KA
3354 if (iova)
3355 __free_iova(&domain->iovad, iova);
9f10e5bf 3356 pr_err("Device %s request: %zx@%llx dir %d --- failed\n",
5040a918 3357 dev_name(dev), size, (unsigned long long)paddr, dir);
ba395927
KA
3358 return 0;
3359}
3360
ffbbef5c
FT
3361static dma_addr_t intel_map_page(struct device *dev, struct page *page,
3362 unsigned long offset, size_t size,
3363 enum dma_data_direction dir,
3364 struct dma_attrs *attrs)
bb9e6d65 3365{
ffbbef5c 3366 return __intel_map_single(dev, page_to_phys(page) + offset, size,
46333e37 3367 dir, *dev->dma_mask);
bb9e6d65
FT
3368}
3369
5e0d2a6f 3370static void flush_unmaps(void)
3371{
80b20dd8 3372 int i, j;
5e0d2a6f 3373
5e0d2a6f 3374 timer_on = 0;
3375
3376 /* just flush them all */
3377 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
3378 struct intel_iommu *iommu = g_iommus[i];
3379 if (!iommu)
3380 continue;
c42d9f32 3381
9dd2fe89
YZ
3382 if (!deferred_flush[i].next)
3383 continue;
3384
78d5f0f5
NA
3385 /* In caching mode, global flushes turn emulation expensive */
3386 if (!cap_caching_mode(iommu->cap))
3387 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 3388 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 3389 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
3390 unsigned long mask;
3391 struct iova *iova = deferred_flush[i].iova[j];
78d5f0f5
NA
3392 struct dmar_domain *domain = deferred_flush[i].domain[j];
3393
3394 /* On real hardware multiple invalidations are expensive */
3395 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3396 iommu_flush_iotlb_psi(iommu, domain,
a156ef99 3397 iova->pfn_lo, iova_size(iova),
ea8ea460 3398 !deferred_flush[i].freelist[j], 0);
78d5f0f5 3399 else {
a156ef99 3400 mask = ilog2(mm_to_dma_pfn(iova_size(iova)));
78d5f0f5
NA
3401 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
3402 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
3403 }
93a23a72 3404 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
ea8ea460
DW
3405 if (deferred_flush[i].freelist[j])
3406 dma_free_pagelist(deferred_flush[i].freelist[j]);
80b20dd8 3407 }
9dd2fe89 3408 deferred_flush[i].next = 0;
5e0d2a6f 3409 }
3410
5e0d2a6f 3411 list_size = 0;
5e0d2a6f 3412}
3413
3414static void flush_unmaps_timeout(unsigned long data)
3415{
80b20dd8 3416 unsigned long flags;
3417
3418 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 3419 flush_unmaps();
80b20dd8 3420 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 3421}
3422
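/*
 * Deferred unmap path: entries are queued per IOMMU and flushed either
 * when HIGH_WATER_MARK of them have accumulated or when the 10ms
 * unmap_timer fires, whichever happens first.
 */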
ea8ea460 3423static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist)
5e0d2a6f 3424{
3425 unsigned long flags;
80b20dd8 3426 int next, iommu_id;
8c11e798 3427 struct intel_iommu *iommu;
5e0d2a6f 3428
3429 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 3430 if (list_size == HIGH_WATER_MARK)
3431 flush_unmaps();
3432
8c11e798
WH
3433 iommu = domain_get_iommu(dom);
3434 iommu_id = iommu->seq_id;
c42d9f32 3435
80b20dd8 3436 next = deferred_flush[iommu_id].next;
3437 deferred_flush[iommu_id].domain[next] = dom;
3438 deferred_flush[iommu_id].iova[next] = iova;
ea8ea460 3439 deferred_flush[iommu_id].freelist[next] = freelist;
80b20dd8 3440 deferred_flush[iommu_id].next++;
5e0d2a6f 3441
3442 if (!timer_on) {
3443 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
3444 timer_on = 1;
3445 }
3446 list_size++;
3447 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
3448}
3449
d41a4adb 3450static void intel_unmap(struct device *dev, dma_addr_t dev_addr)
ba395927 3451{
f76aec76 3452 struct dmar_domain *domain;
d794dc9b 3453 unsigned long start_pfn, last_pfn;
ba395927 3454 struct iova *iova;
8c11e798 3455 struct intel_iommu *iommu;
ea8ea460 3456 struct page *freelist;
ba395927 3457
73676832 3458 if (iommu_no_mapping(dev))
f76aec76 3459 return;
2c2e2c38 3460
1525a29a 3461 domain = find_domain(dev);
ba395927
KA
3462 BUG_ON(!domain);
3463
8c11e798
WH
3464 iommu = domain_get_iommu(domain);
3465
ba395927 3466 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
3467 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
3468 (unsigned long long)dev_addr))
ba395927 3469 return;
ba395927 3470
d794dc9b
DW
3471 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
3472 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 3473
d794dc9b 3474 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
207e3592 3475 dev_name(dev), start_pfn, last_pfn);
ba395927 3476
ea8ea460 3477 freelist = domain_unmap(domain, start_pfn, last_pfn);
d794dc9b 3478
5e0d2a6f 3479 if (intel_iommu_strict) {
a1ddcbe9 3480 iommu_flush_iotlb_psi(iommu, domain, start_pfn,
ea8ea460 3481 last_pfn - start_pfn + 1, !freelist, 0);
5e0d2a6f 3482 /* free iova */
3483 __free_iova(&domain->iovad, iova);
ea8ea460 3484 dma_free_pagelist(freelist);
5e0d2a6f 3485 } else {
ea8ea460 3486 add_unmap(domain, iova, freelist);
5e0d2a6f 3487 /*
 3488		 * queue up the release of the unmap to save roughly 1/6th of the
 3489		 * cpu time used up by the iotlb flush operation...
3490 */
5e0d2a6f 3491 }
ba395927
KA
3492}
3493
d41a4adb
JL
3494static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
3495 size_t size, enum dma_data_direction dir,
3496 struct dma_attrs *attrs)
3497{
3498 intel_unmap(dev, dev_addr);
3499}
3500
5040a918 3501static void *intel_alloc_coherent(struct device *dev, size_t size,
baa676fc
AP
3502 dma_addr_t *dma_handle, gfp_t flags,
3503 struct dma_attrs *attrs)
ba395927 3504{
36746436 3505 struct page *page = NULL;
ba395927
KA
3506 int order;
3507
5b6985ce 3508 size = PAGE_ALIGN(size);
ba395927 3509 order = get_order(size);
e8bb910d 3510
5040a918 3511 if (!iommu_no_mapping(dev))
e8bb910d 3512 flags &= ~(GFP_DMA | GFP_DMA32);
5040a918
DW
3513 else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) {
3514 if (dev->coherent_dma_mask < DMA_BIT_MASK(32))
e8bb910d
AW
3515 flags |= GFP_DMA;
3516 else
3517 flags |= GFP_DMA32;
3518 }
ba395927 3519
36746436
AM
3520 if (flags & __GFP_WAIT) {
3521 unsigned int count = size >> PAGE_SHIFT;
3522
3523 page = dma_alloc_from_contiguous(dev, count, order);
3524 if (page && iommu_no_mapping(dev) &&
3525 page_to_phys(page) + size > dev->coherent_dma_mask) {
3526 dma_release_from_contiguous(dev, page, count);
3527 page = NULL;
3528 }
3529 }
3530
3531 if (!page)
3532 page = alloc_pages(flags, order);
3533 if (!page)
ba395927 3534 return NULL;
36746436 3535 memset(page_address(page), 0, size);
ba395927 3536
36746436 3537 *dma_handle = __intel_map_single(dev, page_to_phys(page), size,
bb9e6d65 3538 DMA_BIDIRECTIONAL,
5040a918 3539 dev->coherent_dma_mask);
ba395927 3540 if (*dma_handle)
36746436
AM
3541 return page_address(page);
3542 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3543 __free_pages(page, order);
3544
ba395927
KA
3545 return NULL;
3546}
3547
5040a918 3548static void intel_free_coherent(struct device *dev, size_t size, void *vaddr,
baa676fc 3549 dma_addr_t dma_handle, struct dma_attrs *attrs)
ba395927
KA
3550{
3551 int order;
36746436 3552 struct page *page = virt_to_page(vaddr);
ba395927 3553
5b6985ce 3554 size = PAGE_ALIGN(size);
ba395927
KA
3555 order = get_order(size);
3556
d41a4adb 3557 intel_unmap(dev, dma_handle);
36746436
AM
3558 if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
3559 __free_pages(page, order);
ba395927
KA
3560}
3561
5040a918 3562static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist,
d7ab5c46
FT
3563 int nelems, enum dma_data_direction dir,
3564 struct dma_attrs *attrs)
ba395927 3565{
d41a4adb 3566 intel_unmap(dev, sglist[0].dma_address);
ba395927
KA
3567}
3568
ba395927 3569static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 3570 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
3571{
3572 int i;
c03ab37c 3573 struct scatterlist *sg;
ba395927 3574
c03ab37c 3575 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 3576 BUG_ON(!sg_page(sg));
4cf2e75d 3577 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 3578 sg->dma_length = sg->length;
ba395927
KA
3579 }
3580 return nelems;
3581}
3582
5040a918 3583static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems,
d7ab5c46 3584 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 3585{
ba395927 3586 int i;
ba395927 3587 struct dmar_domain *domain;
f76aec76
KA
3588 size_t size = 0;
3589 int prot = 0;
f76aec76
KA
3590 struct iova *iova = NULL;
3591 int ret;
c03ab37c 3592 struct scatterlist *sg;
b536d24d 3593 unsigned long start_vpfn;
8c11e798 3594 struct intel_iommu *iommu;
ba395927
KA
3595
3596 BUG_ON(dir == DMA_NONE);
5040a918
DW
3597 if (iommu_no_mapping(dev))
3598 return intel_nontranslate_map_sg(dev, sglist, nelems, dir);
ba395927 3599
5040a918 3600 domain = get_valid_domain_for_dev(dev);
f76aec76
KA
3601 if (!domain)
3602 return 0;
3603
8c11e798
WH
3604 iommu = domain_get_iommu(domain);
3605
b536d24d 3606 for_each_sg(sglist, sg, nelems, i)
88cb6a74 3607 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 3608
5040a918
DW
3609 iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size),
3610 *dev->dma_mask);
f76aec76 3611 if (!iova) {
c03ab37c 3612 sglist->dma_length = 0;
f76aec76
KA
3613 return 0;
3614 }
3615
3616 /*
 3617 * Check if DMAR supports zero-length reads on write-only
 3618 * mappings.
3619 */
3620 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 3621 !cap_zlr(iommu->cap))
f76aec76
KA
3622 prot |= DMA_PTE_READ;
3623 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
3624 prot |= DMA_PTE_WRITE;
3625
b536d24d 3626 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 3627
f532959b 3628 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495 3629 if (unlikely(ret)) {
e1605495
DW
3630 dma_pte_free_pagetable(domain, start_vpfn,
3631 start_vpfn + size - 1);
e1605495
DW
3632 __free_iova(&domain->iovad, iova);
3633 return 0;
ba395927
KA
3634 }
3635
1f0ef2aa
DW
 3636 /* it's a non-present to present mapping. Only flush if caching mode is set */
3637 if (cap_caching_mode(iommu->cap))
a1ddcbe9 3638 iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1);
1f0ef2aa 3639 else
8c11e798 3640 iommu_flush_write_buffer(iommu);
1f0ef2aa 3641
ba395927
KA
3642 return nelems;
3643}
3644
dfb805e8
FT
3645static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
3646{
3647 return !dma_addr;
3648}
3649
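/*
 * DMA API entry points for devices behind a (non-passthrough) DMAR unit.
 * intel_iommu_init() installs this as the global dma_ops once translation
 * has been enabled.
 */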
160c1d8e 3650struct dma_map_ops intel_dma_ops = {
baa676fc
AP
3651 .alloc = intel_alloc_coherent,
3652 .free = intel_free_coherent,
ba395927
KA
3653 .map_sg = intel_map_sg,
3654 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
3655 .map_page = intel_map_page,
3656 .unmap_page = intel_unmap_page,
dfb805e8 3657 .mapping_error = intel_mapping_error,
ba395927
KA
3658};
3659
3660static inline int iommu_domain_cache_init(void)
3661{
3662 int ret = 0;
3663
3664 iommu_domain_cache = kmem_cache_create("iommu_domain",
3665 sizeof(struct dmar_domain),
3666 0,
3667 SLAB_HWCACHE_ALIGN,
 3669 NULL);
3670 if (!iommu_domain_cache) {
9f10e5bf 3671 pr_err("Couldn't create iommu_domain cache\n");
ba395927
KA
3672 ret = -ENOMEM;
3673 }
3674
3675 return ret;
3676}
3677
3678static inline int iommu_devinfo_cache_init(void)
3679{
3680 int ret = 0;
3681
3682 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
3683 sizeof(struct device_domain_info),
3684 0,
3685 SLAB_HWCACHE_ALIGN,
ba395927
KA
3686 NULL);
3687 if (!iommu_devinfo_cache) {
9f10e5bf 3688 pr_err("Couldn't create devinfo cache\n");
ba395927
KA
3689 ret = -ENOMEM;
3690 }
3691
3692 return ret;
3693}
3694
ba395927
KA
3695static int __init iommu_init_mempool(void)
3696{
3697 int ret;
3698 ret = iommu_iova_cache_init();
3699 if (ret)
3700 return ret;
3701
3702 ret = iommu_domain_cache_init();
3703 if (ret)
3704 goto domain_error;
3705
3706 ret = iommu_devinfo_cache_init();
3707 if (!ret)
3708 return ret;
3709
3710 kmem_cache_destroy(iommu_domain_cache);
3711domain_error:
85b45456 3712 iommu_iova_cache_destroy();
ba395927
KA
3713
3714 return -ENOMEM;
3715}
3716
3717static void __init iommu_exit_mempool(void)
3718{
3719 kmem_cache_destroy(iommu_devinfo_cache);
3720 kmem_cache_destroy(iommu_domain_cache);
85b45456 3721 iommu_iova_cache_destroy();
ba395927
KA
3722}
3723
556ab45f
DW
3724static void quirk_ioat_snb_local_iommu(struct pci_dev *pdev)
3725{
3726 struct dmar_drhd_unit *drhd;
3727 u32 vtbar;
3728 int rc;
3729
3730 /* We know that this device on this chipset has its own IOMMU.
3731 * If we find it under a different IOMMU, then the BIOS is lying
3732 * to us. Hope that the IOMMU for this device is actually
3733 * disabled, and it needs no translation...
3734 */
3735 rc = pci_bus_read_config_dword(pdev->bus, PCI_DEVFN(0, 0), 0xb0, &vtbar);
3736 if (rc) {
3737 /* "can't" happen */
3738 dev_info(&pdev->dev, "failed to run vt-d quirk\n");
3739 return;
3740 }
3741 vtbar &= 0xffff0000;
3742
 3743 /* we know that this IOMMU should be at offset 0xa000 from vtbar */
3744 drhd = dmar_find_matched_drhd_unit(pdev);
3745 if (WARN_TAINT_ONCE(!drhd || drhd->reg_base_addr - vtbar != 0xa000,
3746 TAINT_FIRMWARE_WORKAROUND,
3747 "BIOS assigned incorrect VT-d unit for Intel(R) QuickData Technology device\n"))
3748 pdev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
3749}
3750DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quirk_ioat_snb_local_iommu);
3751
ba395927
KA
3752static void __init init_no_remapping_devices(void)
3753{
3754 struct dmar_drhd_unit *drhd;
832bd858 3755 struct device *dev;
b683b230 3756 int i;
ba395927
KA
3757
3758 for_each_drhd_unit(drhd) {
3759 if (!drhd->include_all) {
b683b230
JL
3760 for_each_active_dev_scope(drhd->devices,
3761 drhd->devices_cnt, i, dev)
3762 break;
832bd858 3763 /* ignore DMAR unit if no devices exist */
ba395927
KA
3764 if (i == drhd->devices_cnt)
3765 drhd->ignored = 1;
3766 }
3767 }
3768
7c919779 3769 for_each_active_drhd_unit(drhd) {
7c919779 3770 if (drhd->include_all)
ba395927
KA
3771 continue;
3772
b683b230
JL
3773 for_each_active_dev_scope(drhd->devices,
3774 drhd->devices_cnt, i, dev)
832bd858 3775 if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev)))
ba395927 3776 break;
ba395927
KA
3777 if (i < drhd->devices_cnt)
3778 continue;
3779
c0771df8
DW
3780 /* This IOMMU has *only* gfx devices. Either bypass it or
3781 set the gfx_mapped flag, as appropriate */
3782 if (dmar_map_gfx) {
3783 intel_iommu_gfx_mapped = 1;
3784 } else {
3785 drhd->ignored = 1;
b683b230
JL
3786 for_each_active_dev_scope(drhd->devices,
3787 drhd->devices_cnt, i, dev)
832bd858 3788 dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3789 }
3790 }
3791}
3792
f59c7b69
FY
3793#ifdef CONFIG_SUSPEND
3794static int init_iommu_hw(void)
3795{
3796 struct dmar_drhd_unit *drhd;
3797 struct intel_iommu *iommu = NULL;
3798
3799 for_each_active_iommu(iommu, drhd)
3800 if (iommu->qi)
3801 dmar_reenable_qi(iommu);
3802
b779260b
JC
3803 for_each_iommu(iommu, drhd) {
3804 if (drhd->ignored) {
3805 /*
3806 * we always have to disable PMRs or DMA may fail on
3807 * this device
3808 */
3809 if (force_on)
3810 iommu_disable_protect_mem_regions(iommu);
3811 continue;
3812 }
3813
f59c7b69
FY
3814 iommu_flush_write_buffer(iommu);
3815
3816 iommu_set_root_entry(iommu);
3817
3818 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3819 DMA_CCMD_GLOBAL_INVL);
2a41ccee
JL
3820 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
3821 iommu_enable_translation(iommu);
b94996c9 3822 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3823 }
3824
3825 return 0;
3826}
3827
3828static void iommu_flush_all(void)
3829{
3830 struct dmar_drhd_unit *drhd;
3831 struct intel_iommu *iommu;
3832
3833 for_each_active_iommu(iommu, drhd) {
3834 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3835 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3836 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3837 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3838 }
3839}
3840
134fac3f 3841static int iommu_suspend(void)
f59c7b69
FY
3842{
3843 struct dmar_drhd_unit *drhd;
3844 struct intel_iommu *iommu = NULL;
3845 unsigned long flag;
3846
3847 for_each_active_iommu(iommu, drhd) {
3848 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3849 GFP_ATOMIC);
3850 if (!iommu->iommu_state)
3851 goto nomem;
3852 }
3853
3854 iommu_flush_all();
3855
3856 for_each_active_iommu(iommu, drhd) {
3857 iommu_disable_translation(iommu);
3858
1f5b3c3f 3859 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3860
3861 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3862 readl(iommu->reg + DMAR_FECTL_REG);
3863 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3864 readl(iommu->reg + DMAR_FEDATA_REG);
3865 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3866 readl(iommu->reg + DMAR_FEADDR_REG);
3867 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3868 readl(iommu->reg + DMAR_FEUADDR_REG);
3869
1f5b3c3f 3870 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3871 }
3872 return 0;
3873
3874nomem:
3875 for_each_active_iommu(iommu, drhd)
3876 kfree(iommu->iommu_state);
3877
3878 return -ENOMEM;
3879}
3880
134fac3f 3881static void iommu_resume(void)
f59c7b69
FY
3882{
3883 struct dmar_drhd_unit *drhd;
3884 struct intel_iommu *iommu = NULL;
3885 unsigned long flag;
3886
3887 if (init_iommu_hw()) {
b779260b
JC
3888 if (force_on)
3889 panic("tboot: IOMMU setup failed, DMAR can not resume!\n");
3890 else
3891 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
134fac3f 3892 return;
f59c7b69
FY
3893 }
3894
3895 for_each_active_iommu(iommu, drhd) {
3896
1f5b3c3f 3897 raw_spin_lock_irqsave(&iommu->register_lock, flag);
f59c7b69
FY
3898
3899 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3900 iommu->reg + DMAR_FECTL_REG);
3901 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3902 iommu->reg + DMAR_FEDATA_REG);
3903 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3904 iommu->reg + DMAR_FEADDR_REG);
3905 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3906 iommu->reg + DMAR_FEUADDR_REG);
3907
1f5b3c3f 3908 raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
f59c7b69
FY
3909 }
3910
3911 for_each_active_iommu(iommu, drhd)
3912 kfree(iommu->iommu_state);
f59c7b69
FY
3913}
3914
134fac3f 3915static struct syscore_ops iommu_syscore_ops = {
f59c7b69
FY
3916 .resume = iommu_resume,
3917 .suspend = iommu_suspend,
3918};
3919
134fac3f 3920static void __init init_iommu_pm_ops(void)
f59c7b69 3921{
134fac3f 3922 register_syscore_ops(&iommu_syscore_ops);
f59c7b69
FY
3923}
3924
3925#else
99592ba4 3926static inline void init_iommu_pm_ops(void) {}
f59c7b69
FY
3927#endif /* CONFIG_PM */
3928
318fe7df 3929
c2a0b538 3930int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header, void *arg)
318fe7df
SS
3931{
3932 struct acpi_dmar_reserved_memory *rmrr;
3933 struct dmar_rmrr_unit *rmrru;
3934
3935 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
3936 if (!rmrru)
3937 return -ENOMEM;
3938
3939 rmrru->hdr = header;
3940 rmrr = (struct acpi_dmar_reserved_memory *)header;
3941 rmrru->base_address = rmrr->base_address;
3942 rmrru->end_address = rmrr->end_address;
2e455289
JL
3943 rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1),
3944 ((void *)rmrr) + rmrr->header.length,
3945 &rmrru->devices_cnt);
3946 if (rmrru->devices_cnt && rmrru->devices == NULL) {
3947 kfree(rmrru);
3948 return -ENOMEM;
3949 }
318fe7df 3950
2e455289 3951 list_add(&rmrru->list, &dmar_rmrr_units);
318fe7df 3952
2e455289 3953 return 0;
318fe7df
SS
3954}
3955
6b197249
JL
3956static struct dmar_atsr_unit *dmar_find_atsr(struct acpi_dmar_atsr *atsr)
3957{
3958 struct dmar_atsr_unit *atsru;
3959 struct acpi_dmar_atsr *tmp;
3960
3961 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
3962 tmp = (struct acpi_dmar_atsr *)atsru->hdr;
3963 if (atsr->segment != tmp->segment)
3964 continue;
3965 if (atsr->header.length != tmp->header.length)
3966 continue;
3967 if (memcmp(atsr, tmp, atsr->header.length) == 0)
3968 return atsru;
3969 }
3970
3971 return NULL;
3972}
3973
3974int dmar_parse_one_atsr(struct acpi_dmar_header *hdr, void *arg)
318fe7df
SS
3975{
3976 struct acpi_dmar_atsr *atsr;
3977 struct dmar_atsr_unit *atsru;
3978
6b197249
JL
3979 if (system_state != SYSTEM_BOOTING && !intel_iommu_enabled)
3980 return 0;
3981
318fe7df 3982 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
6b197249
JL
3983 atsru = dmar_find_atsr(atsr);
3984 if (atsru)
3985 return 0;
3986
3987 atsru = kzalloc(sizeof(*atsru) + hdr->length, GFP_KERNEL);
318fe7df
SS
3988 if (!atsru)
3989 return -ENOMEM;
3990
6b197249
JL
3991 /*
 3992 * If memory is allocated from slab by the ACPI _DSM method, we need to
3993 * copy the memory content because the memory buffer will be freed
3994 * on return.
3995 */
3996 atsru->hdr = (void *)(atsru + 1);
3997 memcpy(atsru->hdr, hdr, hdr->length);
318fe7df 3998 atsru->include_all = atsr->flags & 0x1;
2e455289
JL
3999 if (!atsru->include_all) {
4000 atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1),
4001 (void *)atsr + atsr->header.length,
4002 &atsru->devices_cnt);
4003 if (atsru->devices_cnt && atsru->devices == NULL) {
4004 kfree(atsru);
4005 return -ENOMEM;
4006 }
4007 }
318fe7df 4008
0e242612 4009 list_add_rcu(&atsru->list, &dmar_atsr_units);
318fe7df
SS
4010
4011 return 0;
4012}
4013
9bdc531e
JL
4014static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru)
4015{
4016 dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt);
4017 kfree(atsru);
4018}
4019
6b197249
JL
4020int dmar_release_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4021{
4022 struct acpi_dmar_atsr *atsr;
4023 struct dmar_atsr_unit *atsru;
4024
4025 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4026 atsru = dmar_find_atsr(atsr);
4027 if (atsru) {
4028 list_del_rcu(&atsru->list);
4029 synchronize_rcu();
4030 intel_iommu_free_atsr(atsru);
4031 }
4032
4033 return 0;
4034}
4035
4036int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg)
4037{
4038 int i;
4039 struct device *dev;
4040 struct acpi_dmar_atsr *atsr;
4041 struct dmar_atsr_unit *atsru;
4042
4043 atsr = container_of(hdr, struct acpi_dmar_atsr, header);
4044 atsru = dmar_find_atsr(atsr);
4045 if (!atsru)
4046 return 0;
4047
4048 if (!atsru->include_all && atsru->devices && atsru->devices_cnt)
4049 for_each_active_dev_scope(atsru->devices, atsru->devices_cnt,
4050 i, dev)
4051 return -EBUSY;
4052
4053 return 0;
4054}
4055
ffebeb46
JL
4056static int intel_iommu_add(struct dmar_drhd_unit *dmaru)
4057{
4058 int sp, ret = 0;
4059 struct intel_iommu *iommu = dmaru->iommu;
4060
4061 if (g_iommus[iommu->seq_id])
4062 return 0;
4063
4064 if (hw_pass_through && !ecap_pass_through(iommu->ecap)) {
9f10e5bf 4065 pr_warn("%s: Doesn't support hardware pass through.\n",
ffebeb46
JL
4066 iommu->name);
4067 return -ENXIO;
4068 }
4069 if (!ecap_sc_support(iommu->ecap) &&
4070 domain_update_iommu_snooping(iommu)) {
9f10e5bf 4071 pr_warn("%s: Doesn't support snooping.\n",
ffebeb46
JL
4072 iommu->name);
4073 return -ENXIO;
4074 }
4075 sp = domain_update_iommu_superpage(iommu) - 1;
4076 if (sp >= 0 && !(cap_super_page_val(iommu->cap) & (1 << sp))) {
9f10e5bf 4077 pr_warn("%s: Doesn't support large page.\n",
ffebeb46
JL
4078 iommu->name);
4079 return -ENXIO;
4080 }
4081
4082 /*
4083 * Disable translation if already enabled prior to OS handover.
4084 */
4085 if (iommu->gcmd & DMA_GCMD_TE)
4086 iommu_disable_translation(iommu);
4087
4088 g_iommus[iommu->seq_id] = iommu;
4089 ret = iommu_init_domains(iommu);
4090 if (ret == 0)
4091 ret = iommu_alloc_root_entry(iommu);
4092 if (ret)
4093 goto out;
4094
4095 if (dmaru->ignored) {
4096 /*
4097 * we always have to disable PMRs or DMA may fail on this device
4098 */
4099 if (force_on)
4100 iommu_disable_protect_mem_regions(iommu);
4101 return 0;
4102 }
4103
4104 intel_iommu_init_qi(iommu);
4105 iommu_flush_write_buffer(iommu);
4106 ret = dmar_set_interrupt(iommu);
4107 if (ret)
4108 goto disable_iommu;
4109
4110 iommu_set_root_entry(iommu);
4111 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
4112 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
4113 iommu_enable_translation(iommu);
4114
ffebeb46
JL
4115 iommu_disable_protect_mem_regions(iommu);
4116 return 0;
4117
4118disable_iommu:
4119 disable_dmar_iommu(iommu);
4120out:
4121 free_dmar_iommu(iommu);
4122 return ret;
4123}
4124
6b197249
JL
4125int dmar_iommu_hotplug(struct dmar_drhd_unit *dmaru, bool insert)
4126{
ffebeb46
JL
4127 int ret = 0;
4128 struct intel_iommu *iommu = dmaru->iommu;
4129
4130 if (!intel_iommu_enabled)
4131 return 0;
4132 if (iommu == NULL)
4133 return -EINVAL;
4134
4135 if (insert) {
4136 ret = intel_iommu_add(dmaru);
4137 } else {
4138 disable_dmar_iommu(iommu);
4139 free_dmar_iommu(iommu);
4140 }
4141
4142 return ret;
6b197249
JL
4143}
4144
9bdc531e
JL
4145static void intel_iommu_free_dmars(void)
4146{
4147 struct dmar_rmrr_unit *rmrru, *rmrr_n;
4148 struct dmar_atsr_unit *atsru, *atsr_n;
4149
4150 list_for_each_entry_safe(rmrru, rmrr_n, &dmar_rmrr_units, list) {
4151 list_del(&rmrru->list);
4152 dmar_free_dev_scope(&rmrru->devices, &rmrru->devices_cnt);
4153 kfree(rmrru);
318fe7df
SS
4154 }
4155
9bdc531e
JL
4156 list_for_each_entry_safe(atsru, atsr_n, &dmar_atsr_units, list) {
4157 list_del(&atsru->list);
4158 intel_iommu_free_atsr(atsru);
4159 }
318fe7df
SS
4160}
4161
4162int dmar_find_matched_atsr_unit(struct pci_dev *dev)
4163{
b683b230 4164 int i, ret = 1;
318fe7df 4165 struct pci_bus *bus;
832bd858
DW
4166 struct pci_dev *bridge = NULL;
4167 struct device *tmp;
318fe7df
SS
4168 struct acpi_dmar_atsr *atsr;
4169 struct dmar_atsr_unit *atsru;
4170
4171 dev = pci_physfn(dev);
318fe7df 4172 for (bus = dev->bus; bus; bus = bus->parent) {
b5f82ddf 4173 bridge = bus->self;
318fe7df 4174 if (!bridge || !pci_is_pcie(bridge) ||
62f87c0e 4175 pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE)
318fe7df 4176 return 0;
b5f82ddf 4177 if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT)
318fe7df 4178 break;
318fe7df 4179 }
b5f82ddf
JL
4180 if (!bridge)
4181 return 0;
318fe7df 4182
0e242612 4183 rcu_read_lock();
b5f82ddf
JL
4184 list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) {
4185 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4186 if (atsr->segment != pci_domain_nr(dev->bus))
4187 continue;
4188
b683b230 4189 for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp)
832bd858 4190 if (tmp == &bridge->dev)
b683b230 4191 goto out;
b5f82ddf
JL
4192
4193 if (atsru->include_all)
b683b230 4194 goto out;
b5f82ddf 4195 }
b683b230
JL
4196 ret = 0;
4197out:
0e242612 4198 rcu_read_unlock();
318fe7df 4199
b683b230 4200 return ret;
318fe7df
SS
4201}
4202
59ce0515
JL
4203int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info)
4204{
4205 int ret = 0;
4206 struct dmar_rmrr_unit *rmrru;
4207 struct dmar_atsr_unit *atsru;
4208 struct acpi_dmar_atsr *atsr;
4209 struct acpi_dmar_reserved_memory *rmrr;
4210
4211 if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING)
4212 return 0;
4213
4214 list_for_each_entry(rmrru, &dmar_rmrr_units, list) {
4215 rmrr = container_of(rmrru->hdr,
4216 struct acpi_dmar_reserved_memory, header);
4217 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4218 ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1),
4219 ((void *)rmrr) + rmrr->header.length,
4220 rmrr->segment, rmrru->devices,
4221 rmrru->devices_cnt);
27e24950 4222 if (ret < 0)
59ce0515
JL
4223 return ret;
4224 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
27e24950
JL
4225 dmar_remove_dev_scope(info, rmrr->segment,
4226 rmrru->devices, rmrru->devices_cnt);
59ce0515
JL
4227 }
4228 }
4229
4230 list_for_each_entry(atsru, &dmar_atsr_units, list) {
4231 if (atsru->include_all)
4232 continue;
4233
4234 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
4235 if (info->event == BUS_NOTIFY_ADD_DEVICE) {
4236 ret = dmar_insert_dev_scope(info, (void *)(atsr + 1),
4237 (void *)atsr + atsr->header.length,
4238 atsr->segment, atsru->devices,
4239 atsru->devices_cnt);
4240 if (ret > 0)
4241 break;
 4242 else if (ret < 0)
4243 return ret;
4244 } else if (info->event == BUS_NOTIFY_DEL_DEVICE) {
4245 if (dmar_remove_dev_scope(info, atsr->segment,
4246 atsru->devices, atsru->devices_cnt))
4247 break;
4248 }
4249 }
4250
4251 return 0;
4252}
4253
99dcaded
FY
4254/*
 4255 * Here we only respond to a device being unbound from its driver.
 4256 *
 4257 * A newly added device is not attached to its DMAR domain here yet; that
 4258 * happens when the device is first mapped to an IOVA.
4259 */
4260static int device_notifier(struct notifier_block *nb,
4261 unsigned long action, void *data)
4262{
4263 struct device *dev = data;
99dcaded
FY
4264 struct dmar_domain *domain;
4265
3d89194a 4266 if (iommu_dummy(dev))
44cd613c
DW
4267 return 0;
4268
1196c2fb 4269 if (action != BUS_NOTIFY_REMOVED_DEVICE)
7e7dfab7
JL
4270 return 0;
4271
1525a29a 4272 domain = find_domain(dev);
99dcaded
FY
4273 if (!domain)
4274 return 0;
4275
e6de0f8d 4276 dmar_remove_one_dev_info(domain, dev);
ab8dfe25 4277 if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices))
7e7dfab7 4278 domain_exit(domain);
a97590e5 4279
99dcaded
FY
4280 return 0;
4281}
4282
4283static struct notifier_block device_nb = {
4284 .notifier_call = device_notifier,
4285};
4286
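/*
 * Memory hotplug notifier: when hardware passthrough is not in use, the
 * static identity (si) domain must track memory as it comes and goes, so
 * new ranges are added to the identity map on MEM_GOING_ONLINE and torn
 * down again (including the IOTLB) on MEM_OFFLINE/MEM_CANCEL_ONLINE.
 */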
75f05569
JL
4287static int intel_iommu_memory_notifier(struct notifier_block *nb,
4288 unsigned long val, void *v)
4289{
4290 struct memory_notify *mhp = v;
4291 unsigned long long start, end;
4292 unsigned long start_vpfn, last_vpfn;
4293
4294 switch (val) {
4295 case MEM_GOING_ONLINE:
4296 start = mhp->start_pfn << PAGE_SHIFT;
4297 end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1;
4298 if (iommu_domain_identity_map(si_domain, start, end)) {
9f10e5bf 4299 pr_warn("Failed to build identity map for [%llx-%llx]\n",
75f05569
JL
4300 start, end);
4301 return NOTIFY_BAD;
4302 }
4303 break;
4304
4305 case MEM_OFFLINE:
4306 case MEM_CANCEL_ONLINE:
4307 start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
4308 last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1);
4309 while (start_vpfn <= last_vpfn) {
4310 struct iova *iova;
4311 struct dmar_drhd_unit *drhd;
4312 struct intel_iommu *iommu;
ea8ea460 4313 struct page *freelist;
75f05569
JL
4314
4315 iova = find_iova(&si_domain->iovad, start_vpfn);
4316 if (iova == NULL) {
9f10e5bf 4317 pr_debug("Failed get IOVA for PFN %lx\n",
75f05569
JL
4318 start_vpfn);
4319 break;
4320 }
4321
4322 iova = split_and_remove_iova(&si_domain->iovad, iova,
4323 start_vpfn, last_vpfn);
4324 if (iova == NULL) {
9f10e5bf 4325 pr_warn("Failed to split IOVA PFN [%lx-%lx]\n",
75f05569
JL
4326 start_vpfn, last_vpfn);
4327 return NOTIFY_BAD;
4328 }
4329
ea8ea460
DW
4330 freelist = domain_unmap(si_domain, iova->pfn_lo,
4331 iova->pfn_hi);
4332
75f05569
JL
4333 rcu_read_lock();
4334 for_each_active_iommu(iommu, drhd)
a1ddcbe9 4335 iommu_flush_iotlb_psi(iommu, si_domain,
a156ef99 4336 iova->pfn_lo, iova_size(iova),
ea8ea460 4337 !freelist, 0);
75f05569 4338 rcu_read_unlock();
ea8ea460 4339 dma_free_pagelist(freelist);
75f05569
JL
4340
4341 start_vpfn = iova->pfn_hi + 1;
4342 free_iova_mem(iova);
4343 }
4344 break;
4345 }
4346
4347 return NOTIFY_OK;
4348}
4349
4350static struct notifier_block intel_iommu_memory_nb = {
4351 .notifier_call = intel_iommu_memory_notifier,
4352 .priority = 0
4353};
4354
a5459cfe
AW
4355
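/*
 * Read-only sysfs attributes describing each DMAR unit.  They are grouped
 * under an "intel-iommu" attribute group and exported when intel_iommu_init()
 * calls iommu_device_create() for every active IOMMU (typically visible
 * under /sys/class/iommu/dmar<N>/intel-iommu/).
 */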
4356static ssize_t intel_iommu_show_version(struct device *dev,
4357 struct device_attribute *attr,
4358 char *buf)
4359{
4360 struct intel_iommu *iommu = dev_get_drvdata(dev);
4361 u32 ver = readl(iommu->reg + DMAR_VER_REG);
4362 return sprintf(buf, "%d:%d\n",
4363 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver));
4364}
4365static DEVICE_ATTR(version, S_IRUGO, intel_iommu_show_version, NULL);
4366
4367static ssize_t intel_iommu_show_address(struct device *dev,
4368 struct device_attribute *attr,
4369 char *buf)
4370{
4371 struct intel_iommu *iommu = dev_get_drvdata(dev);
4372 return sprintf(buf, "%llx\n", iommu->reg_phys);
4373}
4374static DEVICE_ATTR(address, S_IRUGO, intel_iommu_show_address, NULL);
4375
4376static ssize_t intel_iommu_show_cap(struct device *dev,
4377 struct device_attribute *attr,
4378 char *buf)
4379{
4380 struct intel_iommu *iommu = dev_get_drvdata(dev);
4381 return sprintf(buf, "%llx\n", iommu->cap);
4382}
4383static DEVICE_ATTR(cap, S_IRUGO, intel_iommu_show_cap, NULL);
4384
4385static ssize_t intel_iommu_show_ecap(struct device *dev,
4386 struct device_attribute *attr,
4387 char *buf)
4388{
4389 struct intel_iommu *iommu = dev_get_drvdata(dev);
4390 return sprintf(buf, "%llx\n", iommu->ecap);
4391}
4392static DEVICE_ATTR(ecap, S_IRUGO, intel_iommu_show_ecap, NULL);
4393
2238c082
AW
4394static ssize_t intel_iommu_show_ndoms(struct device *dev,
4395 struct device_attribute *attr,
4396 char *buf)
4397{
4398 struct intel_iommu *iommu = dev_get_drvdata(dev);
4399 return sprintf(buf, "%ld\n", cap_ndoms(iommu->cap));
4400}
4401static DEVICE_ATTR(domains_supported, S_IRUGO, intel_iommu_show_ndoms, NULL);
4402
4403static ssize_t intel_iommu_show_ndoms_used(struct device *dev,
4404 struct device_attribute *attr,
4405 char *buf)
4406{
4407 struct intel_iommu *iommu = dev_get_drvdata(dev);
4408 return sprintf(buf, "%d\n", bitmap_weight(iommu->domain_ids,
4409 cap_ndoms(iommu->cap)));
4410}
4411static DEVICE_ATTR(domains_used, S_IRUGO, intel_iommu_show_ndoms_used, NULL);
4412
a5459cfe
AW
4413static struct attribute *intel_iommu_attrs[] = {
4414 &dev_attr_version.attr,
4415 &dev_attr_address.attr,
4416 &dev_attr_cap.attr,
4417 &dev_attr_ecap.attr,
2238c082
AW
4418 &dev_attr_domains_supported.attr,
4419 &dev_attr_domains_used.attr,
a5459cfe
AW
4420 NULL,
4421};
4422
4423static struct attribute_group intel_iommu_group = {
4424 .name = "intel-iommu",
4425 .attrs = intel_iommu_attrs,
4426};
4427
4428const struct attribute_group *intel_iommu_groups[] = {
4429 &intel_iommu_group,
4430 NULL,
4431};
4432
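/*
 * Late boot entry point.  Rough sequence, as implemented below: honour a
 * TXT/tboot launch, parse the DMAR table and device scopes under
 * dmar_global_lock, reserve special IOVA ranges, program the IOMMUs via
 * init_dmars(), switch dma_ops to intel_dma_ops, expose the sysfs and
 * iommu-core hooks, and register the bus and memory-hotplug notifiers.
 */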
ba395927
KA
4433int __init intel_iommu_init(void)
4434{
9bdc531e 4435 int ret = -ENODEV;
3a93c841 4436 struct dmar_drhd_unit *drhd;
7c919779 4437 struct intel_iommu *iommu;
ba395927 4438
a59b50e9
JC
4439 /* VT-d is required for a TXT/tboot launch, so enforce that */
4440 force_on = tboot_force_iommu();
4441
3a5670e8
JL
4442 if (iommu_init_mempool()) {
4443 if (force_on)
4444 panic("tboot: Failed to initialize iommu memory\n");
4445 return -ENOMEM;
4446 }
4447
4448 down_write(&dmar_global_lock);
a59b50e9
JC
4449 if (dmar_table_init()) {
4450 if (force_on)
4451 panic("tboot: Failed to initialize DMAR table\n");
9bdc531e 4452 goto out_free_dmar;
a59b50e9 4453 }
ba395927 4454
c2c7286a 4455 if (dmar_dev_scope_init() < 0) {
a59b50e9
JC
4456 if (force_on)
4457 panic("tboot: Failed to initialize DMAR device scope\n");
9bdc531e 4458 goto out_free_dmar;
a59b50e9 4459 }
1886e8a9 4460
75f1cdf1 4461 if (no_iommu || dmar_disabled)
9bdc531e 4462 goto out_free_dmar;
2ae21010 4463
318fe7df 4464 if (list_empty(&dmar_rmrr_units))
9f10e5bf 4465 pr_info("No RMRR found\n");
318fe7df
SS
4466
4467 if (list_empty(&dmar_atsr_units))
9f10e5bf 4468 pr_info("No ATSR found\n");
318fe7df 4469
51a63e67
JC
4470 if (dmar_init_reserved_ranges()) {
4471 if (force_on)
4472 panic("tboot: Failed to reserve iommu ranges\n");
3a5670e8 4473 goto out_free_reserved_range;
51a63e67 4474 }
ba395927
KA
4475
4476 init_no_remapping_devices();
4477
b779260b 4478 ret = init_dmars();
ba395927 4479 if (ret) {
a59b50e9
JC
4480 if (force_on)
4481 panic("tboot: Failed to initialize DMARs\n");
9f10e5bf 4482 pr_err("Initialization failed\n");
9bdc531e 4483 goto out_free_reserved_range;
ba395927 4484 }
3a5670e8 4485 up_write(&dmar_global_lock);
9f10e5bf 4486 pr_info("Intel(R) Virtualization Technology for Directed I/O\n");
ba395927 4487
5e0d2a6f 4488 init_timer(&unmap_timer);
75f1cdf1
FT
4489#ifdef CONFIG_SWIOTLB
4490 swiotlb = 0;
4491#endif
19943b0e 4492 dma_ops = &intel_dma_ops;
4ed0d3e6 4493
134fac3f 4494 init_iommu_pm_ops();
a8bcbb0d 4495
a5459cfe
AW
4496 for_each_active_iommu(iommu, drhd)
4497 iommu->iommu_dev = iommu_device_create(NULL, iommu,
4498 intel_iommu_groups,
2439d4aa 4499 "%s", iommu->name);
a5459cfe 4500
4236d97d 4501 bus_set_iommu(&pci_bus_type, &intel_iommu_ops);
99dcaded 4502 bus_register_notifier(&pci_bus_type, &device_nb);
75f05569
JL
4503 if (si_domain && !hw_pass_through)
4504 register_memory_notifier(&intel_iommu_memory_nb);
99dcaded 4505
8bc1f85c
ED
4506 intel_iommu_enabled = 1;
4507
ba395927 4508 return 0;
9bdc531e
JL
4509
4510out_free_reserved_range:
4511 put_iova_domain(&reserved_iova_list);
9bdc531e
JL
4512out_free_dmar:
4513 intel_iommu_free_dmars();
3a5670e8
JL
4514 up_write(&dmar_global_lock);
4515 iommu_exit_mempool();
9bdc531e 4516 return ret;
ba395927 4517}
e820482c 4518
2452d9db 4519static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
579305f7
AW
4520{
4521 struct intel_iommu *iommu = opaque;
4522
2452d9db 4523 domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
579305f7
AW
4524 return 0;
4525}
4526
4527/*
4528 * NB - intel-iommu lacks any sort of reference counting for the users of
4529 * dependent devices. If multiple endpoints have intersecting dependent
4530 * devices, unbinding the driver from any one of them will possibly leave
4531 * the others unable to operate.
4532 */
2452d9db 4533static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
3199aa6b 4534{
0bcb3e28 4535 if (!iommu || !dev || !dev_is_pci(dev))
3199aa6b
HW
4536 return;
4537
2452d9db 4538 pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
3199aa6b
HW
4539}
4540
55d94043
JR
4541static void __dmar_remove_one_dev_info(struct dmar_domain *domain,
4542 struct device *dev)
c7151a8d 4543{
b608ac3b 4544 struct device_domain_info *info;
c7151a8d
WH
4545 struct intel_iommu *iommu;
4546 unsigned long flags;
156baca8 4547 u8 bus, devfn;
c7151a8d 4548
55d94043
JR
4549 assert_spin_locked(&device_domain_lock);
4550
bf9c9eda 4551 iommu = device_to_iommu(dev, &bus, &devfn);
c7151a8d
WH
4552 if (!iommu)
4553 return;
4554
b608ac3b 4555 info = dev->archdata.iommu;
c7151a8d 4556
b608ac3b
JR
4557 if (WARN_ON(!info))
4558 return;
c7151a8d 4559
b608ac3b 4560 unlink_domain_info(info);
3e7abe25 4561
b608ac3b 4562 iommu_disable_dev_iotlb(info);
2452d9db 4563 domain_context_clear(iommu, dev);
b608ac3b 4564 free_devinfo_mem(info);
b608ac3b 4565
d160aca5
JR
4566 spin_lock_irqsave(&iommu->lock, flags);
4567 domain_detach_iommu(domain, iommu);
4568 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
4569}
4570
55d94043
JR
4571static void dmar_remove_one_dev_info(struct dmar_domain *domain,
4572 struct device *dev)
4573{
4574 unsigned long flags;
4575
4576 spin_lock_irqsave(&device_domain_lock, flags);
4577 __dmar_remove_one_dev_info(domain, dev);
4578 spin_unlock_irqrestore(&device_domain_lock, flags);
4579}
4580
2c2e2c38 4581static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
4582{
4583 int adjust_width;
4584
0fb5fe87
RM
4585 init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN,
4586 DMA_32BIT_PFN);
5e98c4b1
WH
4587 domain_reserve_special_ranges(domain);
4588
4589 /* calculate AGAW */
4590 domain->gaw = guest_width;
4591 adjust_width = guestwidth_to_adjustwidth(guest_width);
4592 domain->agaw = width_to_agaw(adjust_width);
4593
5e98c4b1 4594 domain->iommu_coherency = 0;
c5b15255 4595 domain->iommu_snooping = 0;
6dd9a7c7 4596 domain->iommu_superpage = 0;
fe40f1e0 4597 domain->max_addr = 0;
5e98c4b1
WH
4598
4599 /* always allocate the top pgd */
4c923d47 4600 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
4601 if (!domain->pgd)
4602 return -ENOMEM;
4603 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
4604 return 0;
4605}
4606
00a77deb 4607static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
38717946 4608{
5d450806 4609 struct dmar_domain *dmar_domain;
00a77deb
JR
4610 struct iommu_domain *domain;
4611
4612 if (type != IOMMU_DOMAIN_UNMANAGED)
4613 return NULL;
38717946 4614
ab8dfe25 4615 dmar_domain = alloc_domain(DOMAIN_FLAG_VIRTUAL_MACHINE);
5d450806 4616 if (!dmar_domain) {
9f10e5bf 4617 pr_err("Can't allocate dmar_domain\n");
00a77deb 4618 return NULL;
38717946 4619 }
2c2e2c38 4620 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
9f10e5bf 4621 pr_err("Domain initialization failed\n");
92d03cc8 4622 domain_exit(dmar_domain);
00a77deb 4623 return NULL;
38717946 4624 }
8140a95d 4625 domain_update_iommu_cap(dmar_domain);
faa3d6f5 4626
00a77deb 4627 domain = &dmar_domain->domain;
8a0e715b
JR
4628 domain->geometry.aperture_start = 0;
4629 domain->geometry.aperture_end = __DOMAIN_MAX_ADDR(dmar_domain->gaw);
4630 domain->geometry.force_aperture = true;
4631
00a77deb 4632 return domain;
38717946 4633}
38717946 4634
00a77deb 4635static void intel_iommu_domain_free(struct iommu_domain *domain)
38717946 4636{
00a77deb 4637 domain_exit(to_dmar_domain(domain));
38717946 4638}
38717946 4639
4c5478c9
JR
4640static int intel_iommu_attach_device(struct iommu_domain *domain,
4641 struct device *dev)
38717946 4642{
00a77deb 4643 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0
WH
4644 struct intel_iommu *iommu;
4645 int addr_width;
156baca8 4646 u8 bus, devfn;
faa3d6f5 4647
c875d2c1
AW
4648 if (device_is_rmrr_locked(dev)) {
4649 dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
4650 return -EPERM;
4651 }
4652
7207d8f9
DW
4653 /* normally dev is not mapped */
4654 if (unlikely(domain_context_mapped(dev))) {
faa3d6f5
WH
4655 struct dmar_domain *old_domain;
4656
1525a29a 4657 old_domain = find_domain(dev);
faa3d6f5 4658 if (old_domain) {
d160aca5 4659 rcu_read_lock();
de7e8886 4660 dmar_remove_one_dev_info(old_domain, dev);
d160aca5 4661 rcu_read_unlock();
62c22167
JR
4662
4663 if (!domain_type_is_vm_or_si(old_domain) &&
4664 list_empty(&old_domain->devices))
4665 domain_exit(old_domain);
faa3d6f5
WH
4666 }
4667 }
4668
156baca8 4669 iommu = device_to_iommu(dev, &bus, &devfn);
fe40f1e0
WH
4670 if (!iommu)
4671 return -ENODEV;
4672
4673 /* check if this iommu agaw is sufficient for max mapped address */
4674 addr_width = agaw_to_width(iommu->agaw);
a99c47a2
TL
4675 if (addr_width > cap_mgaw(iommu->cap))
4676 addr_width = cap_mgaw(iommu->cap);
4677
4678 if (dmar_domain->max_addr > (1LL << addr_width)) {
9f10e5bf 4679 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4680 "sufficient for the mapped address (%llx)\n",
a99c47a2 4681 __func__, addr_width, dmar_domain->max_addr);
fe40f1e0
WH
4682 return -EFAULT;
4683 }
a99c47a2
TL
4684 dmar_domain->gaw = addr_width;
4685
4686 /*
4687 * Knock out extra levels of page tables if necessary
4688 */
4689 while (iommu->agaw < dmar_domain->agaw) {
4690 struct dma_pte *pte;
4691
4692 pte = dmar_domain->pgd;
4693 if (dma_pte_present(pte)) {
25cbff16
SY
4694 dmar_domain->pgd = (struct dma_pte *)
4695 phys_to_virt(dma_pte_addr(pte));
7a661013 4696 free_pgtable_page(pte);
a99c47a2
TL
4697 }
4698 dmar_domain->agaw--;
4699 }
fe40f1e0 4700
28ccce0d 4701 return domain_add_dev_info(dmar_domain, dev);
38717946 4702}
38717946 4703
4c5478c9
JR
4704static void intel_iommu_detach_device(struct iommu_domain *domain,
4705 struct device *dev)
38717946 4706{
e6de0f8d 4707 dmar_remove_one_dev_info(to_dmar_domain(domain), dev);
faa3d6f5 4708}
c7151a8d 4709
b146a1c9
JR
4710static int intel_iommu_map(struct iommu_domain *domain,
4711 unsigned long iova, phys_addr_t hpa,
5009065d 4712 size_t size, int iommu_prot)
faa3d6f5 4713{
00a77deb 4714 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
fe40f1e0 4715 u64 max_addr;
dde57a21 4716 int prot = 0;
faa3d6f5 4717 int ret;
fe40f1e0 4718
dde57a21
JR
4719 if (iommu_prot & IOMMU_READ)
4720 prot |= DMA_PTE_READ;
4721 if (iommu_prot & IOMMU_WRITE)
4722 prot |= DMA_PTE_WRITE;
9cf06697
SY
4723 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
4724 prot |= DMA_PTE_SNP;
dde57a21 4725
163cc52c 4726 max_addr = iova + size;
dde57a21 4727 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
4728 u64 end;
4729
4730 /* check if minimum agaw is sufficient for mapped address */
8954da1f 4731 end = __DOMAIN_MAX_ADDR(dmar_domain->gaw) + 1;
fe40f1e0 4732 if (end < max_addr) {
9f10e5bf 4733 pr_err("%s: iommu width (%d) is not "
fe40f1e0 4734 "sufficient for the mapped address (%llx)\n",
8954da1f 4735 __func__, dmar_domain->gaw, max_addr);
fe40f1e0
WH
4736 return -EFAULT;
4737 }
dde57a21 4738 dmar_domain->max_addr = max_addr;
fe40f1e0 4739 }
ad051221
DW
4740 /* Round up size to next multiple of PAGE_SIZE, if it and
4741 the low bits of hpa would take us onto the next page */
88cb6a74 4742 size = aligned_nrpages(hpa, size);
ad051221
DW
4743 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
4744 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 4745 return ret;
38717946 4746}
38717946 4747
5009065d 4748static size_t intel_iommu_unmap(struct iommu_domain *domain,
ea8ea460 4749 unsigned long iova, size_t size)
38717946 4750{
00a77deb 4751 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
ea8ea460
DW
4752 struct page *freelist = NULL;
4753 struct intel_iommu *iommu;
4754 unsigned long start_pfn, last_pfn;
4755 unsigned int npages;
42e8c186 4756 int iommu_id, level = 0;
5cf0a76f
DW
4757
4758 /* Cope with horrid API which requires us to unmap more than the
4759 size argument if it happens to be a large-page mapping. */
4760 if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level))
4761 BUG();
4762
4763 if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
4764 size = VTD_PAGE_SIZE << level_to_offset_bits(level);
4b99d352 4765
ea8ea460
DW
4766 start_pfn = iova >> VTD_PAGE_SHIFT;
4767 last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT;
4768
4769 freelist = domain_unmap(dmar_domain, start_pfn, last_pfn);
4770
4771 npages = last_pfn - start_pfn + 1;
4772
29a27719 4773 for_each_domain_iommu(iommu_id, dmar_domain) {
a1ddcbe9
JR
4774 iommu = g_iommus[iommu_id];
4775
42e8c186
JR
4776 iommu_flush_iotlb_psi(g_iommus[iommu_id], dmar_domain,
4777 start_pfn, npages, !freelist, 0);
ea8ea460
DW
4778 }
4779
4780 dma_free_pagelist(freelist);
fe40f1e0 4781
163cc52c
DW
4782 if (dmar_domain->max_addr == iova + size)
4783 dmar_domain->max_addr = iova;
b146a1c9 4784
5cf0a76f 4785 return size;
38717946 4786}
38717946 4787
d14d6577 4788static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
bb5547ac 4789 dma_addr_t iova)
38717946 4790{
00a77deb 4791 struct dmar_domain *dmar_domain = to_dmar_domain(domain);
38717946 4792 struct dma_pte *pte;
5cf0a76f 4793 int level = 0;
faa3d6f5 4794 u64 phys = 0;
38717946 4795
5cf0a76f 4796 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
38717946 4797 if (pte)
faa3d6f5 4798 phys = dma_pte_addr(pte);
38717946 4799
faa3d6f5 4800 return phys;
38717946 4801}
a8bcbb0d 4802
5d587b8d 4803static bool intel_iommu_capable(enum iommu_cap cap)
dbb9fd86 4804{
dbb9fd86 4805 if (cap == IOMMU_CAP_CACHE_COHERENCY)
5d587b8d 4806 return domain_update_iommu_snooping(NULL) == 1;
323f99cb 4807 if (cap == IOMMU_CAP_INTR_REMAP)
5d587b8d 4808 return irq_remapping_enabled == 1;
dbb9fd86 4809
5d587b8d 4810 return false;
dbb9fd86
SY
4811}
4812
abdfdde2
AW
4813static int intel_iommu_add_device(struct device *dev)
4814{
a5459cfe 4815 struct intel_iommu *iommu;
abdfdde2 4816 struct iommu_group *group;
156baca8 4817 u8 bus, devfn;
70ae6f0d 4818
a5459cfe
AW
4819 iommu = device_to_iommu(dev, &bus, &devfn);
4820 if (!iommu)
70ae6f0d
AW
4821 return -ENODEV;
4822
a5459cfe 4823 iommu_device_link(iommu->iommu_dev, dev);
a4ff1fc2 4824
e17f9ff4 4825 group = iommu_group_get_for_dev(dev);
783f157b 4826
e17f9ff4
AW
4827 if (IS_ERR(group))
4828 return PTR_ERR(group);
bcb71abe 4829
abdfdde2 4830 iommu_group_put(group);
e17f9ff4 4831 return 0;
abdfdde2 4832}
70ae6f0d 4833
abdfdde2
AW
4834static void intel_iommu_remove_device(struct device *dev)
4835{
a5459cfe
AW
4836 struct intel_iommu *iommu;
4837 u8 bus, devfn;
4838
4839 iommu = device_to_iommu(dev, &bus, &devfn);
4840 if (!iommu)
4841 return;
4842
abdfdde2 4843 iommu_group_remove_device(dev);
a5459cfe
AW
4844
4845 iommu_device_unlink(iommu->iommu_dev, dev);
70ae6f0d
AW
4846}
4847
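/*
 * Hooks backing the generic IOMMU API for explicitly managed domains.
 * A consumer would normally go through the iommu core rather than calling
 * these directly; a minimal sketch (error handling omitted, pdev/iova/phys
 * are placeholders) looks like:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc(&pci_bus_type);
 *
 *	if (dom && !iommu_attach_device(dom, &pdev->dev))
 *		iommu_map(dom, iova, phys, PAGE_SIZE,
 *			  IOMMU_READ | IOMMU_WRITE);
 */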
b22f6434 4848static const struct iommu_ops intel_iommu_ops = {
5d587b8d 4849 .capable = intel_iommu_capable,
00a77deb
JR
4850 .domain_alloc = intel_iommu_domain_alloc,
4851 .domain_free = intel_iommu_domain_free,
a8bcbb0d
JR
4852 .attach_dev = intel_iommu_attach_device,
4853 .detach_dev = intel_iommu_detach_device,
b146a1c9
JR
4854 .map = intel_iommu_map,
4855 .unmap = intel_iommu_unmap,
315786eb 4856 .map_sg = default_iommu_map_sg,
a8bcbb0d 4857 .iova_to_phys = intel_iommu_iova_to_phys,
abdfdde2
AW
4858 .add_device = intel_iommu_add_device,
4859 .remove_device = intel_iommu_remove_device,
6d1c56a9 4860 .pgsize_bitmap = INTEL_IOMMU_PGSIZES,
a8bcbb0d 4861};
9af88143 4862
9452618e
DV
4863static void quirk_iommu_g4x_gfx(struct pci_dev *dev)
4864{
4865 /* G4x/GM45 integrated gfx dmar support is totally busted. */
9f10e5bf 4866 pr_info("Disabling IOMMU for graphics on this chipset\n");
9452618e
DV
4867 dmar_map_gfx = 0;
4868}
4869
4870DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_g4x_gfx);
4871DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_g4x_gfx);
4872DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_g4x_gfx);
4873DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_g4x_gfx);
4874DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_g4x_gfx);
4875DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_g4x_gfx);
4876DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_g4x_gfx);
4877
d34d6517 4878static void quirk_iommu_rwbf(struct pci_dev *dev)
9af88143
DW
4879{
4880 /*
4881 * Mobile 4 Series Chipset neglects to set RWBF capability,
210561ff 4882 * but needs it. Same seems to hold for the desktop versions.
9af88143 4883 */
9f10e5bf 4884 pr_info("Forcing write-buffer flush capability\n");
9af88143
DW
4885 rwbf_quirk = 1;
4886}
4887
4888DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
210561ff
DV
4889DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e00, quirk_iommu_rwbf);
4890DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e10, quirk_iommu_rwbf);
4891DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e20, quirk_iommu_rwbf);
4892DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e30, quirk_iommu_rwbf);
4893DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e40, quirk_iommu_rwbf);
4894DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2e90, quirk_iommu_rwbf);
e0fc7e0b 4895
eecfd57f
AJ
4896#define GGC 0x52
4897#define GGC_MEMORY_SIZE_MASK (0xf << 8)
4898#define GGC_MEMORY_SIZE_NONE (0x0 << 8)
4899#define GGC_MEMORY_SIZE_1M (0x1 << 8)
4900#define GGC_MEMORY_SIZE_2M (0x3 << 8)
4901#define GGC_MEMORY_VT_ENABLED (0x8 << 8)
4902#define GGC_MEMORY_SIZE_2M_VT (0x9 << 8)
4903#define GGC_MEMORY_SIZE_3M_VT (0xa << 8)
4904#define GGC_MEMORY_SIZE_4M_VT (0xb << 8)
4905
d34d6517 4906static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev)
9eecabcb
DW
4907{
4908 unsigned short ggc;
4909
eecfd57f 4910 if (pci_read_config_word(dev, GGC, &ggc))
9eecabcb
DW
4911 return;
4912
eecfd57f 4913 if (!(ggc & GGC_MEMORY_VT_ENABLED)) {
9f10e5bf 4914 pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n");
9eecabcb 4915 dmar_map_gfx = 0;
6fbcfb3e
DW
4916 } else if (dmar_map_gfx) {
4917 /* we have to ensure the gfx device is idle before we flush */
9f10e5bf 4918 pr_info("Disabling batched IOTLB flush on Ironlake\n");
6fbcfb3e
DW
4919 intel_iommu_strict = 1;
4920 }
9eecabcb
DW
4921}
4922DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0040, quirk_calpella_no_shadow_gtt);
4923DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0044, quirk_calpella_no_shadow_gtt);
4924DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x0062, quirk_calpella_no_shadow_gtt);
4925DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x006a, quirk_calpella_no_shadow_gtt);
4926
e0fc7e0b
DW
4927/* On Tylersburg chipsets, some BIOSes have been known to enable the
4928 ISOCH DMAR unit for the Azalia sound device, but not give it any
4929 TLB entries, which causes it to deadlock. Check for that. We do
4930 this in a function called from init_dmars(), instead of in a PCI
4931 quirk, because we don't want to print the obnoxious "BIOS broken"
4932 message if VT-d is actually disabled.
4933*/
4934static void __init check_tylersburg_isoch(void)
4935{
4936 struct pci_dev *pdev;
4937 uint32_t vtisochctrl;
4938
4939 /* If there's no Azalia in the system anyway, forget it. */
4940 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
4941 if (!pdev)
4942 return;
4943 pci_dev_put(pdev);
4944
4945 /* System Management Registers. Might be hidden, in which case
4946 we can't do the sanity check. But that's OK, because the
4947 known-broken BIOSes _don't_ actually hide it, so far. */
4948 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
4949 if (!pdev)
4950 return;
4951
4952 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
4953 pci_dev_put(pdev);
4954 return;
4955 }
4956
4957 pci_dev_put(pdev);
4958
4959 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
4960 if (vtisochctrl & 1)
4961 return;
4962
4963 /* Drop all bits other than the number of TLB entries */
4964 vtisochctrl &= 0x1c;
4965
4966 /* If we have the recommended number of TLB entries (16), fine. */
4967 if (vtisochctrl == 0x10)
4968 return;
4969
4970 /* Zero TLB entries? You get to ride the short bus to school. */
4971 if (!vtisochctrl) {
4972 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
4973 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
4974 dmi_get_system_info(DMI_BIOS_VENDOR),
4975 dmi_get_system_info(DMI_BIOS_VERSION),
4976 dmi_get_system_info(DMI_PRODUCT_VERSION));
4977 iommu_identity_mapping |= IDENTMAP_AZALIA;
4978 return;
4979 }
9f10e5bf
JR
4980
4981 pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
e0fc7e0b
DW
4982 vtisochctrl);
4983}