/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

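/*
 * Choose the page protection for a DMA mapping: non-coherent devices and
 * DMA_ATTR_WRITE_COMBINE requests get a write-combining attribute,
 * otherwise the passed-in protection is kept unchanged.
 */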
static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || dma_get_attr(DMA_ATTR_WRITE_COMBINE, attrs))
		return pgprot_writecombine(prot);
	return prot;
}

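/*
 * Small pool of non-cacheable memory reserved at boot for atomic
 * (non-blocking) coherent allocations; its size can be tuned with the
 * "coherent_pool=" early parameter.
 */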
static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);

static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

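/*
 * Back-end allocator: use CMA when the device has a contiguous area and the
 * caller may block, otherwise fall back to swiotlb_alloc_coherent().
 */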
static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  struct dma_attrs *attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				struct dma_attrs *attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					    phys_to_page(paddr),
					    size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

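/*
 * dma_map_ops .alloc callback: non-blocking requests on non-coherent devices
 * are served from the atomic pool; otherwise the buffer comes from
 * __dma_alloc_coherent() and, for non-coherent devices, is given a second,
 * non-cacheable kernel mapping.
 */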
static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 struct dma_attrs *attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !gfpflags_allow_blocking(flags)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_range(ptr, ptr + size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       struct dma_attrs *attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}

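/*
 * Streaming DMA wrappers: delegate to the generic swiotlb helpers and add
 * the CPU cache maintenance that non-coherent devices need on the kernel
 * alias of the buffer for each map/unmap/sync operation.
 */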
static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     struct dma_attrs *attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}


static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 struct dma_attrs *attrs)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     struct dma_attrs *attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  struct dma_attrs *attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 struct dma_attrs *attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

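/*
 * The default dma_map_ops, installed by arch_setup_dma_ops() below; devices
 * attached to an IOMMU DMA domain get iommu_dma_ops instead.
 */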
static struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};

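/*
 * Reserve the atomic pool at boot: carve it out of CMA when available
 * (GFP_DMA pages otherwise), flush it from the caches and remap it with
 * non-cacheable attributes before handing it to a gen_pool allocator.
 */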
static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_range(page_addr, page_addr + atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   struct dma_attrs *attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 struct dma_attrs *attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			struct dma_attrs *attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   struct dma_attrs *attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       struct dma_attrs *attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  struct dma_attrs *attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     struct dma_attrs *attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);

static int __init arm64_dma_init(void)
{
	return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);


#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_range(virt, virt + PAGE_SIZE);
}

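/*
 * IOMMU-backed allocation: in a blocking context, get (possibly scattered)
 * pages from iommu_dma_alloc() and remap them into a contiguous virtual
 * range; in atomic context, fall back to the atomic pool (non-coherent
 * devices) or a plain alloc_pages() (coherent devices).
 */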
static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 struct dma_attrs *attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp)) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, iosize, gfp, ioprot, handle,
					flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, struct dma_attrs *attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, NULL);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      struct dma_attrs *attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, struct dma_attrs *attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   struct dma_attrs *attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_direction_to_prot(dir, coherent);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    !dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       struct dma_attrs *attrs)
{
	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				struct dma_attrs *attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   struct dma_attrs *attrs)
{
	if (!dma_get_attr(DMA_ATTR_SKIP_CPU_SYNC, attrs))
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

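/*
 * dma_map_ops installed once a device has been attached to an IOMMU DMA
 * domain (see do_iommu_attach() below).
 */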
static struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.dma_supported = iommu_dma_supported,
	.mapping_error = iommu_dma_mapping_error,
};

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

/*
 * Temporarily "borrow" a domain feature flag to tell if we had to resort
 * to creating our own domain here, in case we need to clean it up again.
 */
#define __IOMMU_DOMAIN_FAKE_DEFAULT	(1U << 31)

static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			    u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * Best case: The device is either part of a group which was
	 * already attached to a domain in a previous call, or it's
	 * been put in a default DMA domain by the IOMMU core.
	 */
	if (!domain) {
		/*
		 * Urgh. The IOMMU core isn't going to do default domains
		 * for non-PCI devices anyway, until it has some means of
		 * abstracting the entirely implementation-specific
		 * sideband data/SoC topology/unicorn dust that may or
		 * may not differentiate upstream masters.
		 * So until then, HORRIBLE HACKS!
		 */
		domain = ops->domain_alloc(IOMMU_DOMAIN_DMA);
		if (!domain)
			goto out_no_domain;

		domain->ops = ops;
		domain->type = IOMMU_DOMAIN_DMA | __IOMMU_DOMAIN_FAKE_DEFAULT;

		if (iommu_attach_device(domain, dev))
			goto out_put_domain;
	}

	if (iommu_dma_init_domain(domain, dma_base, size))
		goto out_detach;

	dev->archdata.dma_ops = &iommu_dma_ops;
	return true;

out_detach:
	iommu_detach_device(domain, dev);
out_put_domain:
	if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
		iommu_domain_free(domain);
out_no_domain:
	pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
		dev_name(dev));
	return false;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			       u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_ADD_DEVICE)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (do_iommu_attach(master->dev, master->ops,
				master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;
	/*
	 * The device must be attached to a domain before the driver probe
	 * routine gets a chance to start allocating DMA buffers. However,
	 * the IOMMU driver also needs a chance to configure the iommu_group
	 * via its add_device callback first, so we need to make the attach
	 * happen between those two points. Since the IOMMU core uses a bus
	 * notifier with default priority for add_device, do the same but
	 * with a lower priority to ensure the appropriate ordering.
	 */
	nb->notifier_call = __iommu_attach_notifier;
	nb->priority = -100;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}
	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (domain) {
		iommu_detach_device(domain, dev);
		if (domain->type & __IOMMU_DOMAIN_FAKE_DEFAULT)
			iommu_domain_free(domain);
	}

	dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  struct iommu_ops *iommu)
{ }

#endif /* CONFIG_IOMMU_DMA */

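/*
 * Per-device DMA setup: default to the swiotlb-backed ops unless a dma_ops
 * pointer is already installed, record the device's coherency, then let the
 * IOMMU path override the ops where an IOMMU is present.
 */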
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			struct iommu_ops *iommu, bool coherent)
{
	if (!dev->archdata.dma_ops)
		dev->archdata.dma_ops = &swiotlb_dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}