/*
 * Meta version derived from arch/powerpc/lib/dma-noncoherent.c
 * Copyright (C) 2008 Imagination Technologies Ltd.
 *
 * PowerPC version derived from arch/arm/mm/consistent.c
 * Copyright (C) 2001 Dan Malek (dmalek@jlc.net)
 *
 * Copyright (C) 2000 Russell King
 *
 * Consistent memory allocators. Used for DMA devices that want to
 * share uncached memory with the processor core. The function return
 * is the virtual address and 'dma_handle' is the physical address.
 * Mostly stolen from the ARM port, with some changes for PowerPC.
 * -- Dan
 *
 * Reorganized to get rid of the arch-specific consistent_* functions
 * and provide non-coherent implementations for the DMA API. -Matt
 *
 * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent()
 * implementation. This is pulled straight from ARM and barely
 * modified. -Matt
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/highmem.h>
#include <linux/dma-mapping.h>
#include <linux/slab.h>

#include <asm/tlbflush.h>
#include <asm/mmu.h>

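/*
 * Convert a kernel virtual address inside the consistent mapping window
 * into an index into the consistent_pte table set up by dma_alloc_init()
 * below (one pte per page).
 */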
#define CONSISTENT_OFFSET(x)	(((unsigned long)(x) - CONSISTENT_START) \
				 >> PAGE_SHIFT)

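/*
 * Fetch and sanity-check the device's coherent DMA mask; ~0ULL is assumed
 * when no device is supplied.
 */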
static u64 get_coherent_dma_mask(struct device *dev)
{
	u64 mask = ~0ULL;

	if (dev) {
		mask = dev->coherent_dma_mask;

		/*
		 * Sanity check the DMA mask - it must be non-zero, and
		 * must be able to be satisfied by a DMA allocation.
		 */
		if (mask == 0) {
			dev_warn(dev, "coherent DMA mask is unset\n");
			return 0;
		}
	}

	return mask;
}

/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations
 */
static pte_t *consistent_pte;
static DEFINE_SPINLOCK(consistent_lock);

/*
 * VM region handling support.
 *
 * This should become something generic, handling VM region allocations for
 * vmalloc and similar (ioremap, module space, etc).
 *
 * I envisage vmalloc()'s supporting vm_struct becoming:
 *
 *	struct vm_struct {
 *		struct metag_vm_region	region;
 *		unsigned long		flags;
 *		struct page		**pages;
 *		unsigned int		nr_pages;
 *		unsigned long		phys_addr;
 *	};
 *
 * get_vm_area() would then call metag_vm_region_alloc with an appropriate
 * struct metag_vm_region head (eg):
 *
 *	struct metag_vm_region vmalloc_head = {
 *		.vm_list	= LIST_HEAD_INIT(vmalloc_head.vm_list),
 *		.vm_start	= VMALLOC_START,
 *		.vm_end		= VMALLOC_END,
 *	};
 *
 * However, vmalloc_head.vm_start is variable (typically, it is dependent on
 * the amount of RAM found at boot time.)  I would imagine that get_vm_area()
 * would have to initialise this each time prior to calling
 * metag_vm_region_alloc().
 */
struct metag_vm_region {
	struct list_head	vm_list;
	unsigned long		vm_start;
	unsigned long		vm_end;
	struct page		*vm_pages;
	int			vm_active;
};

static struct metag_vm_region consistent_head = {
	.vm_list	= LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start	= CONSISTENT_START,
	.vm_end		= CONSISTENT_END,
};

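/*
 * Carve a free range of "size" bytes out of the window
 * [head->vm_start, head->vm_end) using a first-fit scan of the
 * address-sorted region list.  Takes consistent_lock internally.
 */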
static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region
						     *head, size_t size,
						     gfp_t gfp)
{
	unsigned long addr = head->vm_start, end = head->vm_end - size;
	unsigned long flags;
	struct metag_vm_region *c, *new;

	new = kmalloc(sizeof(struct metag_vm_region), gfp);
	if (!new)
		goto out;

	spin_lock_irqsave(&consistent_lock, flags);

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if ((addr + size) < addr)
			goto nospc;
		if ((addr + size) <= c->vm_start)
			goto found;
		addr = c->vm_end;
		if (addr > end)
			goto nospc;
	}

found:
	/*
	 * Insert this entry _before_ the one we found.
	 */
	list_add_tail(&new->vm_list, &c->vm_list);
	new->vm_start = addr;
	new->vm_end = addr + size;
	new->vm_active = 1;

	spin_unlock_irqrestore(&consistent_lock, flags);
	return new;

nospc:
	spin_unlock_irqrestore(&consistent_lock, flags);
	kfree(new);
out:
	return NULL;
}

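/*
 * Look up an active region by its start address.  Callers hold
 * consistent_lock.
 */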
static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region
						    *head, unsigned long addr)
{
	struct metag_vm_region *c;

	list_for_each_entry(c, &head->vm_list, vm_list) {
		if (c->vm_active && c->vm_start == addr)
			goto out;
	}
	c = NULL;
out:
	return c;
}

/*
 * Allocate DMA-coherent memory space and return both the kernel remapped
 * virtual and bus address for that space.
 */
static void *metag_dma_alloc(struct device *dev, size_t size,
			     dma_addr_t *handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	struct metag_vm_region *c;
	unsigned long order;
	u64 mask = get_coherent_dma_mask(dev);
	u64 limit;

	if (!consistent_pte) {
		pr_err("%s: not initialised\n", __func__);
		dump_stack();
		return NULL;
	}

	if (!mask)
		goto no_page;
	size = PAGE_ALIGN(size);
	limit = (mask + 1) & ~mask;
	if ((limit && size >= limit)
	    || size >= (CONSISTENT_END - CONSISTENT_START)) {
		pr_warn("coherent allocation too big (requested %#zx mask %#llx)\n",
			size, mask);
		return NULL;
	}

	order = get_order(size);

	if (mask != 0xffffffff)
		gfp |= GFP_DMA;

	page = alloc_pages(gfp, order);
	if (!page)
		goto no_page;

	/*
	 * Invalidate any data that might be lurking in the
	 * kernel direct-mapped region for device DMA.
	 */
	{
		void *kaddr = page_address(page);
		memset(kaddr, 0, size);
		flush_dcache_region(kaddr, size);
	}

	/*
	 * Allocate a virtual address in the consistent mapping region.
	 */
	c = metag_vm_region_alloc(&consistent_head, size,
				  gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
	if (c) {
		unsigned long vaddr = c->vm_start;
		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
		struct page *end = page + (1 << order);

		c->vm_pages = page;
		split_page(page, order);

		/*
		 * Set the "dma handle"
		 */
		*handle = page_to_bus(page);

		do {
			BUG_ON(!pte_none(*pte));

			SetPageReserved(page);
			set_pte_at(&init_mm, vaddr,
				   pte, mk_pte(page,
					       pgprot_writecombine
					       (PAGE_KERNEL)));
			page++;
			pte++;
			vaddr += PAGE_SIZE;
		} while (size -= PAGE_SIZE);

		/*
		 * Free the otherwise unused pages.
		 */
		while (page < end) {
			__free_page(page);
			page++;
		}

		return (void *)c->vm_start;
	}

	if (page)
		__free_pages(page, order);
no_page:
	return NULL;
}

/*
 * free a page as defined by the above mapping.
 */
static void metag_dma_free(struct device *dev, size_t size, void *vaddr,
			   dma_addr_t dma_handle, unsigned long attrs)
{
	struct metag_vm_region *c;
	unsigned long flags, addr;
	pte_t *ptep;

	size = PAGE_ALIGN(size);

	spin_lock_irqsave(&consistent_lock, flags);

	c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr);
	if (!c)
		goto no_area;

	c->vm_active = 0;
	if ((c->vm_end - c->vm_start) != size) {
		pr_err("%s: freeing wrong coherent size (%lu != %zu)\n",
		       __func__, c->vm_end - c->vm_start, size);
		dump_stack();
		size = c->vm_end - c->vm_start;
	}

	ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
	addr = c->vm_start;
	do {
		pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
		unsigned long pfn;

		ptep++;
		addr += PAGE_SIZE;

		if (!pte_none(pte) && pte_present(pte)) {
			pfn = pte_pfn(pte);

			if (pfn_valid(pfn)) {
				struct page *page = pfn_to_page(pfn);
				__free_reserved_page(page);
				continue;
			}
		}

		pr_crit("%s: bad page in kernel page table\n",
			__func__);
	} while (size -= PAGE_SIZE);

	flush_tlb_kernel_range(c->vm_start, c->vm_end);

	list_del(&c->vm_list);

	spin_unlock_irqrestore(&consistent_lock, flags);

	kfree(c);
	return;

no_area:
	spin_unlock_irqrestore(&consistent_lock, flags);
	pr_err("%s: trying to free invalid coherent area: %p\n",
	       __func__, vaddr);
	dump_stack();
}
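
/*
 * Illustrative only: drivers do not call metag_dma_alloc()/metag_dma_free()
 * directly.  They go through the generic DMA API, which dispatches here via
 * metag_dma_ops (see the bottom of this file).  Assuming a hypothetical
 * device "dev" with its coherent DMA mask set, a typical sequence is:
 *
 *	dma_addr_t dma;
 *	void *buf = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
 *
 *	if (buf) {
 *		...program the device with "dma", access the buffer via "buf"...
 *		dma_free_coherent(dev, SZ_4K, buf, dma);
 *	}
 */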
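/*
 * Map an existing coherent allocation into a user process's address space,
 * honouring DMA_ATTR_WRITE_COMBINE, by remapping the underlying pages with
 * remap_pfn_range().
 */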
static int metag_dma_mmap(struct device *dev, struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  unsigned long attrs)
{
	unsigned long flags, user_size, kern_size;
	struct metag_vm_region *c;
	int ret = -ENXIO;

	if (attrs & DMA_ATTR_WRITE_COMBINE)
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
	else
		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

	user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;

	spin_lock_irqsave(&consistent_lock, flags);
	c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr);
	spin_unlock_irqrestore(&consistent_lock, flags);

	if (c) {
		unsigned long off = vma->vm_pgoff;

		kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT;

		if (off < kern_size &&
		    user_size <= (kern_size - off)) {
			ret = remap_pfn_range(vma, vma->vm_start,
					      page_to_pfn(c->vm_pages) + off,
					      user_size << PAGE_SHIFT,
					      vma->vm_page_prot);
		}
	}

	return ret;
}

/*
 * Initialise the consistent memory allocation.
 */
static int __init dma_alloc_init(void)
{
	pgd_t *pgd, *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;
	pte_t *pte;
	int ret = 0;

	do {
		int offset = pgd_index(CONSISTENT_START);
		pgd = pgd_offset(&init_mm, CONSISTENT_START);
		pud = pud_alloc(&init_mm, pgd, CONSISTENT_START);
		pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START);
		WARN_ON(!pmd_none(*pmd));

		pte = pte_alloc_kernel(pmd, CONSISTENT_START);
		if (!pte) {
			pr_err("%s: no pte tables\n", __func__);
			ret = -ENOMEM;
			break;
		}

		pgd_k = ((pgd_t *) mmu_get_base()) + offset;
		pud_k = pud_offset(pgd_k, CONSISTENT_START);
		pmd_k = pmd_offset(pud_k, CONSISTENT_START);
		set_pmd(pmd_k, *pmd);

		consistent_pte = pte;
	} while (0);

	return ret;
}
early_initcall(dma_alloc_init);

/*
 * make an area consistent to devices.
 */
static void dma_sync_for_device(void *vaddr, size_t size, int dma_direction)
{
	/*
	 * Ensure any writes get through the write combiner. This is necessary
	 * even with DMA_FROM_DEVICE, or the write may dirty the cache after
	 * we've invalidated it and get written back during the DMA.
	 */

	barrier();

	switch (dma_direction) {
	case DMA_BIDIRECTIONAL:
		/*
		 * Writeback to ensure the device can see our latest changes and
		 * so that we have no dirty lines, and invalidate the cache
		 * lines too in preparation for receiving the buffer back
		 * (dma_sync_for_cpu) later.
		 */
		flush_dcache_region(vaddr, size);
		break;
	case DMA_TO_DEVICE:
		/*
		 * Writeback to ensure the device can see our latest changes.
		 * There's no need to invalidate as the device shouldn't write
		 * to the buffer.
		 */
		writeback_dcache_region(vaddr, size);
		break;
	case DMA_FROM_DEVICE:
		/*
		 * Invalidate to ensure we have no dirty lines that could get
		 * written back during the DMA. It's also safe to flush
		 * (writeback) here if necessary.
		 */
		invalidate_dcache_region(vaddr, size);
		break;
	case DMA_NONE:
		BUG();
	}

	wmb();
}

/*
 * make an area consistent to the core.
 */
static void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction)
{
	/*
	 * Hardware L2 cache prefetch doesn't occur across 4K physical
	 * boundaries, however according to Documentation/DMA-API-HOWTO.txt
	 * kmalloc'd memory is DMA'able, so accesses in nearby memory could
	 * trigger a cache fill in the DMA buffer.
	 *
	 * This should never cause dirty lines, so a flush or invalidate should
	 * be safe to allow us to see data from the device.
	 */
	if (_meta_l2c_pf_is_enabled()) {
		switch (dma_direction) {
		case DMA_BIDIRECTIONAL:
		case DMA_FROM_DEVICE:
			invalidate_dcache_region(vaddr, size);
			break;
		case DMA_TO_DEVICE:
			/* The device shouldn't have written to the buffer */
			break;
		case DMA_NONE:
			BUG();
		}
	}

	rmb();
}

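/*
 * Streaming DMA mappings: the bus address handed to the device is simply the
 * physical address of the buffer.  Cache maintenance is performed on the
 * kernel's linear-mapping view of that buffer (note that map_page below
 * relies on the physical address also being a valid kernel virtual address
 * for lowmem).
 */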
static dma_addr_t metag_dma_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction direction, unsigned long attrs)
{
	dma_sync_for_device((void *)(page_to_phys(page) + offset), size,
			    direction);
	return page_to_phys(page) + offset;
}

static void metag_dma_unmap_page(struct device *dev, dma_addr_t dma_address,
				 size_t size, enum dma_data_direction direction,
				 unsigned long attrs)
{
	dma_sync_for_cpu(phys_to_virt(dma_address), size, direction);
}

static int metag_dma_map_sg(struct device *dev, struct scatterlist *sglist,
			    int nents, enum dma_data_direction direction,
			    unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sglist, sg, nents, i) {
		BUG_ON(!sg_page(sg));

		sg->dma_address = sg_phys(sg);
		dma_sync_for_device(sg_virt(sg), sg->length, direction);
	}

	return nents;
}

static void metag_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
			       int nhwentries, enum dma_data_direction direction,
			       unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sglist, sg, nhwentries, i) {
		BUG_ON(!sg_page(sg));

		sg->dma_address = sg_phys(sg);
		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
	}
}

static void metag_dma_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dma_handle, size_t size,
					  enum dma_data_direction direction)
{
	dma_sync_for_cpu(phys_to_virt(dma_handle), size, direction);
}

static void metag_dma_sync_single_for_device(struct device *dev,
					     dma_addr_t dma_handle, size_t size,
					     enum dma_data_direction direction)
{
	dma_sync_for_device(phys_to_virt(dma_handle), size, direction);
}

static void metag_dma_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sglist, int nelems,
				      enum dma_data_direction direction)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i)
		dma_sync_for_cpu(sg_virt(sg), sg->length, direction);
}

static void metag_dma_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sglist, int nelems,
					 enum dma_data_direction direction)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i)
		dma_sync_for_device(sg_virt(sg), sg->length, direction);
}

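/*
 * The dma_map_ops table the generic DMA API uses on Meta; it is exported so
 * modules can reach it through the arch dma-mapping glue.
 */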
struct dma_map_ops metag_dma_ops = {
	.alloc			= metag_dma_alloc,
	.free			= metag_dma_free,
	.map_page		= metag_dma_map_page,
	.unmap_page		= metag_dma_unmap_page,
	.map_sg			= metag_dma_map_sg,
	.unmap_sg		= metag_dma_unmap_sg,
	.sync_single_for_device	= metag_dma_sync_single_for_device,
	.sync_single_for_cpu	= metag_dma_sync_single_for_cpu,
	.sync_sg_for_device	= metag_dma_sync_sg_for_device,
	.sync_sg_for_cpu	= metag_dma_sync_sg_for_cpu,
	.mmap			= metag_dma_mmap,
};
EXPORT_SYMBOL(metag_dma_ops);