Commit | Line | Data |
---|---|---|
f507758c JH |
1 | /* |
2 | * Meta version derived from arch/powerpc/lib/dma-noncoherent.c | |
3 | * Copyright (C) 2008 Imagination Technologies Ltd. | |
4 | * | |
5 | * PowerPC version derived from arch/arm/mm/consistent.c | |
6 | * Copyright (C) 2001 Dan Malek (dmalek@jlc.net) | |
7 | * | |
8 | * Copyright (C) 2000 Russell King | |
9 | * | |
10 | * Consistent memory allocators. Used for DMA devices that want to | |
11 | * share uncached memory with the processor core. The function return | |
12 | * is the virtual address and 'dma_handle' is the physical address. | |
13 | * Mostly stolen from the ARM port, with some changes for PowerPC. | |
14 | * -- Dan | |
15 | * | |
16 | * Reorganized to get rid of the arch-specific consistent_* functions | |
17 | * and provide non-coherent implementations for the DMA API. -Matt | |
18 | * | |
19 | * Added in_interrupt() safe dma_alloc_coherent()/dma_free_coherent() | |
20 | * implementation. This is pulled straight from ARM and barely | |
21 | * modified. -Matt | |
22 | * | |
23 | * This program is free software; you can redistribute it and/or modify | |
24 | * it under the terms of the GNU General Public License version 2 as | |
25 | * published by the Free Software Foundation. | |
26 | */ | |
27 | ||
28 | #include <linux/sched.h> | |
29 | #include <linux/kernel.h> | |
30 | #include <linux/errno.h> | |
31 | #include <linux/export.h> | |
32 | #include <linux/string.h> | |
33 | #include <linux/types.h> | |
34 | #include <linux/highmem.h> | |
35 | #include <linux/dma-mapping.h> | |
36 | #include <linux/slab.h> | |
37 | ||
38 | #include <asm/tlbflush.h> | |
39 | #include <asm/mmu.h> | |
40 | ||
/*
 * Page index of virtual address @x relative to CONSISTENT_START.  Used to
 * index into the consistent_pte table.
 */
#define CONSISTENT_OFFSET(x) \
	((((unsigned long)(x)) - CONSISTENT_START) >> PAGE_SHIFT)
43 | ||
44 | static u64 get_coherent_dma_mask(struct device *dev) | |
45 | { | |
46 | u64 mask = ~0ULL; | |
47 | ||
48 | if (dev) { | |
49 | mask = dev->coherent_dma_mask; | |
50 | ||
51 | /* | |
52 | * Sanity check the DMA mask - it must be non-zero, and | |
53 | * must be able to be satisfied by a DMA allocation. | |
54 | */ | |
55 | if (mask == 0) { | |
56 | dev_warn(dev, "coherent DMA mask is unset\n"); | |
57 | return 0; | |
58 | } | |
59 | } | |
60 | ||
61 | return mask; | |
62 | } | |
63 | /* | |
64 | * This is the page table (2MB) covering uncached, DMA consistent allocations | |
65 | */ | |
66 | static pte_t *consistent_pte; | |
67 | static DEFINE_SPINLOCK(consistent_lock); | |
68 | ||
69 | /* | |
70 | * VM region handling support. | |
71 | * | |
72 | * This should become something generic, handling VM region allocations for | |
73 | * vmalloc and similar (ioremap, module space, etc). | |
74 | * | |
75 | * I envisage vmalloc()'s supporting vm_struct becoming: | |
76 | * | |
77 | * struct vm_struct { | |
78 | * struct metag_vm_region region; | |
79 | * unsigned long flags; | |
80 | * struct page **pages; | |
81 | * unsigned int nr_pages; | |
82 | * unsigned long phys_addr; | |
83 | * }; | |
84 | * | |
85 | * get_vm_area() would then call metag_vm_region_alloc with an appropriate | |
86 | * struct metag_vm_region head (eg): | |
87 | * | |
88 | * struct metag_vm_region vmalloc_head = { | |
89 | * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list), | |
90 | * .vm_start = VMALLOC_START, | |
91 | * .vm_end = VMALLOC_END, | |
92 | * }; | |
93 | * | |
94 | * However, vmalloc_head.vm_start is variable (typically, it is dependent on | |
95 | * the amount of RAM found at boot time.) I would imagine that get_vm_area() | |
96 | * would have to initialise this each time prior to calling | |
97 | * metag_vm_region_alloc(). | |
98 | */ | |
99 | struct metag_vm_region { | |
100 | struct list_head vm_list; | |
101 | unsigned long vm_start; | |
102 | unsigned long vm_end; | |
103 | struct page *vm_pages; | |
104 | int vm_active; | |
105 | }; | |
106 | ||
107 | static struct metag_vm_region consistent_head = { | |
108 | .vm_list = LIST_HEAD_INIT(consistent_head.vm_list), | |
109 | .vm_start = CONSISTENT_START, | |
110 | .vm_end = CONSISTENT_END, | |
111 | }; | |
112 | ||
113 | static struct metag_vm_region *metag_vm_region_alloc(struct metag_vm_region | |
114 | *head, size_t size, | |
115 | gfp_t gfp) | |
116 | { | |
117 | unsigned long addr = head->vm_start, end = head->vm_end - size; | |
118 | unsigned long flags; | |
119 | struct metag_vm_region *c, *new; | |
120 | ||
121 | new = kmalloc(sizeof(struct metag_vm_region), gfp); | |
122 | if (!new) | |
123 | goto out; | |
124 | ||
125 | spin_lock_irqsave(&consistent_lock, flags); | |
126 | ||
127 | list_for_each_entry(c, &head->vm_list, vm_list) { | |
128 | if ((addr + size) < addr) | |
129 | goto nospc; | |
130 | if ((addr + size) <= c->vm_start) | |
131 | goto found; | |
132 | addr = c->vm_end; | |
133 | if (addr > end) | |
134 | goto nospc; | |
135 | } | |
136 | ||
137 | found: | |
138 | /* | |
139 | * Insert this entry _before_ the one we found. | |
140 | */ | |
141 | list_add_tail(&new->vm_list, &c->vm_list); | |
142 | new->vm_start = addr; | |
143 | new->vm_end = addr + size; | |
144 | new->vm_active = 1; | |
145 | ||
146 | spin_unlock_irqrestore(&consistent_lock, flags); | |
147 | return new; | |
148 | ||
149 | nospc: | |
150 | spin_unlock_irqrestore(&consistent_lock, flags); | |
151 | kfree(new); | |
152 | out: | |
153 | return NULL; | |
154 | } | |
155 | ||
156 | static struct metag_vm_region *metag_vm_region_find(struct metag_vm_region | |
157 | *head, unsigned long addr) | |
158 | { | |
159 | struct metag_vm_region *c; | |
160 | ||
161 | list_for_each_entry(c, &head->vm_list, vm_list) { | |
162 | if (c->vm_active && c->vm_start == addr) | |
163 | goto out; | |
164 | } | |
165 | c = NULL; | |
166 | out: | |
167 | return c; | |
168 | } | |
169 | ||
170 | /* | |
171 | * Allocate DMA-coherent memory space and return both the kernel remapped | |
172 | * virtual and bus address for that space. | |
173 | */ | |
174 | void *dma_alloc_coherent(struct device *dev, size_t size, | |
175 | dma_addr_t *handle, gfp_t gfp) | |
176 | { | |
177 | struct page *page; | |
178 | struct metag_vm_region *c; | |
179 | unsigned long order; | |
180 | u64 mask = get_coherent_dma_mask(dev); | |
181 | u64 limit; | |
182 | ||
183 | if (!consistent_pte) { | |
184 | pr_err("%s: not initialised\n", __func__); | |
185 | dump_stack(); | |
186 | return NULL; | |
187 | } | |
188 | ||
189 | if (!mask) | |
190 | goto no_page; | |
191 | size = PAGE_ALIGN(size); | |
192 | limit = (mask + 1) & ~mask; | |
193 | if ((limit && size >= limit) | |
194 | || size >= (CONSISTENT_END - CONSISTENT_START)) { | |
195 | pr_warn("coherent allocation too big (requested %#x mask %#Lx)\n", | |
196 | size, mask); | |
197 | return NULL; | |
198 | } | |
199 | ||
200 | order = get_order(size); | |
201 | ||
202 | if (mask != 0xffffffff) | |
203 | gfp |= GFP_DMA; | |
204 | ||
205 | page = alloc_pages(gfp, order); | |
206 | if (!page) | |
207 | goto no_page; | |
208 | ||
209 | /* | |
210 | * Invalidate any data that might be lurking in the | |
211 | * kernel direct-mapped region for device DMA. | |
212 | */ | |
213 | { | |
214 | void *kaddr = page_address(page); | |
215 | memset(kaddr, 0, size); | |
216 | flush_dcache_region(kaddr, size); | |
217 | } | |
218 | ||
219 | /* | |
220 | * Allocate a virtual address in the consistent mapping region. | |
221 | */ | |
222 | c = metag_vm_region_alloc(&consistent_head, size, | |
223 | gfp & ~(__GFP_DMA | __GFP_HIGHMEM)); | |
224 | if (c) { | |
225 | unsigned long vaddr = c->vm_start; | |
226 | pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr); | |
227 | struct page *end = page + (1 << order); | |
228 | ||
229 | c->vm_pages = page; | |
230 | split_page(page, order); | |
231 | ||
232 | /* | |
233 | * Set the "dma handle" | |
234 | */ | |
235 | *handle = page_to_bus(page); | |
236 | ||
237 | do { | |
238 | BUG_ON(!pte_none(*pte)); | |
239 | ||
240 | SetPageReserved(page); | |
241 | set_pte_at(&init_mm, vaddr, | |
242 | pte, mk_pte(page, | |
243 | pgprot_writecombine | |
244 | (PAGE_KERNEL))); | |
245 | page++; | |
246 | pte++; | |
247 | vaddr += PAGE_SIZE; | |
248 | } while (size -= PAGE_SIZE); | |
249 | ||
250 | /* | |
251 | * Free the otherwise unused pages. | |
252 | */ | |
253 | while (page < end) { | |
254 | __free_page(page); | |
255 | page++; | |
256 | } | |
257 | ||
258 | return (void *)c->vm_start; | |
259 | } | |
260 | ||
261 | if (page) | |
262 | __free_pages(page, order); | |
263 | no_page: | |
264 | return NULL; | |
265 | } | |
266 | EXPORT_SYMBOL(dma_alloc_coherent); | |
267 | ||
268 | /* | |
269 | * free a page as defined by the above mapping. | |
270 | */ | |
271 | void dma_free_coherent(struct device *dev, size_t size, | |
272 | void *vaddr, dma_addr_t dma_handle) | |
273 | { | |
274 | struct metag_vm_region *c; | |
275 | unsigned long flags, addr; | |
276 | pte_t *ptep; | |
277 | ||
278 | size = PAGE_ALIGN(size); | |
279 | ||
280 | spin_lock_irqsave(&consistent_lock, flags); | |
281 | ||
282 | c = metag_vm_region_find(&consistent_head, (unsigned long)vaddr); | |
283 | if (!c) | |
284 | goto no_area; | |
285 | ||
286 | c->vm_active = 0; | |
287 | if ((c->vm_end - c->vm_start) != size) { | |
288 | pr_err("%s: freeing wrong coherent size (%ld != %d)\n", | |
289 | __func__, c->vm_end - c->vm_start, size); | |
290 | dump_stack(); | |
291 | size = c->vm_end - c->vm_start; | |
292 | } | |
293 | ||
294 | ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start); | |
295 | addr = c->vm_start; | |
296 | do { | |
297 | pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep); | |
298 | unsigned long pfn; | |
299 | ||
300 | ptep++; | |
301 | addr += PAGE_SIZE; | |
302 | ||
303 | if (!pte_none(pte) && pte_present(pte)) { | |
304 | pfn = pte_pfn(pte); | |
305 | ||
306 | if (pfn_valid(pfn)) { | |
307 | struct page *page = pfn_to_page(pfn); | |
308 | ClearPageReserved(page); | |
309 | ||
310 | __free_page(page); | |
311 | continue; | |
312 | } | |
313 | } | |
314 | ||
315 | pr_crit("%s: bad page in kernel page table\n", | |
316 | __func__); | |
317 | } while (size -= PAGE_SIZE); | |
318 | ||
319 | flush_tlb_kernel_range(c->vm_start, c->vm_end); | |
320 | ||
321 | list_del(&c->vm_list); | |
322 | ||
323 | spin_unlock_irqrestore(&consistent_lock, flags); | |
324 | ||
325 | kfree(c); | |
326 | return; | |
327 | ||
328 | no_area: | |
329 | spin_unlock_irqrestore(&consistent_lock, flags); | |
330 | pr_err("%s: trying to free invalid coherent area: %p\n", | |
331 | __func__, vaddr); | |
332 | dump_stack(); | |
333 | } | |
334 | EXPORT_SYMBOL(dma_free_coherent); | |
335 | ||
336 | ||
337 | static int dma_mmap(struct device *dev, struct vm_area_struct *vma, | |
338 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | |
339 | { | |
340 | int ret = -ENXIO; | |
341 | ||
342 | unsigned long flags, user_size, kern_size; | |
343 | struct metag_vm_region *c; | |
344 | ||
345 | user_size = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; | |
346 | ||
347 | spin_lock_irqsave(&consistent_lock, flags); | |
348 | c = metag_vm_region_find(&consistent_head, (unsigned long)cpu_addr); | |
349 | spin_unlock_irqrestore(&consistent_lock, flags); | |
350 | ||
351 | if (c) { | |
352 | unsigned long off = vma->vm_pgoff; | |
353 | ||
354 | kern_size = (c->vm_end - c->vm_start) >> PAGE_SHIFT; | |
355 | ||
356 | if (off < kern_size && | |
357 | user_size <= (kern_size - off)) { | |
358 | ret = remap_pfn_range(vma, vma->vm_start, | |
359 | page_to_pfn(c->vm_pages) + off, | |
360 | user_size << PAGE_SHIFT, | |
361 | vma->vm_page_prot); | |
362 | } | |
363 | } | |
364 | ||
365 | ||
366 | return ret; | |
367 | } | |
368 | ||
369 | int dma_mmap_coherent(struct device *dev, struct vm_area_struct *vma, | |
370 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | |
371 | { | |
372 | vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); | |
373 | return dma_mmap(dev, vma, cpu_addr, dma_addr, size); | |
374 | } | |
375 | EXPORT_SYMBOL(dma_mmap_coherent); | |
376 | ||
377 | int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, | |
378 | void *cpu_addr, dma_addr_t dma_addr, size_t size) | |
379 | { | |
380 | vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); | |
381 | return dma_mmap(dev, vma, cpu_addr, dma_addr, size); | |
382 | } | |
383 | EXPORT_SYMBOL(dma_mmap_writecombine); | |
384 | ||
385 | ||
386 | ||
387 | ||
388 | /* | |
389 | * Initialise the consistent memory allocation. | |
390 | */ | |
391 | static int __init dma_alloc_init(void) | |
392 | { | |
393 | pgd_t *pgd, *pgd_k; | |
394 | pud_t *pud, *pud_k; | |
395 | pmd_t *pmd, *pmd_k; | |
396 | pte_t *pte; | |
397 | int ret = 0; | |
398 | ||
399 | do { | |
400 | int offset = pgd_index(CONSISTENT_START); | |
401 | pgd = pgd_offset(&init_mm, CONSISTENT_START); | |
402 | pud = pud_alloc(&init_mm, pgd, CONSISTENT_START); | |
403 | pmd = pmd_alloc(&init_mm, pud, CONSISTENT_START); | |
404 | if (!pmd) { | |
405 | pr_err("%s: no pmd tables\n", __func__); | |
406 | ret = -ENOMEM; | |
407 | break; | |
408 | } | |
409 | WARN_ON(!pmd_none(*pmd)); | |
410 | ||
411 | pte = pte_alloc_kernel(pmd, CONSISTENT_START); | |
412 | if (!pte) { | |
413 | pr_err("%s: no pte tables\n", __func__); | |
414 | ret = -ENOMEM; | |
415 | break; | |
416 | } | |
417 | ||
418 | pgd_k = ((pgd_t *) mmu_get_base()) + offset; | |
419 | pud_k = pud_offset(pgd_k, CONSISTENT_START); | |
420 | pmd_k = pmd_offset(pud_k, CONSISTENT_START); | |
421 | set_pmd(pmd_k, *pmd); | |
422 | ||
423 | consistent_pte = pte; | |
424 | } while (0); | |
425 | ||
426 | return ret; | |
427 | } | |
428 | early_initcall(dma_alloc_init); | |
429 | ||
430 | /* | |
431 | * make an area consistent to devices. | |
432 | */ | |
433 | void dma_sync_for_device(void *vaddr, size_t size, int dma_direction) | |
434 | { | |
435 | /* | |
436 | * Ensure any writes get through the write combiner. This is necessary | |
437 | * even with DMA_FROM_DEVICE, or the write may dirty the cache after | |
438 | * we've invalidated it and get written back during the DMA. | |
439 | */ | |
440 | ||
441 | barrier(); | |
442 | ||
443 | switch (dma_direction) { | |
444 | case DMA_BIDIRECTIONAL: | |
445 | /* | |
446 | * Writeback to ensure the device can see our latest changes and | |
447 | * so that we have no dirty lines, and invalidate the cache | |
448 | * lines too in preparation for receiving the buffer back | |
449 | * (dma_sync_for_cpu) later. | |
450 | */ | |
451 | flush_dcache_region(vaddr, size); | |
452 | break; | |
453 | case DMA_TO_DEVICE: | |
454 | /* | |
455 | * Writeback to ensure the device can see our latest changes. | |
456 | * There's no need to invalidate as the device shouldn't write | |
457 | * to the buffer. | |
458 | */ | |
459 | writeback_dcache_region(vaddr, size); | |
460 | break; | |
461 | case DMA_FROM_DEVICE: | |
462 | /* | |
463 | * Invalidate to ensure we have no dirty lines that could get | |
464 | * written back during the DMA. It's also safe to flush | |
465 | * (writeback) here if necessary. | |
466 | */ | |
467 | invalidate_dcache_region(vaddr, size); | |
468 | break; | |
469 | case DMA_NONE: | |
470 | BUG(); | |
471 | } | |
472 | ||
473 | wmb(); | |
474 | } | |
475 | EXPORT_SYMBOL(dma_sync_for_device); | |
476 | ||
477 | /* | |
478 | * make an area consistent to the core. | |
479 | */ | |
480 | void dma_sync_for_cpu(void *vaddr, size_t size, int dma_direction) | |
481 | { | |
482 | /* | |
483 | * Hardware L2 cache prefetch doesn't occur across 4K physical | |
484 | * boundaries, however according to Documentation/DMA-API-HOWTO.txt | |
485 | * kmalloc'd memory is DMA'able, so accesses in nearby memory could | |
486 | * trigger a cache fill in the DMA buffer. | |
487 | * | |
488 | * This should never cause dirty lines, so a flush or invalidate should | |
489 | * be safe to allow us to see data from the device. | |
490 | */ | |
491 | if (_meta_l2c_pf_is_enabled()) { | |
492 | switch (dma_direction) { | |
493 | case DMA_BIDIRECTIONAL: | |
494 | case DMA_FROM_DEVICE: | |
495 | invalidate_dcache_region(vaddr, size); | |
496 | break; | |
497 | case DMA_TO_DEVICE: | |
498 | /* The device shouldn't have written to the buffer */ | |
499 | break; | |
500 | case DMA_NONE: | |
501 | BUG(); | |
502 | } | |
503 | } | |
504 | ||
505 | rmb(); | |
506 | } | |
507 | EXPORT_SYMBOL(dma_sync_for_cpu); |