tile: use asm-generic/bitops/builtin-*.h
[deliverable/linux.git] / arch / tile / lib / memcpy_tile64.c
CommitLineData
867e359b
CM
1/*
2 * Copyright 2010 Tilera Corporation. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation, version 2.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
11 * NON INFRINGEMENT. See the GNU General Public License for
12 * more details.
13 */
14
15#include <linux/string.h>
16#include <linux/smp.h>
17#include <linux/module.h>
18#include <linux/uaccess.h>
19#include <asm/fixmap.h>
20#include <asm/kmap_types.h>
21#include <asm/tlbflush.h>
22#include <hv/hypervisor.h>
23#include <arch/chip.h>
24
25
26#if !CHIP_HAS_COHERENT_LOCAL_CACHE()
27
28/* Defined in memcpy.S */
29extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
30extern unsigned long __copy_to_user_inatomic_asm(
31 void __user *to, const void *from, unsigned long n);
32extern unsigned long __copy_from_user_inatomic_asm(
33 void *to, const void __user *from, unsigned long n);
34extern unsigned long __copy_from_user_zeroing_asm(
35 void *to, const void __user *from, unsigned long n);
36
37typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
38
/* Copies of at least this many bytes are considered for the TLB-based path. */
#define LARGE_COPY_CUTOFF 2048

/*
 * Tell the simulator what we are about to do: write the
 * ALLOW_MULTIPLE_CACHING control word, carrying the flag "b" in the
 * operand bits, to SPR_SIM_CONTROL.
 */
#define sim_allow_multiple_caching(b) \
	__insn_mtspr(SPR_SIM_CONTROL, \
		     SIM_CONTROL_ALLOW_MULTIPLE_CACHING | \
		     ((b) << _SIM_CONTROL_OPERATOR_BITS))
46
47/*
48 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
49 *
50 * We set up our own source and destination PTEs that we fully control.
51 * This is the only way to guarantee that we don't race with another
52 * thread that is modifying the PTE; we can't afford to try the
53 * copy_{to,from}_user() technique of catching the interrupt, since
54 * we must run with interrupts disabled to avoid the risk of some
55 * other code seeing the incoherent data in our cache. (Recall that
56 * our cache is indexed by PA, so even if the other code doesn't use
38a6f426 57 * our kmap_atomic virtual addresses, they'll still hit in cache using
867e359b
CM
58 * the normal VAs that aren't supposed to hit in cache.)
59 */
60static void memcpy_multicache(void *dest, const void *source,
61 pte_t dst_pte, pte_t src_pte, int len)
62{
0707ad30
CM
63 int idx;
64 unsigned long flags, newsrc, newdst;
867e359b
CM
65 pmd_t *pmdp;
66 pte_t *ptep;
38a6f426 67 int type0, type1;
bc1a298f 68 int cpu = smp_processor_id();
867e359b
CM
69
70 /*
71 * Disable interrupts so that we don't recurse into memcpy()
72 * in an interrupt handler, nor accidentally reference
73 * the PA of the source from an interrupt routine. Also
74 * notify the simulator that we're playing games so we don't
75 * generate spurious coherency warnings.
76 */
77 local_irq_save(flags);
78 sim_allow_multiple_caching(1);
79
80 /* Set up the new dest mapping */
38a6f426
CM
81 type0 = kmap_atomic_idx_push();
82 idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
867e359b
CM
83 newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
84 pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
85 ptep = pte_offset_kernel(pmdp, newdst);
86 if (pte_val(*ptep) != pte_val(dst_pte)) {
87 set_pte(ptep, dst_pte);
88 local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
89 }
90
91 /* Set up the new source mapping */
38a6f426
CM
92 type1 = kmap_atomic_idx_push();
93 idx += (type0 - type1);
867e359b
CM
94 src_pte = hv_pte_set_nc(src_pte);
95 src_pte = hv_pte_clear_writable(src_pte); /* be paranoid */
96 newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
97 pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
98 ptep = pte_offset_kernel(pmdp, newsrc);
76c567fb 99 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
867e359b
CM
100 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
101
102 /* Actually move the data. */
103 __memcpy_asm((void *)newdst, (const void *)newsrc, len);
104
105 /*
106 * Remap the source as locally-cached and not OLOC'ed so that
107 * we can inval without also invaling the remote cpu's cache.
108 * This also avoids known errata with inv'ing cacheable oloc data.
109 */
110 src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
111 src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
76c567fb 112 __set_pte(ptep, src_pte); /* set_pte() would be confused by this */
867e359b
CM
113 local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
114
115 /*
116 * Do the actual invalidation, covering the full L2 cache line
117 * at the end since __memcpy_asm() is somewhat aggressive.
118 */
119 __inv_buffer((void *)newsrc, len);
120
121 /*
122 * We're done: notify the simulator that all is back to normal,
123 * and re-enable interrupts and pre-emption.
124 */
38a6f426
CM
125 kmap_atomic_idx_pop();
126 kmap_atomic_idx_pop();
867e359b
CM
127 sim_allow_multiple_caching(0);
128 local_irq_restore(flags);
867e359b
CM
129}
130
131/*
132 * Identify large copies from remotely-cached memory, and copy them
133 * via memcpy_multicache() if they look good, otherwise fall back
134 * to the particular kind of copying passed as the memcpy_t function.
135 */
136static unsigned long fast_copy(void *dest, const void *source, int len,
137 memcpy_t func)
138{
bc1a298f
CM
139 int cpu = get_cpu();
140 unsigned long retval;
141
867e359b
CM
142 /*
143 * Check if it's big enough to bother with. We may end up doing a
144 * small copy via TLB manipulation if we're near a page boundary,
145 * but presumably we'll make it up when we hit the second page.
146 */
147 while (len >= LARGE_COPY_CUTOFF) {
148 int copy_size, bytes_left_on_page;
149 pte_t *src_ptep, *dst_ptep;
150 pte_t src_pte, dst_pte;
151 struct page *src_page, *dst_page;
152
153 /* Is the source page oloc'ed to a remote cpu? */
154retry_source:
155 src_ptep = virt_to_pte(current->mm, (unsigned long)source);
156 if (src_ptep == NULL)
157 break;
158 src_pte = *src_ptep;
159 if (!hv_pte_get_present(src_pte) ||
160 !hv_pte_get_readable(src_pte) ||
161 hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
162 break;
bc1a298f 163 if (get_remote_cache_cpu(src_pte) == cpu)
867e359b 164 break;
d5d14ed6 165 src_page = pfn_to_page(pte_pfn(src_pte));
867e359b
CM
166 get_page(src_page);
167 if (pte_val(src_pte) != pte_val(*src_ptep)) {
168 put_page(src_page);
169 goto retry_source;
170 }
171 if (pte_huge(src_pte)) {
172 /* Adjust the PTE to correspond to a small page */
d5d14ed6 173 int pfn = pte_pfn(src_pte);
867e359b
CM
174 pfn += (((unsigned long)source & (HPAGE_SIZE-1))
175 >> PAGE_SHIFT);
176 src_pte = pfn_pte(pfn, src_pte);
177 src_pte = pte_mksmall(src_pte);
178 }
179
180 /* Is the destination page writable? */
181retry_dest:
182 dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
183 if (dst_ptep == NULL) {
184 put_page(src_page);
185 break;
186 }
187 dst_pte = *dst_ptep;
188 if (!hv_pte_get_present(dst_pte) ||
189 !hv_pte_get_writable(dst_pte)) {
190 put_page(src_page);
191 break;
192 }
d5d14ed6 193 dst_page = pfn_to_page(pte_pfn(dst_pte));
867e359b
CM
194 if (dst_page == src_page) {
195 /*
196 * Source and dest are on the same page; this
197 * potentially exposes us to incoherence if any
198 * part of src and dest overlap on a cache line.
199 * Just give up rather than trying to be precise.
200 */
201 put_page(src_page);
202 break;
203 }
204 get_page(dst_page);
205 if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
206 put_page(dst_page);
207 goto retry_dest;
208 }
209 if (pte_huge(dst_pte)) {
210 /* Adjust the PTE to correspond to a small page */
d5d14ed6 211 int pfn = pte_pfn(dst_pte);
867e359b
CM
212 pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
213 >> PAGE_SHIFT);
214 dst_pte = pfn_pte(pfn, dst_pte);
215 dst_pte = pte_mksmall(dst_pte);
216 }
217
218 /* All looks good: create a cachable PTE and copy from it */
219 copy_size = len;
220 bytes_left_on_page =
221 PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
222 if (copy_size > bytes_left_on_page)
223 copy_size = bytes_left_on_page;
224 bytes_left_on_page =
225 PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
226 if (copy_size > bytes_left_on_page)
227 copy_size = bytes_left_on_page;
228 memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);
229
230 /* Release the pages */
231 put_page(dst_page);
232 put_page(src_page);
233
234 /* Continue on the next page */
235 dest += copy_size;
236 source += copy_size;
237 len -= copy_size;
238 }
239
bc1a298f
CM
240 retval = func(dest, source, len);
241 put_cpu();
242 return retval;
867e359b
CM
243}
244
245void *memcpy(void *to, const void *from, __kernel_size_t n)
246{
247 if (n < LARGE_COPY_CUTOFF)
248 return (void *)__memcpy_asm(to, from, n);
249 else
250 return (void *)fast_copy(to, from, n, __memcpy_asm);
251}
252
253unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
254 unsigned long n)
255{
256 if (n < LARGE_COPY_CUTOFF)
257 return __copy_to_user_inatomic_asm(to, from, n);
258 else
259 return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
260}
261
262unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
263 unsigned long n)
264{
265 if (n < LARGE_COPY_CUTOFF)
266 return __copy_from_user_inatomic_asm(to, from, n);
267 else
268 return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
269}
270
271unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
272 unsigned long n)
273{
274 if (n < LARGE_COPY_CUTOFF)
275 return __copy_from_user_zeroing_asm(to, from, n);
276 else
277 return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
278}
279
280#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */
This page took 0.179649 seconds and 5 git commands to generate.