Commit | Line | Data |
---|---|---|
1965aae3 PA |
1 | #ifndef _ASM_X86_XOR_32_H |
2 | #define _ASM_X86_XOR_32_H | |
0db125c4 | 3 | |
1da177e4 | 4 | /* |
e8f6e3f8 | 5 | * Optimized RAID-5 checksumming functions for MMX. |
1da177e4 LT |
6 | * |
7 | * This program is free software; you can redistribute it and/or modify | |
8 | * it under the terms of the GNU General Public License as published by | |
9 | * the Free Software Foundation; either version 2, or (at your option) | |
10 | * any later version. | |
11 | * | |
12 | * You should have received a copy of the GNU General Public License | |
13 | * (for example /usr/src/linux/COPYING); if not, write to the Free | |
14 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
15 | */ | |
16 | ||
17 | /* | |
18 | * High-speed RAID5 checksumming functions utilizing MMX instructions. | |
19 | * Copyright (C) 1998 Ingo Molnar. | |
20 | */ | |
21 | ||
/*
 * Assembler-string fragments used to build the pII MMX loops below.
 * x is an index of an 8-byte word (byte offset 8*x), y is the number of
 * the MMX register %mm<y>:
 *   LD(x, y)        - movq  word x at asm operand %1 into %mm<y>
 *   ST(x, y)        - movq  %mm<y> back to word x at asm operand %1
 *   XO1..XO4(x, y)  - pxor  word x at asm operand %2..%5 into %mm<y>
 * Each fragment is a string literal ending in ";\n", so adjacent fragments
 * concatenate into one asm template.
 */
8fdf7655 JP |
22 | #define LD(x, y) " movq 8*("#x")(%1), %%mm"#y" ;\n" |
23 | #define ST(x, y) " movq %%mm"#y", 8*("#x")(%1) ;\n" | |
24 | #define XO1(x, y) " pxor 8*("#x")(%2), %%mm"#y" ;\n" | |
25 | #define XO2(x, y) " pxor 8*("#x")(%3), %%mm"#y" ;\n" | |
26 | #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" | |
27 | #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" | |
1da177e4 LT |
28 | |
29 | #include <asm/i387.h> | |
30 | ||
/*
 * XOR the buffer at p2 into the buffer at p1 (p1[i] ^= p2[i]) using MMX.
 *
 * bytes is turned into a count of 128-byte lines (bytes >> 7). Every loop
 * iteration handles one line as four BLOCK()s; a BLOCK(i) loads words
 * i..i+3 of p1 into mm0-mm3, pxor's in the same words of p2 and stores each
 * result back to p1. Both pointers are then advanced by 128.
 *
 * lines, p1 and p2 are read-write ("+r") operands %0-%2, and the "memory"
 * clobber covers the loads/stores done through the pointers.
 *
 * The count is only tested at the bottom of the loop (decl/jnz), so the
 * body always runs at least once.
 * NOTE(review): with bytes < 128, lines is 0 and the decrement wraps;
 * presumably callers always pass a non-zero multiple of 128 - confirm.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
31 | static void | |
32 | xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | |
33 | { | |
34 | unsigned long lines = bytes >> 7; | |
35 | ||
36 | kernel_fpu_begin(); | |
37 | ||
8fdf7655 | 38 | asm volatile(
1da177e4 | 39 | #undef BLOCK
8fdf7655 JP |
40 | #define BLOCK(i) \ |
41 | LD(i, 0) \ | |
42 | LD(i + 1, 1) \ | |
43 | LD(i + 2, 2) \ | |
44 | LD(i + 3, 3) \ | |
45 | XO1(i, 0) \ | |
46 | ST(i, 0) \ | |
47 | XO1(i+1, 1) \ | |
48 | ST(i+1, 1) \ | |
49 | XO1(i + 2, 2) \ | |
50 | ST(i + 2, 2) \ | |
51 | XO1(i + 3, 3) \ | |
52 | ST(i + 3, 3) | |
1da177e4 LT |
53 | |
54 | " .align 32 ;\n" | |
8fdf7655 | 55 | " 1: ;\n" |
1da177e4 LT |
56 | |
57 | BLOCK(0) | |
58 | BLOCK(4) | |
59 | BLOCK(8) | |
60 | BLOCK(12) | |
61 | ||
62 | " addl $128, %1 ;\n" | |
63 | " addl $128, %2 ;\n" | |
64 | " decl %0 ;\n" | |
65 | " jnz 1b ;\n" | |
66 | : "+r" (lines), | |
67 | "+r" (p1), "+r" (p2) | |
68 | : | |
69 | : "memory"); | |
70 | ||
71 | kernel_fpu_end(); | |
72 | } | |
73 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1
 * (p1[i] ^= p2[i] ^ p3[i]) using MMX.
 *
 * Same structure as the two-buffer variant: lines = bytes >> 7, one
 * 128-byte line per iteration built from four BLOCK()s. Here a BLOCK(i)
 * loads words i..i+3 of p1 into mm0-mm3, applies XO1 (p2) to all four
 * registers, then applies XO2 (p3) and stores each register back to p1.
 * All three pointers are advanced by 128 per iteration.
 *
 * lines and p1-p3 are read-write ("+r") operands %0-%3; "memory" is
 * clobbered. The loop count is tested at the bottom (decl/jnz), so the
 * body runs at least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 128;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
74 | static void | |
75 | xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
76 | unsigned long *p3) | |
77 | { | |
78 | unsigned long lines = bytes >> 7; | |
79 | ||
80 | kernel_fpu_begin(); | |
81 | ||
8fdf7655 | 82 | asm volatile(
1da177e4 | 83 | #undef BLOCK
8fdf7655 JP |
84 | #define BLOCK(i) \ |
85 | LD(i, 0) \ | |
86 | LD(i + 1, 1) \ | |
87 | LD(i + 2, 2) \ | |
88 | LD(i + 3, 3) \ | |
89 | XO1(i, 0) \ | |
90 | XO1(i + 1, 1) \ | |
91 | XO1(i + 2, 2) \ | |
92 | XO1(i + 3, 3) \ | |
93 | XO2(i, 0) \ | |
94 | ST(i, 0) \ | |
95 | XO2(i + 1, 1) \ | |
96 | ST(i + 1, 1) \ | |
97 | XO2(i + 2, 2) \ | |
98 | ST(i + 2, 2) \ | |
99 | XO2(i + 3, 3) \ | |
100 | ST(i + 3, 3) | |
1da177e4 LT |
101 | |
102 | " .align 32 ;\n" | |
103 | " 1: ;\n" | |
104 | ||
105 | BLOCK(0) | |
106 | BLOCK(4) | |
107 | BLOCK(8) | |
108 | BLOCK(12) | |
109 | ||
110 | " addl $128, %1 ;\n" | |
111 | " addl $128, %2 ;\n" | |
112 | " addl $128, %3 ;\n" | |
113 | " decl %0 ;\n" | |
114 | " jnz 1b ;\n" | |
115 | : "+r" (lines), | |
116 | "+r" (p1), "+r" (p2), "+r" (p3) | |
117 | : | |
118 | : "memory"); | |
119 | ||
120 | kernel_fpu_end(); | |
121 | } | |
122 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i]) using MMX.
 *
 * lines = bytes >> 7; each iteration processes one 128-byte line as four
 * BLOCK()s. A BLOCK(i) loads words i..i+3 of p1 into mm0-mm3, applies XO1
 * (p2) and XO2 (p3) to all four registers, then applies XO3 (p4) and
 * stores each register back to p1. All four pointers advance by 128 per
 * iteration.
 *
 * lines and p1-p4 are read-write ("+r") operands %0-%4; "memory" is
 * clobbered. The loop count is tested at the bottom (decl/jnz), so the
 * body runs at least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 128;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
123 | static void | |
124 | xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
125 | unsigned long *p3, unsigned long *p4) | |
126 | { | |
127 | unsigned long lines = bytes >> 7; | |
128 | ||
129 | kernel_fpu_begin(); | |
130 | ||
8fdf7655 | 131 | asm volatile(
1da177e4 | 132 | #undef BLOCK
8fdf7655 JP |
133 | #define BLOCK(i) \ |
134 | LD(i, 0) \ | |
135 | LD(i + 1, 1) \ | |
136 | LD(i + 2, 2) \ | |
137 | LD(i + 3, 3) \ | |
138 | XO1(i, 0) \ | |
139 | XO1(i + 1, 1) \ | |
140 | XO1(i + 2, 2) \ | |
141 | XO1(i + 3, 3) \ | |
142 | XO2(i, 0) \ | |
143 | XO2(i + 1, 1) \ | |
144 | XO2(i + 2, 2) \ | |
145 | XO2(i + 3, 3) \ | |
146 | XO3(i, 0) \ | |
147 | ST(i, 0) \ | |
148 | XO3(i + 1, 1) \ | |
149 | ST(i + 1, 1) \ | |
150 | XO3(i + 2, 2) \ | |
151 | ST(i + 2, 2) \ | |
152 | XO3(i + 3, 3) \ | |
153 | ST(i + 3, 3) | |
1da177e4 LT |
154 | |
155 | " .align 32 ;\n" | |
156 | " 1: ;\n" | |
157 | ||
158 | BLOCK(0) | |
159 | BLOCK(4) | |
160 | BLOCK(8) | |
161 | BLOCK(12) | |
162 | ||
163 | " addl $128, %1 ;\n" | |
164 | " addl $128, %2 ;\n" | |
165 | " addl $128, %3 ;\n" | |
166 | " addl $128, %4 ;\n" | |
167 | " decl %0 ;\n" | |
168 | " jnz 1b ;\n" | |
169 | : "+r" (lines), | |
170 | "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) | |
171 | : | |
172 | : "memory"); | |
173 | ||
174 | kernel_fpu_end(); | |
175 | } | |
176 | ||
177 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i]) using MMX.
 *
 * lines = bytes >> 7; each iteration processes one 128-byte line as four
 * BLOCK()s. A BLOCK(i) loads words i..i+3 of p1 into mm0-mm3, applies
 * XO1-XO3 (p2-p4) to all four registers, then applies XO4 (p5) and stores
 * each register back to p1. All five pointers advance by 128 per iteration.
 *
 * Operand layout differs from the smaller variants: lines and p1-p3 are
 * read-write ("+r") operands %0-%3, but p4 and p5 are plain inputs ("r",
 * %4 and %5) even though the asm adds to them. The empty asm statements
 * before and after the main one are the workaround for that, as explained
 * in the comments next to them.
 *
 * The loop count is tested at the bottom (decl/jnz), so the body runs at
 * least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 128;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
178 | static void | |
179 | xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
180 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | |
181 | { | |
182 | unsigned long lines = bytes >> 7; | |
183 | ||
184 | kernel_fpu_begin(); | |
185 | ||
186 | /* Make sure GCC forgets anything it knows about p4 or p5, | |
187 | such that it won't pass to the asm volatile below a | |
188 | register that is shared with any other variable. That's | |
189 | because we modify p4 and p5 there, but we can't mark them | |
190 | as read/write, otherwise we'd overflow the 10-asm-operands | |
191 | limit of GCC < 3.1. */ | |
8fdf7655 | 192 | asm("" : "+r" (p4), "+r" (p5)); |
1da177e4 | 193 | |
8fdf7655 | 194 | asm volatile(
1da177e4 | 195 | #undef BLOCK
8fdf7655 JP |
196 | #define BLOCK(i) \ |
197 | LD(i, 0) \ | |
198 | LD(i + 1, 1) \ | |
199 | LD(i + 2, 2) \ | |
200 | LD(i + 3, 3) \ | |
201 | XO1(i, 0) \ | |
202 | XO1(i + 1, 1) \ | |
203 | XO1(i + 2, 2) \ | |
204 | XO1(i + 3, 3) \ | |
205 | XO2(i, 0) \ | |
206 | XO2(i + 1, 1) \ | |
207 | XO2(i + 2, 2) \ | |
208 | XO2(i + 3, 3) \ | |
209 | XO3(i, 0) \ | |
210 | XO3(i + 1, 1) \ | |
211 | XO3(i + 2, 2) \ | |
212 | XO3(i + 3, 3) \ | |
213 | XO4(i, 0) \ | |
214 | ST(i, 0) \ | |
215 | XO4(i + 1, 1) \ | |
216 | ST(i + 1, 1) \ | |
217 | XO4(i + 2, 2) \ | |
218 | ST(i + 2, 2) \ | |
219 | XO4(i + 3, 3) \ | |
220 | ST(i + 3, 3) | |
1da177e4 LT |
221 | |
222 | " .align 32 ;\n" | |
223 | " 1: ;\n" | |
224 | ||
225 | BLOCK(0) | |
226 | BLOCK(4) | |
227 | BLOCK(8) | |
228 | BLOCK(12) | |
229 | ||
230 | " addl $128, %1 ;\n" | |
231 | " addl $128, %2 ;\n" | |
232 | " addl $128, %3 ;\n" | |
233 | " addl $128, %4 ;\n" | |
234 | " addl $128, %5 ;\n" | |
235 | " decl %0 ;\n" | |
236 | " jnz 1b ;\n" | |
237 | : "+r" (lines), | |
238 | "+r" (p1), "+r" (p2), "+r" (p3) | |
8fdf7655 | 239 | : "r" (p4), "r" (p5) |
1da177e4 LT |
240 | : "memory"); |
241 | ||
242 | /* p4 and p5 were modified, and now the variables are dead. | |
243 | Clobber them just to be sure nobody does something stupid | |
244 | like assuming they have some legal value. */ | |
8fdf7655 | 245 | asm("" : "=r" (p4), "=r" (p5)); |
1da177e4 LT |
246 | |
247 | kernel_fpu_end(); | |
248 | } | |
249 | ||
/*
 * The asm-string helper macros are only used by the xor_pII_mmx_*
 * routines above; the xor_p5_mmx_* routines below spell their
 * instructions out directly, so the helpers are dropped here.
 */
250 | #undef LD | |
251 | #undef XO1 | |
252 | #undef XO2 | |
253 | #undef XO3 | |
254 | #undef XO4 | |
255 | #undef ST | |
256 | #undef BLOCK | |
257 | ||
/*
 * XOR the buffer at p2 into the buffer at p1 (p1[i] ^= p2[i]) using MMX,
 * 64 bytes per loop iteration.
 *
 * lines = bytes >> 6. Each iteration loads the eight 8-byte words at
 * offsets 0..56 of p1 into mm0-mm7, pxor's in the matching words of p2,
 * stores every result back to the same offset of p1 and then adds 64 to
 * both pointers.
 *
 * Loads, xors and stores are interleaved rather than grouped.
 * NOTE(review): presumably a deliberate instruction schedule for the
 * target CPU - do not reorder without re-measuring.
 *
 * lines, p1 and p2 are read-write ("+r") operands %0-%2; "memory" is
 * clobbered. The loop count is tested at the bottom (decl/jnz), so the
 * body runs at least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 64;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
258 | static void | |
259 | xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | |
260 | { | |
261 | unsigned long lines = bytes >> 6; | |
262 | ||
263 | kernel_fpu_begin(); | |
264 | ||
8fdf7655 | 265 | asm volatile(
1da177e4 LT |
266 | " .align 32 ;\n" |
267 | " 1: ;\n" | |
268 | " movq (%1), %%mm0 ;\n" | |
269 | " movq 8(%1), %%mm1 ;\n" | |
270 | " pxor (%2), %%mm0 ;\n" | |
271 | " movq 16(%1), %%mm2 ;\n" | |
272 | " movq %%mm0, (%1) ;\n" | |
273 | " pxor 8(%2), %%mm1 ;\n" | |
274 | " movq 24(%1), %%mm3 ;\n" | |
275 | " movq %%mm1, 8(%1) ;\n" | |
276 | " pxor 16(%2), %%mm2 ;\n" | |
277 | " movq 32(%1), %%mm4 ;\n" | |
278 | " movq %%mm2, 16(%1) ;\n" | |
279 | " pxor 24(%2), %%mm3 ;\n" | |
280 | " movq 40(%1), %%mm5 ;\n" | |
281 | " movq %%mm3, 24(%1) ;\n" | |
282 | " pxor 32(%2), %%mm4 ;\n" | |
283 | " movq 48(%1), %%mm6 ;\n" | |
284 | " movq %%mm4, 32(%1) ;\n" | |
285 | " pxor 40(%2), %%mm5 ;\n" | |
286 | " movq 56(%1), %%mm7 ;\n" | |
287 | " movq %%mm5, 40(%1) ;\n" | |
288 | " pxor 48(%2), %%mm6 ;\n" | |
289 | " pxor 56(%2), %%mm7 ;\n" | |
290 | " movq %%mm6, 48(%1) ;\n" | |
291 | " movq %%mm7, 56(%1) ;\n" | |
8fdf7655 | 292 | |
1da177e4 LT |
293 | " addl $64, %1 ;\n" |
294 | " addl $64, %2 ;\n" | |
295 | " decl %0 ;\n" | |
296 | " jnz 1b ;\n" | |
297 | : "+r" (lines), | |
298 | "+r" (p1), "+r" (p2) | |
299 | : | |
300 | : "memory"); | |
301 | ||
302 | kernel_fpu_end(); | |
303 | } | |
304 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1
 * (p1[i] ^= p2[i] ^ p3[i]) using MMX, 64 bytes per loop iteration.
 *
 * lines = bytes >> 6. Each iteration loads the eight 8-byte words at
 * offsets 0..56 of p1 into mm0-mm7, pxor's in the matching words of p2 and
 * p3, stores every result back to the same offset of p1 and then adds 64
 * to all three pointers.
 *
 * Loads, xors and stores are interleaved rather than grouped.
 * NOTE(review): presumably a deliberate instruction schedule for the
 * target CPU - do not reorder without re-measuring.
 *
 * lines and p1-p3 are read-write ("+r") operands %0-%3; "memory" is
 * clobbered. The loop count is tested at the bottom (decl/jnz), so the
 * body runs at least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 64;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
305 | static void | |
306 | xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
307 | unsigned long *p3) | |
308 | { | |
309 | unsigned long lines = bytes >> 6; | |
310 | ||
311 | kernel_fpu_begin(); | |
312 | ||
8fdf7655 | 313 | asm volatile(
1da177e4 LT |
314 | " .align 32,0x90 ;\n" |
315 | " 1: ;\n" | |
316 | " movq (%1), %%mm0 ;\n" | |
317 | " movq 8(%1), %%mm1 ;\n" | |
318 | " pxor (%2), %%mm0 ;\n" | |
319 | " movq 16(%1), %%mm2 ;\n" | |
320 | " pxor 8(%2), %%mm1 ;\n" | |
321 | " pxor (%3), %%mm0 ;\n" | |
322 | " pxor 16(%2), %%mm2 ;\n" | |
323 | " movq %%mm0, (%1) ;\n" | |
324 | " pxor 8(%3), %%mm1 ;\n" | |
325 | " pxor 16(%3), %%mm2 ;\n" | |
326 | " movq 24(%1), %%mm3 ;\n" | |
327 | " movq %%mm1, 8(%1) ;\n" | |
328 | " movq 32(%1), %%mm4 ;\n" | |
329 | " movq 40(%1), %%mm5 ;\n" | |
330 | " pxor 24(%2), %%mm3 ;\n" | |
331 | " movq %%mm2, 16(%1) ;\n" | |
332 | " pxor 32(%2), %%mm4 ;\n" | |
333 | " pxor 24(%3), %%mm3 ;\n" | |
334 | " pxor 40(%2), %%mm5 ;\n" | |
335 | " movq %%mm3, 24(%1) ;\n" | |
336 | " pxor 32(%3), %%mm4 ;\n" | |
337 | " pxor 40(%3), %%mm5 ;\n" | |
338 | " movq 48(%1), %%mm6 ;\n" | |
339 | " movq %%mm4, 32(%1) ;\n" | |
340 | " movq 56(%1), %%mm7 ;\n" | |
341 | " pxor 48(%2), %%mm6 ;\n" | |
342 | " movq %%mm5, 40(%1) ;\n" | |
343 | " pxor 56(%2), %%mm7 ;\n" | |
344 | " pxor 48(%3), %%mm6 ;\n" | |
345 | " pxor 56(%3), %%mm7 ;\n" | |
346 | " movq %%mm6, 48(%1) ;\n" | |
347 | " movq %%mm7, 56(%1) ;\n" | |
8fdf7655 | 348 | |
1da177e4 LT |
349 | " addl $64, %1 ;\n" |
350 | " addl $64, %2 ;\n" | |
351 | " addl $64, %3 ;\n" | |
352 | " decl %0 ;\n" | |
353 | " jnz 1b ;\n" | |
354 | : "+r" (lines), | |
355 | "+r" (p1), "+r" (p2), "+r" (p3) | |
356 | : | |
357 | : "memory" ); | |
358 | ||
359 | kernel_fpu_end(); | |
360 | } | |
361 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i]) using MMX, 64 bytes per loop iteration.
 *
 * lines = bytes >> 6. Each iteration loads the eight 8-byte words at
 * offsets 0..56 of p1 into mm0-mm7, pxor's in the matching words of p2, p3
 * and p4, stores every result back to the same offset of p1 and then adds
 * 64 to all four pointers.
 *
 * Loads, xors and stores are interleaved rather than grouped.
 * NOTE(review): presumably a deliberate instruction schedule for the
 * target CPU - do not reorder without re-measuring.
 *
 * lines and p1-p4 are read-write ("+r") operands %0-%4; "memory" is
 * clobbered. The loop count is tested at the bottom (decl/jnz), so the
 * body runs at least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 64;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
362 | static void | |
363 | xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
364 | unsigned long *p3, unsigned long *p4) | |
365 | { | |
366 | unsigned long lines = bytes >> 6; | |
367 | ||
368 | kernel_fpu_begin(); | |
369 | ||
8fdf7655 | 370 | asm volatile(
1da177e4 LT |
371 | " .align 32,0x90 ;\n" |
372 | " 1: ;\n" | |
373 | " movq (%1), %%mm0 ;\n" | |
374 | " movq 8(%1), %%mm1 ;\n" | |
375 | " pxor (%2), %%mm0 ;\n" | |
376 | " movq 16(%1), %%mm2 ;\n" | |
377 | " pxor 8(%2), %%mm1 ;\n" | |
378 | " pxor (%3), %%mm0 ;\n" | |
379 | " pxor 16(%2), %%mm2 ;\n" | |
380 | " pxor 8(%3), %%mm1 ;\n" | |
381 | " pxor (%4), %%mm0 ;\n" | |
382 | " movq 24(%1), %%mm3 ;\n" | |
383 | " pxor 16(%3), %%mm2 ;\n" | |
384 | " pxor 8(%4), %%mm1 ;\n" | |
385 | " movq %%mm0, (%1) ;\n" | |
386 | " movq 32(%1), %%mm4 ;\n" | |
387 | " pxor 24(%2), %%mm3 ;\n" | |
388 | " pxor 16(%4), %%mm2 ;\n" | |
389 | " movq %%mm1, 8(%1) ;\n" | |
390 | " movq 40(%1), %%mm5 ;\n" | |
391 | " pxor 32(%2), %%mm4 ;\n" | |
392 | " pxor 24(%3), %%mm3 ;\n" | |
393 | " movq %%mm2, 16(%1) ;\n" | |
394 | " pxor 40(%2), %%mm5 ;\n" | |
395 | " pxor 32(%3), %%mm4 ;\n" | |
396 | " pxor 24(%4), %%mm3 ;\n" | |
397 | " movq %%mm3, 24(%1) ;\n" | |
398 | " movq 56(%1), %%mm7 ;\n" | |
399 | " movq 48(%1), %%mm6 ;\n" | |
400 | " pxor 40(%3), %%mm5 ;\n" | |
401 | " pxor 32(%4), %%mm4 ;\n" | |
402 | " pxor 48(%2), %%mm6 ;\n" | |
403 | " movq %%mm4, 32(%1) ;\n" | |
404 | " pxor 56(%2), %%mm7 ;\n" | |
405 | " pxor 40(%4), %%mm5 ;\n" | |
406 | " pxor 48(%3), %%mm6 ;\n" | |
407 | " pxor 56(%3), %%mm7 ;\n" | |
408 | " movq %%mm5, 40(%1) ;\n" | |
409 | " pxor 48(%4), %%mm6 ;\n" | |
410 | " pxor 56(%4), %%mm7 ;\n" | |
411 | " movq %%mm6, 48(%1) ;\n" | |
412 | " movq %%mm7, 56(%1) ;\n" | |
8fdf7655 | 413 | |
1da177e4 LT |
414 | " addl $64, %1 ;\n" |
415 | " addl $64, %2 ;\n" | |
416 | " addl $64, %3 ;\n" | |
417 | " addl $64, %4 ;\n" | |
418 | " decl %0 ;\n" | |
419 | " jnz 1b ;\n" | |
420 | : "+r" (lines), | |
421 | "+r" (p1), "+r" (p2), "+r" (p3), "+r" (p4) | |
422 | : | |
423 | : "memory"); | |
424 | ||
425 | kernel_fpu_end(); | |
426 | } | |
427 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1
 * (p1[i] ^= p2[i] ^ p3[i] ^ p4[i] ^ p5[i]) using MMX, 64 bytes per loop
 * iteration.
 *
 * lines = bytes >> 6. Each iteration loads the eight 8-byte words at
 * offsets 0..56 of p1 into mm0-mm7, pxor's in the matching words of p2-p5,
 * stores every result back to the same offset of p1 and then adds 64 to
 * all five pointers.
 *
 * Loads, xors and stores are interleaved rather than grouped.
 * NOTE(review): presumably a deliberate instruction schedule for the
 * target CPU - do not reorder without re-measuring.
 *
 * lines and p1-p3 are read-write ("+r") operands %0-%3, but p4 and p5 are
 * plain inputs ("r", %4 and %5) even though the asm adds to them. The
 * empty asm statements before and after the main one are the workaround
 * for that, as explained in the comments next to them.
 *
 * The loop count is tested at the bottom (decl/jnz), so the body runs at
 * least once.
 * NOTE(review): presumably bytes is always a non-zero multiple of 64;
 * lines == 0 would wrap on the first decrement - confirm against callers.
 *
 * The asm is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
428 | static void | |
429 | xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
430 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | |
431 | { | |
432 | unsigned long lines = bytes >> 6; | |
433 | ||
434 | kernel_fpu_begin(); | |
435 | ||
436 | /* Make sure GCC forgets anything it knows about p4 or p5, | |
437 | such that it won't pass to the asm volatile below a | |
438 | register that is shared with any other variable. That's | |
439 | because we modify p4 and p5 there, but we can't mark them | |
440 | as read/write, otherwise we'd overflow the 10-asm-operands | |
441 | limit of GCC < 3.1. */ | |
8fdf7655 | 442 | asm("" : "+r" (p4), "+r" (p5)); |
1da177e4 | 443 | |
8fdf7655 | 444 | asm volatile(
1da177e4 LT |
445 | " .align 32,0x90 ;\n" |
446 | " 1: ;\n" | |
447 | " movq (%1), %%mm0 ;\n" | |
448 | " movq 8(%1), %%mm1 ;\n" | |
449 | " pxor (%2), %%mm0 ;\n" | |
450 | " pxor 8(%2), %%mm1 ;\n" | |
451 | " movq 16(%1), %%mm2 ;\n" | |
452 | " pxor (%3), %%mm0 ;\n" | |
453 | " pxor 8(%3), %%mm1 ;\n" | |
454 | " pxor 16(%2), %%mm2 ;\n" | |
455 | " pxor (%4), %%mm0 ;\n" | |
456 | " pxor 8(%4), %%mm1 ;\n" | |
457 | " pxor 16(%3), %%mm2 ;\n" | |
458 | " movq 24(%1), %%mm3 ;\n" | |
459 | " pxor (%5), %%mm0 ;\n" | |
460 | " pxor 8(%5), %%mm1 ;\n" | |
461 | " movq %%mm0, (%1) ;\n" | |
462 | " pxor 16(%4), %%mm2 ;\n" | |
463 | " pxor 24(%2), %%mm3 ;\n" | |
464 | " movq %%mm1, 8(%1) ;\n" | |
465 | " pxor 16(%5), %%mm2 ;\n" | |
466 | " pxor 24(%3), %%mm3 ;\n" | |
467 | " movq 32(%1), %%mm4 ;\n" | |
468 | " movq %%mm2, 16(%1) ;\n" | |
469 | " pxor 24(%4), %%mm3 ;\n" | |
470 | " pxor 32(%2), %%mm4 ;\n" | |
471 | " movq 40(%1), %%mm5 ;\n" | |
472 | " pxor 24(%5), %%mm3 ;\n" | |
473 | " pxor 32(%3), %%mm4 ;\n" | |
474 | " pxor 40(%2), %%mm5 ;\n" | |
475 | " movq %%mm3, 24(%1) ;\n" | |
476 | " pxor 32(%4), %%mm4 ;\n" | |
477 | " pxor 40(%3), %%mm5 ;\n" | |
478 | " movq 48(%1), %%mm6 ;\n" | |
479 | " movq 56(%1), %%mm7 ;\n" | |
480 | " pxor 32(%5), %%mm4 ;\n" | |
481 | " pxor 40(%4), %%mm5 ;\n" | |
482 | " pxor 48(%2), %%mm6 ;\n" | |
483 | " pxor 56(%2), %%mm7 ;\n" | |
484 | " movq %%mm4, 32(%1) ;\n" | |
485 | " pxor 48(%3), %%mm6 ;\n" | |
486 | " pxor 56(%3), %%mm7 ;\n" | |
487 | " pxor 40(%5), %%mm5 ;\n" | |
488 | " pxor 48(%4), %%mm6 ;\n" | |
489 | " pxor 56(%4), %%mm7 ;\n" | |
490 | " movq %%mm5, 40(%1) ;\n" | |
491 | " pxor 48(%5), %%mm6 ;\n" | |
492 | " pxor 56(%5), %%mm7 ;\n" | |
493 | " movq %%mm6, 48(%1) ;\n" | |
494 | " movq %%mm7, 56(%1) ;\n" | |
8fdf7655 | 495 | |
1da177e4 LT |
496 | " addl $64, %1 ;\n" |
497 | " addl $64, %2 ;\n" | |
498 | " addl $64, %3 ;\n" | |
499 | " addl $64, %4 ;\n" | |
500 | " addl $64, %5 ;\n" | |
501 | " decl %0 ;\n" | |
502 | " jnz 1b ;\n" | |
503 | : "+r" (lines), | |
504 | "+r" (p1), "+r" (p2), "+r" (p3) | |
505 | : "r" (p4), "r" (p5) | |
506 | : "memory"); | |
507 | ||
508 | /* p4 and p5 were modified, and now the variables are dead. | |
509 | Clobber them just to be sure nobody does something stupid | |
510 | like assuming they have some legal value. */ | |
8fdf7655 | 511 | asm("" : "=r" (p4), "=r" (p5)); |
1da177e4 LT |
512 | |
513 | kernel_fpu_end(); | |
514 | } | |
515 | ||
/*
 * Template named "pII_mmx": bundles the 128-byte-per-iteration MMX
 * routines for 2, 3, 4 and 5 buffers. Passed to xor_speed() by
 * XOR_TRY_TEMPLATES.
 */
516 | static struct xor_block_template xor_block_pII_mmx = { | |
517 | .name = "pII_mmx", | |
518 | .do_2 = xor_pII_mmx_2, | |
519 | .do_3 = xor_pII_mmx_3, | |
520 | .do_4 = xor_pII_mmx_4, | |
521 | .do_5 = xor_pII_mmx_5, | |
522 | }; | |
523 | ||
/*
 * Template named "p5_mmx": bundles the 64-byte-per-iteration MMX routines
 * for 2, 3, 4 and 5 buffers. Passed to xor_speed() by XOR_TRY_TEMPLATES.
 */
524 | static struct xor_block_template xor_block_p5_mmx = { | |
525 | .name = "p5_mmx", | |
526 | .do_2 = xor_p5_mmx_2, | |
527 | .do_3 = xor_p5_mmx_3, | |
528 | .do_4 = xor_p5_mmx_4, | |
529 | .do_5 = xor_p5_mmx_5, | |
530 | }; | |
531 | ||
/*
 * Template named "pIII_sse": bundles xor_sse_2 .. xor_sse_5.
 * NOTE(review): the xor_sse_* routines are not defined in this part of the
 * file; presumably they are provided by whatever includes this header -
 * confirm.
 */
1da177e4 | 532 | static struct xor_block_template xor_block_pIII_sse = {
8fdf7655 JP |
533 | .name = "pIII_sse", |
534 | .do_2 = xor_sse_2, | |
535 | .do_3 = xor_sse_3, | |
536 | .do_4 = xor_sse_4, | |
537 | .do_5 = xor_sse_5, | |
1da177e4 LT |
538 | }; |
539 | ||
ea4d26ae | 540 | /* Also try the AVX routines */ |
a1ce3928 | 541 | #include <asm/xor_avx.h> |
ea4d26ae | 542 | |
1da177e4 LT |
543 | /* Also try the generic routines. */ |
544 | #include <asm-generic/xor.h> | |
545 | ||
f317820c JB |
546 | /* We force the use of the SSE xor block because it can write around L2. |
547 | We may also be able to load into the L1 only depending on how the cpu | |
548 | deals with a load to a line that is being prefetched. */ | |
/*
 * Replace any earlier XOR_TRY_TEMPLATES with the x86-32 selection:
 *   - AVX_XOR_SPEED is always expanded first;
 *   - if cpu_has_xmm:      xor_speed() on the pIII_sse and sse_pf64 templates;
 *   - else if cpu_has_mmx: xor_speed() on the pII_mmx and p5_mmx templates;
 *   - otherwise:           xor_speed() on the four generic templates
 *                          (8regs, 8regs_p, 32regs, 32regs_p).
 * Wrapped in do { } while (0) so it can be used as a single statement.
 */
1da177e4 LT |
549 | #undef XOR_TRY_TEMPLATES |
550 | #define XOR_TRY_TEMPLATES \ | |
8fdf7655 | 551 | do { \ |
ea4d26ae | 552 | AVX_XOR_SPEED; \ |
f317820c | 553 | if (cpu_has_xmm) { \ |
8fdf7655 | 554 | xor_speed(&xor_block_pIII_sse); \ |
f317820c JB |
555 | xor_speed(&xor_block_sse_pf64); \ |
556 | } else if (cpu_has_mmx) { \ | |
8fdf7655 JP |
557 | xor_speed(&xor_block_pII_mmx); \ |
558 | xor_speed(&xor_block_p5_mmx); \ | |
f317820c JB |
559 | } else { \ |
560 | xor_speed(&xor_block_8regs); \ | |
561 | xor_speed(&xor_block_8regs_p); \ | |
562 | xor_speed(&xor_block_32regs); \ | |
563 | xor_speed(&xor_block_32regs_p); \ | |
8fdf7655 JP |
564 | } \ |
565 | } while (0) | |
1da177e4 | 566 | |
1965aae3 | 567 | #endif /* _ASM_X86_XOR_32_H */ |