Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
4baa9922 | 2 | * arch/arm/include/asm/xor.h |
1da177e4 LT |
3 | * |
4 | * Copyright (C) 2001 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
01956597 | 10 | #include <linux/hardirq.h> |
1da177e4 | 11 | #include <asm-generic/xor.h> |
01956597 AB |
12 | #include <asm/hwcap.h> |
13 | #include <asm/neon.h> | |
1da177e4 LT |
14 | |
/* XOR-accumulate a2 into a1 in place. */
#define __XOR(a1, a2)	((a1) ^= (a2))

/*
 * Block helpers for the arm4regs routines.
 *
 * They operate on variables named a1..a4 (accumulators) and b1..b4
 * (scratch) that the enclosing function must declare.  ldmia/stmia
 * transfer the listed registers in ascending register-number order, so
 * each group has to be bound to ascending registers for the words to
 * land in a1..aN / b1..bN in memory order.
 */

/* Load two words from dst into a1, a2; dst is not advanced (no writeback). */
#define GET_BLOCK_2(dst) \
	__asm__("ldmia %0, {%1, %2}" \
		: "=r" (dst), "=r" (a1), "=r" (a2) \
		: "0" (dst))

/* Load four words from dst into a1..a4; dst is not advanced (no writeback). */
#define GET_BLOCK_4(dst) \
	__asm__("ldmia %0, {%1, %2, %3, %4}" \
		: "=r" (dst), "=r" (a1), "=r" (a2), "=r" (a3), "=r" (a4) \
		: "0" (dst))

/*
 * Load two words from src into b1, b2, advancing src past them
 * (writeback), then fold them into a1, a2.  Wrapped in do/while (0) so
 * the expansion is a single statement.
 */
#define XOR_BLOCK_2(src) \
	do { \
		__asm__("ldmia %0!, {%1, %2}" \
			: "=r" (src), "=r" (b1), "=r" (b2) \
			: "0" (src)); \
		__XOR(a1, b1); __XOR(a2, b2); \
	} while (0)

/*
 * Load four words from src into b1..b4, advancing src past them
 * (writeback), then fold them into a1..a4.
 */
#define XOR_BLOCK_4(src) \
	do { \
		__asm__("ldmia %0!, {%1, %2, %3, %4}" \
			: "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
			: "0" (src)); \
		__XOR(a1, b1); __XOR(a2, b2); __XOR(a3, b3); __XOR(a4, b4); \
	} while (0)

/* Store a1, a2 to dst and advance dst past them (writeback). */
#define PUT_BLOCK_2(dst) \
	__asm__ __volatile__("stmia %0!, {%2, %3}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2))

/* Store a1..a4 to dst and advance dst past them (writeback). */
#define PUT_BLOCK_4(dst) \
	__asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
48 | ||
/*
 * XOR the buffer at p2 into the buffer at p1, four words per iteration.
 *
 * bytes is the length of each buffer.  A trailing partial block is
 * ignored (integer division), and the do/while always processes at
 * least one block: bytes must cover one or more 4-word blocks, since a
 * block count of zero would wrap on the pre-decrement.
 */
static void
xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 4;
	/*
	 * The block macros name these variables directly.  Each group is
	 * bound to ascending registers because ldmia/stmia transfer their
	 * register list in ascending register-number order.
	 *
	 * a4 lives in r10 rather than r7: r7 serves as the frame pointer in
	 * Thumb-2 code, so pinning a variable to it fails to build when
	 * frame pointers are in use.
	 */
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r10");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");

	do {
		GET_BLOCK_4(p1);	/* a1..a4 = p1[0..3], p1 unchanged */
		XOR_BLOCK_4(p2);	/* a ^= p2[0..3], p2 advances */
		PUT_BLOCK_4(p1);	/* p1[0..3] = a1..a4, p1 advances */
	} while (--lines);
}
68 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, four words per
 * iteration.
 *
 * bytes is the length of each buffer.  A trailing partial block is
 * ignored (integer division), and the do/while always processes at
 * least one block: bytes must cover one or more 4-word blocks, since a
 * block count of zero would wrap on the pre-decrement.
 */
static void
xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 4;
	/*
	 * The block macros name these variables directly.  Each group is
	 * bound to ascending registers because ldmia/stmia transfer their
	 * register list in ascending register-number order.
	 *
	 * a4 lives in r10 rather than r7: r7 serves as the frame pointer in
	 * Thumb-2 code, so pinning a variable to it fails to build when
	 * frame pointers are in use.
	 */
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r10");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");

	do {
		GET_BLOCK_4(p1);	/* a1..a4 = p1[0..3], p1 unchanged */
		XOR_BLOCK_4(p2);	/* a ^= p2[0..3], p2 advances */
		XOR_BLOCK_4(p3);	/* a ^= p3[0..3], p3 advances */
		PUT_BLOCK_4(p1);	/* p1[0..3] = a1..a4, p1 advances */
	} while (--lines);
}
90 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, two words per
 * iteration.
 *
 * bytes is the length of each buffer; a trailing partial block is
 * ignored.  The first block is processed before the count is tested, so
 * bytes must cover at least one 2-word block.
 */
static void
xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	/* Names and register bindings are relied upon by the block macros. */
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");
	unsigned int remaining = bytes / (2 * sizeof(unsigned long));

	for (;;) {
		GET_BLOCK_2(p1);
		XOR_BLOCK_2(p2);
		XOR_BLOCK_2(p3);
		XOR_BLOCK_2(p4);
		PUT_BLOCK_2(p1);

		if (--remaining == 0)
			break;
	}
}
109 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, two words
 * per iteration.
 *
 * bytes is the length of each buffer; a trailing partial block is
 * ignored.  The first block is processed before the count is tested, so
 * bytes must cover at least one 2-word block.
 */
static void
xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	/* Names and register bindings are relied upon by the block macros. */
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");
	unsigned int remaining = bytes / (2 * sizeof(unsigned long));

	for (;;) {
		GET_BLOCK_2(p1);
		XOR_BLOCK_2(p2);
		XOR_BLOCK_2(p3);
		XOR_BLOCK_2(p4);
		XOR_BLOCK_2(p5);
		PUT_BLOCK_2(p1);

		if (--remaining == 0)
			break;
	}
}
129 | ||
130 | static struct xor_block_template xor_block_arm4regs = { | |
131 | .name = "arm4regs", | |
132 | .do_2 = xor_arm4regs_2, | |
133 | .do_3 = xor_arm4regs_3, | |
134 | .do_4 = xor_arm4regs_4, | |
135 | .do_5 = xor_arm4regs_5, | |
136 | }; | |
137 | ||
/*
 * Replace any earlier XOR_TRY_TEMPLATES definition with the ARM list:
 * each template is handed to xor_speed() in the order below, followed by
 * whatever NEON_TEMPLATES expands to.
 * NOTE(review): xor_speed() is presumably the benchmarking hook of the
 * generic XOR code -- confirm against its definition.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES				\
	do {						\
		xor_speed(&xor_block_arm4regs);		\
		xor_speed(&xor_block_8regs);		\
		xor_speed(&xor_block_32regs);		\
		NEON_TEMPLATES;				\
	} while (0)
01956597 AB |
146 | |
147 | #ifdef CONFIG_KERNEL_MODE_NEON | |
148 | ||
/* NEON routines defined outside this header; called by the wrappers below. */
extern const struct xor_block_template xor_block_neon_inner;
150 | ||
151 | static void | |
152 | xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | |
153 | { | |
154 | if (in_interrupt()) { | |
155 | xor_arm4regs_2(bytes, p1, p2); | |
156 | } else { | |
157 | kernel_neon_begin(); | |
158 | xor_block_neon_inner.do_2(bytes, p1, p2); | |
159 | kernel_neon_end(); | |
160 | } | |
161 | } | |
162 | ||
163 | static void | |
164 | xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
165 | unsigned long *p3) | |
166 | { | |
167 | if (in_interrupt()) { | |
168 | xor_arm4regs_3(bytes, p1, p2, p3); | |
169 | } else { | |
170 | kernel_neon_begin(); | |
171 | xor_block_neon_inner.do_3(bytes, p1, p2, p3); | |
172 | kernel_neon_end(); | |
173 | } | |
174 | } | |
175 | ||
176 | static void | |
177 | xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
178 | unsigned long *p3, unsigned long *p4) | |
179 | { | |
180 | if (in_interrupt()) { | |
181 | xor_arm4regs_4(bytes, p1, p2, p3, p4); | |
182 | } else { | |
183 | kernel_neon_begin(); | |
184 | xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); | |
185 | kernel_neon_end(); | |
186 | } | |
187 | } | |
188 | ||
189 | static void | |
190 | xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
191 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | |
192 | { | |
193 | if (in_interrupt()) { | |
194 | xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); | |
195 | } else { | |
196 | kernel_neon_begin(); | |
197 | xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); | |
198 | kernel_neon_end(); | |
199 | } | |
200 | } | |
201 | ||
202 | static struct xor_block_template xor_block_neon = { | |
203 | .name = "neon", | |
204 | .do_2 = xor_neon_2, | |
205 | .do_3 = xor_neon_3, | |
206 | .do_4 = xor_neon_4, | |
207 | .do_5 = xor_neon_5 | |
208 | }; | |
209 | ||
210 | #define NEON_TEMPLATES \ | |
211 | do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0) | |
212 | #else | |
213 | #define NEON_TEMPLATES | |
214 | #endif |