Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * arch/ia64/lib/xor.S | |
3 | * | |
4 | * Optimized RAID-5 checksumming functions for IA-64. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2, or (at your option) | |
9 | * any later version. | |
10 | * | |
11 | * You should have received a copy of the GNU General Public License | |
12 | * (for example /usr/src/linux/COPYING); if not, write to the Free | |
13 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
14 | */ | |
15 | ||
16 | #include <asm/asmmacro.h> | |
e007c533 | 17 | #include <asm/export.h> |
1da177e4 LT |
18 | |
19 | GLOBAL_ENTRY(xor_ia64_2) /* XOR the buffer at in2 into the buffer at in1, one 8-byte word per loop trip; in0 >> 3 is the word count, so in0 is a length in bytes */ | |
20 | .prologue | |
21 | .fframe 0 /* unwind info: no memory stack frame is used */ | |
22 | .save ar.pfs, r31 | |
23 | alloc r31 = ar.pfs, 3, 0, 13, 16 /* 3 inputs, 0 locals, 13 outputs, 16 rotating registers; previous ar.pfs kept in r31 */ | |
24 | .save ar.lc, r30 | |
25 | mov r30 = ar.lc /* preserve the caller's loop count; restored before returning */ | |
26 | .save pr, r29 | |
27 | mov r29 = pr /* preserve the caller's predicates; restored before returning */ | |
28 | ;; | |
29 | .body | |
30 | mov r8 = in1 /* r8 = store pointer: results are written back over the in1 buffer */ | |
31 | mov ar.ec = 6 + 2 /* epilogue count = 8 pipeline stages, matching p[0]..p[7] below */ | |
32 | shr in0 = in0, 3 /* in0 = in0 / 8: number of 8-byte words */ | |
33 | ;; | |
34 | adds in0 = -1, in0 /* ar.lc wants (word count - 1); NOTE(review): in0 < 8 would yield -1 here, callers presumably always pass at least 8 - confirm */ | |
35 | mov r16 = in1 /* r16 = load pointer for the first source (same buffer as the destination) */ | |
36 | mov r17 = in2 /* r17 = load pointer for the second source */ | |
37 | ;; | |
38 | mov ar.lc = in0 | |
39 | mov pr.rot = 1 << 16 /* set p16 (= p[0]) and clear the other rotating predicates so only stage 0 is active at first */ | |
40 | ;; | |
41 | .rotr s1[6+1], s2[6+1], d[2] /* rotating register names: 7-deep queues for each loaded word, 2 for the result; they start at r32 and overlay in0-in2, hence the copies to r8/r16/r17 above */ | |
42 | .rotp p[6+2] /* 8 rotating stage predicates */ | |
43 | 0: | |
44 | (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load next word of source 1, post-increment pointer by 8 (.nta = non-temporal hint) */ | |
45 | (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load next word of source 2 */ | |
46 | (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: combine the pair of words loaded six rotations earlier */ | |
47 | (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the result produced on the previous rotation, post-increment by 8 */ | |
48 | nop.f 0 /* presumably a filler so the six instructions pack into two bundles */ | |
49 | br.ctop.dptk.few 0b /* modulo-scheduled loop branch: rotates registers and predicates each trip; ar.lc counts the words, ar.ec drains the pipeline */ | |
50 | ;; | |
51 | mov ar.lc = r30 /* restore the caller's ar.lc */ | |
52 | mov pr = r29, -1 /* restore all predicate registers (mask -1) */ | |
53 | br.ret.sptk.few rp | |
54 | END(xor_ia64_2) | |
e007c533 | 55 | EXPORT_SYMBOL(xor_ia64_2) |
1da177e4 LT |
56 | |
57 | GLOBAL_ENTRY(xor_ia64_3) /* XOR the buffers at in2 and in3 into the buffer at in1, one 8-byte word per loop trip; in0 >> 3 is the word count, so in0 is a length in bytes */ | |
58 | .prologue | |
59 | .fframe 0 /* unwind info: no memory stack frame is used */ | |
60 | .save ar.pfs, r31 | |
61 | alloc r31 = ar.pfs, 4, 0, 20, 24 /* 4 inputs, 0 locals, 20 outputs, 24 rotating registers; previous ar.pfs kept in r31 */ | |
62 | .save ar.lc, r30 | |
63 | mov r30 = ar.lc /* preserve the caller's loop count; restored before returning */ | |
64 | .save pr, r29 | |
65 | mov r29 = pr /* preserve the caller's predicates; restored before returning */ | |
66 | ;; | |
67 | .body | |
68 | mov r8 = in1 /* r8 = store pointer: results are written back over the in1 buffer */ | |
69 | mov ar.ec = 6 + 2 /* epilogue count = 8 pipeline stages, matching p[0]..p[7] below */ | |
70 | shr in0 = in0, 3 /* in0 = in0 / 8: number of 8-byte words */ | |
71 | ;; | |
72 | adds in0 = -1, in0 /* ar.lc wants (word count - 1); NOTE(review): in0 < 8 would yield -1 here, callers presumably always pass at least 8 - confirm */ | |
73 | mov r16 = in1 /* r16 = load pointer for source 1 (same buffer as the destination) */ | |
74 | mov r17 = in2 /* r17 = load pointer for source 2 */ | |
75 | ;; | |
76 | mov r18 = in3 /* r18 = load pointer for source 3 */ | |
77 | mov ar.lc = in0 | |
78 | mov pr.rot = 1 << 16 /* set p16 (= p[0]) and clear the other rotating predicates so only stage 0 is active at first */ | |
79 | ;; | |
80 | .rotr s1[6+1], s2[6+1], s3[6+1], d[2] /* rotating register names: 7-deep queues for each loaded word, 2 for the result; they start at r32 and overlay in0-in3, hence the copies to r8/r16-r18 above */ | |
81 | .rotp p[6+2] /* 8 rotating stage predicates */ | |
82 | 0: | |
83 | (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load next word of source 1, post-increment pointer by 8 (.nta = non-temporal hint) */ | |
84 | (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load next word of source 2 */ | |
85 | (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: partial result from sources 1 and 2 */ | |
86 | ;; | |
87 | (p[0]) ld8.nta s3[0] = [r18], 8 /* stage 0: load next word of source 3 */ | |
88 | (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the result completed on the previous rotation, post-increment by 8 */ | |
89 | (p[6]) xor d[0] = d[0], s3[6] /* stage 6: fold source 3 into the partial result; needs the stop bit above because it reads d[0] */ | |
90 | br.ctop.dptk.few 0b /* modulo-scheduled loop branch: rotates registers and predicates each trip; ar.lc counts the words, ar.ec drains the pipeline */ | |
91 | ;; | |
92 | mov ar.lc = r30 /* restore the caller's ar.lc */ | |
93 | mov pr = r29, -1 /* restore all predicate registers (mask -1) */ | |
94 | br.ret.sptk.few rp | |
95 | END(xor_ia64_3) | |
e007c533 | 96 | EXPORT_SYMBOL(xor_ia64_3) |
1da177e4 LT |
97 | |
98 | GLOBAL_ENTRY(xor_ia64_4) /* XOR the buffers at in2, in3 and in4 into the buffer at in1, one 8-byte word per loop trip; in0 >> 3 is the word count, so in0 is a length in bytes */ | |
99 | .prologue | |
100 | .fframe 0 /* unwind info: no memory stack frame is used */ | |
101 | .save ar.pfs, r31 | |
102 | alloc r31 = ar.pfs, 5, 0, 27, 32 /* 5 inputs, 0 locals, 27 outputs, 32 rotating registers; previous ar.pfs kept in r31 */ | |
103 | .save ar.lc, r30 | |
104 | mov r30 = ar.lc /* preserve the caller's loop count; restored before returning */ | |
105 | .save pr, r29 | |
106 | mov r29 = pr /* preserve the caller's predicates; restored before returning */ | |
107 | ;; | |
108 | .body | |
109 | mov r8 = in1 /* r8 = store pointer: results are written back over the in1 buffer */ | |
110 | mov ar.ec = 6 + 2 /* epilogue count = 8 pipeline stages, matching p[0]..p[7] below */ | |
111 | shr in0 = in0, 3 /* in0 = in0 / 8: number of 8-byte words */ | |
112 | ;; | |
113 | adds in0 = -1, in0 /* ar.lc wants (word count - 1); NOTE(review): in0 < 8 would yield -1 here, callers presumably always pass at least 8 - confirm */ | |
114 | mov r16 = in1 /* r16 = load pointer for source 1 (same buffer as the destination) */ | |
115 | mov r17 = in2 /* r17 = load pointer for source 2 */ | |
116 | ;; | |
117 | mov r18 = in3 /* r18 = load pointer for source 3 */ | |
118 | mov ar.lc = in0 | |
119 | mov pr.rot = 1 << 16 /* set p16 (= p[0]) and clear the other rotating predicates so only stage 0 is active at first */ | |
120 | mov r19 = in4 /* r19 = load pointer for source 4 */ | |
121 | ;; | |
122 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] /* rotating register names: 7-deep queues for each loaded word, 2 for the result; they start at r32 and overlay in0-in4, hence the copies to r8/r16-r19 above */ | |
123 | .rotp p[6+2] /* 8 rotating stage predicates */ | |
124 | 0: | |
125 | (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load next word of source 1, post-increment pointer by 8 (.nta = non-temporal hint) */ | |
126 | (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load next word of source 2 */ | |
127 | (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: partial result from sources 1 and 2 */ | |
128 | (p[0]) ld8.nta s3[0] = [r18], 8 /* stage 0: load next word of source 3 */ | |
129 | (p[0]) ld8.nta s4[0] = [r19], 8 /* stage 0: load next word of source 4 */ | |
130 | (p[6]) xor r20 = s3[6], s4[6] /* stage 6: partial result from sources 3 and 4, held in static scratch r20 (consumed in the same trip) */ | |
131 | ;; | |
132 | (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the result completed on the previous rotation, post-increment by 8 */ | |
133 | (p[6]) xor d[0] = d[0], r20 /* stage 6: merge the two partial results; needs the stop bit above because it reads d[0] and r20 */ | |
134 | br.ctop.dptk.few 0b /* modulo-scheduled loop branch: rotates registers and predicates each trip; ar.lc counts the words, ar.ec drains the pipeline */ | |
135 | ;; | |
136 | mov ar.lc = r30 /* restore the caller's ar.lc */ | |
137 | mov pr = r29, -1 /* restore all predicate registers (mask -1) */ | |
138 | br.ret.sptk.few rp | |
139 | END(xor_ia64_4) | |
e007c533 | 140 | EXPORT_SYMBOL(xor_ia64_4) |
1da177e4 LT |
141 | |
142 | GLOBAL_ENTRY(xor_ia64_5) /* XOR the buffers at in2, in3, in4 and in5 into the buffer at in1, one 8-byte word per loop trip; in0 >> 3 is the word count, so in0 is a length in bytes */ | |
143 | .prologue | |
144 | .fframe 0 /* unwind info: no memory stack frame is used */ | |
145 | .save ar.pfs, r31 | |
146 | alloc r31 = ar.pfs, 6, 0, 34, 40 /* 6 inputs, 0 locals, 34 outputs, 40 rotating registers; previous ar.pfs kept in r31 */ | |
147 | .save ar.lc, r30 | |
148 | mov r30 = ar.lc /* preserve the caller's loop count; restored before returning */ | |
149 | .save pr, r29 | |
150 | mov r29 = pr /* preserve the caller's predicates; restored before returning */ | |
151 | ;; | |
152 | .body | |
153 | mov r8 = in1 /* r8 = store pointer: results are written back over the in1 buffer */ | |
154 | mov ar.ec = 6 + 2 /* epilogue count = 8 pipeline stages, matching p[0]..p[7] below */ | |
155 | shr in0 = in0, 3 /* in0 = in0 / 8: number of 8-byte words */ | |
156 | ;; | |
157 | adds in0 = -1, in0 /* ar.lc wants (word count - 1); NOTE(review): in0 < 8 would yield -1 here, callers presumably always pass at least 8 - confirm */ | |
158 | mov r16 = in1 /* r16 = load pointer for source 1 (same buffer as the destination) */ | |
159 | mov r17 = in2 /* r17 = load pointer for source 2 */ | |
160 | ;; | |
161 | mov r18 = in3 /* r18 = load pointer for source 3 */ | |
162 | mov ar.lc = in0 | |
163 | mov pr.rot = 1 << 16 /* set p16 (= p[0]) and clear the other rotating predicates so only stage 0 is active at first */ | |
164 | mov r19 = in4 /* r19 = load pointer for source 4 */ | |
165 | mov r20 = in5 /* r20 = load pointer for source 5 */ | |
166 | ;; | |
167 | .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] /* rotating register names: 7-deep queues for each loaded word, 2 for the result; they start at r32 and overlay in0-in5, hence the copies to r8/r16-r20 above */ | |
168 | .rotp p[6+2] /* 8 rotating stage predicates */ | |
169 | 0: | |
170 | (p[0]) ld8.nta s1[0] = [r16], 8 /* stage 0: load next word of source 1, post-increment pointer by 8 (.nta = non-temporal hint) */ | |
171 | (p[0]) ld8.nta s2[0] = [r17], 8 /* stage 0: load next word of source 2 */ | |
172 | (p[6]) xor d[0] = s1[6], s2[6] /* stage 6: partial result from sources 1 and 2 */ | |
173 | (p[0]) ld8.nta s3[0] = [r18], 8 /* stage 0: load next word of source 3 */ | |
174 | (p[0]) ld8.nta s4[0] = [r19], 8 /* stage 0: load next word of source 4 */ | |
175 | (p[6]) xor r21 = s3[6], s4[6] /* stage 6: partial result from sources 3 and 4, held in static scratch r21 (consumed in the same trip) */ | |
176 | ;; | |
177 | (p[0]) ld8.nta s5[0] = [r20], 8 /* stage 0: load next word of source 5 */ | |
178 | (p[6+1])st8.nta [r8] = d[1], 8 /* stage 7: store the result completed on the previous rotation, post-increment by 8 */ | |
179 | (p[6]) xor d[0] = d[0], r21 /* stage 6: merge the two partial results; needs the stop bit above because it reads d[0] and r21 */ | |
180 | ;; | |
181 | (p[6]) xor d[0] = d[0], s5[6] /* stage 6: fold source 5 in; a third instruction group is needed because this reads the d[0] just written */ | |
182 | nop.f 0 /* presumably a filler so the last group packs into a bundle with the branch */ | |
183 | br.ctop.dptk.few 0b /* modulo-scheduled loop branch: rotates registers and predicates each trip; ar.lc counts the words, ar.ec drains the pipeline */ | |
184 | ;; | |
185 | mov ar.lc = r30 /* restore the caller's ar.lc */ | |
186 | mov pr = r29, -1 /* restore all predicate registers (mask -1) */ | |
187 | br.ret.sptk.few rp | |
188 | END(xor_ia64_5) | |
e007c533 | 189 | EXPORT_SYMBOL(xor_ia64_5) |