Commit | Line | Data |
---|---|---|
2249cbb5 TC |
1 | /* |
2 | * Buffer submit code for multi buffer SHA1 algorithm | |
3 | * | |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
9 | * Copyright(c) 2014 Intel Corporation. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or modify | |
12 | * it under the terms of version 2 of the GNU General Public License as | |
13 | * published by the Free Software Foundation. | |
14 | * | |
15 | * This program is distributed in the hope that it will be useful, but | |
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
18 | * General Public License for more details. | |
19 | * | |
20 | * Contact Information: | |
21 | * James Guilford <james.guilford@intel.com> | |
22 | * Tim Chen <tim.c.chen@linux.intel.com> | |
23 | * | |
24 | * BSD LICENSE | |
25 | * | |
26 | * Copyright(c) 2014 Intel Corporation. | |
27 | * | |
28 | * Redistribution and use in source and binary forms, with or without | |
29 | * modification, are permitted provided that the following conditions | |
30 | * are met: | |
31 | * | |
32 | * * Redistributions of source code must retain the above copyright | |
33 | * notice, this list of conditions and the following disclaimer. | |
34 | * * Redistributions in binary form must reproduce the above copyright | |
35 | * notice, this list of conditions and the following disclaimer in | |
36 | * the documentation and/or other materials provided with the | |
37 | * distribution. | |
38 | * * Neither the name of Intel Corporation nor the names of its | |
39 | * contributors may be used to endorse or promote products derived | |
40 | * from this software without specific prior written permission. | |
41 | * | |
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
43 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
44 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
45 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
46 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
47 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
48 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
49 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
50 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
51 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
52 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
53 | */ | |
54 | ||
55 | #include <linux/linkage.h> | |
56 | #include "sha1_mb_mgr_datastruct.S" | |
57 | ||
58 | ||
/*
 * Eight-lane SHA1 routine called by sha1_mb_mgr_submit_avx2.  It is declared
 * external here; the declared name must match the symbol that is called.
 */
.extern sha1_x8_avx2

/*
 * LINUX register definitions.
 * arg1/arg2 are the first two integer argument registers.
 * size_offset, tmp2 and extra_blocks are not referenced by the submit
 * routine visible in this file; they are kept so the set of names is unchanged.
 */
arg1		= %rdi
arg2		= %rsi
size_offset	= %rcx
tmp2		= %rcx
extra_blocks	= %rdx

/*
 * Common definitions.
 * These are preprocessor aliases: state is arg1, while job, len2 and p2
 * all name the second argument register (%rsi).
 */
#define state	arg1
#define job	%rsi
#define len2	arg2
#define p2	arg2

/*
 * idx must be a register not clobbered by sha1_x8_avx2.
 * idx and last_len share %r8.
 */
idx		= %r8
DWORD_idx	= %r8d
last_len	= %r8

/* p and start_offset share %r11 */
p		= %r11
start_offset	= %r11

unused_lanes		= %rbx
BYTE_unused_lanes	= %bl

/* job_rax and len share %rax; they are never live at the same time below */
job_rax		= %rax
len		= %rax
DWORD_len	= %eax

/* lane and tmp3 share %rbp */
lane		= %rbp
tmp3		= %rbp

tmp		= %r9
DWORD_tmp	= %r9d

lane_data	= %r10

/*
 * STACK_SPACE needs to be an odd multiple of 8.
 * 8*8 + 16*10 + 8 = 232 bytes; the submit routine stores its saved
 * registers in the first 8*8 bytes of this area.
 */
STACK_SPACE	= 8*8 + 16*10 + 8
99 | ||
/*
 * JOB* sha1_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
 *
 * In:   arg1 (%rdi) = state
 *       arg2 (%rsi) = job
 * Out:  %rax = pointer to a completed job, or 0 when the submitted job was
 *       queued but unused lanes remain (unused_lanes != 0xF after the pop).
 *
 * Operation:
 *   1. Pop a lane number from the low nibble of state->unused_lanes and
 *      install the job there: job pointer, (len << 4) | lane in the lens
 *      array, the five digest dwords, and the data pointer.
 *   2. If unused_lanes has not collapsed to 0xF, return 0.
 *   3. Otherwise take the minimum of the eight lens entries.  Its low nibble
 *      is the lane index, the remaining bits the length.  When the length is
 *      non-zero, subtract it from every entry and call sha1_x8_avx2 with
 *      arg1 = state and arg2 = that length.
 *   4. Retire the job in the minimum lane: clear its slot, mark it
 *      STS_COMPLETED, push the lane back onto unused_lanes, set its lens
 *      entry to 0xFFFFFFFF and copy the digest back into the job.
 *
 * Saved and restored here: %rbx, %rbp, %r12-%r15 and %rsp.  %r12-%r15 are not
 * used by this routine itself; presumably sha1_x8_avx2 clobbers them
 * (NOTE(review): confirm against sha1_x8_avx2).
 * Relies on sha1_x8_avx2 leaving state (%rdi) and idx (%r8) intact.
 * Also written here: %rax, %rsi, %r8-%r11, %xmm0-%xmm3, flags.
 */
ENTRY(sha1_mb_mgr_submit_avx2)

	/*
	 * Build a 32-byte aligned frame.  The incoming rsp is parked in %r10
	 * until it is stored in the frame; %r10 is reused as lane_data after.
	 */
	mov	%rsp, %r10
	sub	$STACK_SPACE, %rsp
	and	$~31, %rsp

	mov	%rbx, (%rsp)
	mov	%r10, 8*2(%rsp)		# save old rsp
	mov	%rbp, 8*3(%rsp)
	mov	%r12, 8*4(%rsp)
	mov	%r13, 8*5(%rsp)
	mov	%r14, 8*6(%rsp)
	mov	%r15, 8*7(%rsp)

	/* Pop the next lane number from the low nibble of unused_lanes */
	mov	_unused_lanes(state), unused_lanes
	mov	unused_lanes, lane
	and	$0xF, lane
	shr	$4, unused_lanes
	imul	$_LANE_DATA_size, lane, lane_data
	movl	$STS_BEING_PROCESSED, _status(job)
	lea	_ldata(state, lane_data), lane_data
	mov	unused_lanes, _unused_lanes(state)
	movl	_len(job), DWORD_len

	mov	job, _job_in_lane(lane_data)
	/* lens entry = (len << 4) | lane, so one min search yields both */
	shl	$4, len
	or	lane, len

	movl	DWORD_len, _lens(state, lane, 4)

	/*
	 * Load digest words from result_digest.
	 * In the args area, digest word k of a lane lives at
	 * _args_digest + k*32 + lane*4, i.e. words are interleaved by lane.
	 */
	vmovdqu	_result_digest(job), %xmm0
	movl	_result_digest+1*16(job), DWORD_tmp
	vmovd	%xmm0, _args_digest(state, lane, 4)
	vpextrd	$1, %xmm0, _args_digest+1*32(state, lane, 4)
	vpextrd	$2, %xmm0, _args_digest+2*32(state, lane, 4)
	vpextrd	$3, %xmm0, _args_digest+3*32(state, lane, 4)
	movl	DWORD_tmp, _args_digest+4*32(state, lane, 4)

	mov	_buffer(job), p
	mov	p, _args_data_ptr(state, lane, 8)

	/* Only 0xF left in unused_lanes means no lane number remains to pop */
	cmp	$0xF, unused_lanes
	jne	.Lreturn_null

.Lstart_loop:
	# Find min length
	vmovdqa	_lens(state), %xmm0
	vmovdqa	_lens+1*16(state), %xmm1

	/*
	 * %xmm3 is read before being written below; only the lanes marked x
	 * in the comments depend on its old contents, and those are unused.
	 */
	vpminud	%xmm1, %xmm0, %xmm2		# xmm2 has {D,C,B,A}
	vpalignr $8, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,D,C}
	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has {x,x,E,F}
	vpalignr $4, %xmm2, %xmm3, %xmm3	# xmm3 has {x,x,x,E}
	vpminud	%xmm3, %xmm2, %xmm2		# xmm2 has min value in low dword

	/* Split the minimum entry: low nibble -> idx, remaining bits -> len2 */
	vmovd	%xmm2, DWORD_idx
	mov	idx, len2
	and	$0xF, idx
	shr	$4, len2
	jz	.Llen_is_0			# ZF comes from the shr above

	/* Broadcast the minimum with its lane nibble cleared, then subtract */
	vpand	clear_low_nibble(%rip), %xmm2, %xmm2
	vpshufd	$0, %xmm2, %xmm2

	vpsubd	%xmm2, %xmm0, %xmm0
	vpsubd	%xmm2, %xmm1, %xmm1

	vmovdqa	%xmm0, _lens + 0*16(state)
	vmovdqa	%xmm1, _lens + 1*16(state)


	# "state" and "args" are the same address, arg1
	# len is arg2
	call	sha1_x8_avx2

	# state and idx are intact

.Llen_is_0:
	# process completed job "idx"
	imul	$_LANE_DATA_size, idx, lane_data
	lea	_ldata(state, lane_data), lane_data

	mov	_job_in_lane(lane_data), job_rax
	mov	_unused_lanes(state), unused_lanes
	movq	$0, _job_in_lane(lane_data)
	movl	$STS_COMPLETED, _status(job_rax)
	/* Push idx back onto the unused_lanes nibble stack */
	shl	$4, unused_lanes
	or	idx, unused_lanes
	mov	unused_lanes, _unused_lanes(state)

	/*
	 * 0xFFFFFFFF is the largest unsigned dword, so this lane does not win
	 * a later vpminud search against an entry that holds a real length.
	 */
	movl	$0xFFFFFFFF, _lens(state, idx, 4)

	/* Gather the five digest dwords of lane idx and copy them to the job */
	vmovd	_args_digest(state, idx, 4), %xmm0
	vpinsrd	$1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0
	vpinsrd	$2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0
	vpinsrd	$3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0
	movl	_args_digest+4*32(state, idx, 4), DWORD_tmp

	vmovdqu	%xmm0, _result_digest(job_rax)
	movl	DWORD_tmp, _result_digest+1*16(job_rax)

.Lreturn:

	mov	(%rsp), %rbx
	mov	8*2(%rsp), %r10		# restore old rsp
	mov	8*3(%rsp), %rbp
	mov	8*4(%rsp), %r12
	mov	8*5(%rsp), %r13
	mov	8*6(%rsp), %r14
	mov	8*7(%rsp), %r15
	mov	%r10, %rsp

	ret

.Lreturn_null:
	/* No job completed on this call: return 0 */
	xor	job_rax, job_rax
	jmp	.Lreturn

ENDPROC(sha1_mb_mgr_submit_avx2)
223 | ||
/*
 * 128-bit mask: low dword is 0xFFFFFFF0, the upper three dwords are zero.
 * The submit routine ANDs it with the minimum lens entry to strip the lane
 * index held in the low nibble.  The value is only ever read, so it is placed
 * in a read-only, mergeable 16-byte constant section rather than in .data.
 */
.section	.rodata.cst16.clear_low_nibble, "aM", @progbits, 16

.align 16
clear_low_nibble:
	.octa	0x000000000000000000000000FFFFFFF0