Commit | Line | Data |
---|---|---|
2c98833a AB |
1 | /* |
2 | * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions | |
3 | * | |
4 | * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | #include <linux/linkage.h> | |
12 | #include <asm/assembler.h> | |
13 | ||
/*
 * Assembler setup and symbolic register names shared by the add_only /
 * add_update macros and by sha1_ce_transform.
 */
	.text
	.arch		armv8-a+crypto

	/* round constant vectors */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	/* scratch vectors: each holds a (schedule words + round constant) sum */
	t0		.req	v4
	t1		.req	v5

	/* digest as kept across blocks: four words in v6/q6, fifth in v7/s7 */
	dgav		.req	v6
	dga		.req	q6
	dgbv		.req	v7
	dgb		.req	s7

	/* working registers used while the rounds of one block are running */
	dg0v		.req	v12
	dg0q		.req	q12
	dg0s		.req	s12
	dg1v		.req	v13
	dg1s		.req	s13
	dg2s		.req	s14
36 | ||
/*
 * add_only - emit one sha1h + sha1<rnd> step without touching the schedule
 *
 *   rnd    suffix of the hash instruction to emit: c, p or m
 *   phase  "ev" selects the even variant, anything else the odd variant
 *   kreg   vector register holding the round constant to add
 *   wk     number N of the schedule register vN to pre-add for the
 *          following step (may be blank in the odd variant: no add emitted)
 *   e_in   optional register to use as the second sha1<rnd> operand in
 *          the even variant; dg1s is used when it is blank
 *
 * The two variants ping-pong between t0/t1 and dg1s/dg2s: the even variant
 * consumes t0 and dg1s (or e_in) while producing t1 and dg2s, the odd
 * variant consumes t1 and dg2s while producing t0 and dg1s.
 */
	.macro		add_only, rnd, phase, kreg, wk, e_in
	.ifnc		\phase, ev
	/* odd step */
	.ifnb		\wk
	add		t0.4s, v\wk\().4s, \kreg\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\rnd	dg0q, dg2s, t1.4s
	.else
	/* even step */
	add		t1.4s, v\wk\().4s, \kreg\().4s
	sha1h		dg2s, dg0s
	.ifb		\e_in
	sha1\rnd	dg0q, dg1s, t0.4s
	.else
	sha1\rnd	dg0q, \e_in, t0.4s
	.endif
	.endif
	.endm
54 | ||
/*
 * add_update - one add_only step wrapped in a message schedule update
 *
 *   rnd, phase, kreg   passed through unchanged to add_only
 *   m0..m3             register numbers: v<m0> is rewritten by
 *                      sha1su0 (using v<m1>, v<m2>) and then by
 *                      sha1su1 (using v<m3>); v<m1> is also the schedule
 *                      register handed to add_only for its pre-add
 *   e_in               optional, passed through to add_only
 */
	.macro		add_update, rnd, phase, kreg, m0, m1, m2, m3, e_in
	sha1su0		v\m0\().4s, v\m1\().4s, v\m2\().4s
	add_only	\rnd, \phase, \kreg, \m1, \e_in
	sha1su1		v\m0\().4s, v\m3\().4s
	.endm
60 | ||
/*
 * The SHA1 round constants, 16-byte aligned. sha1_ce_transform reads the
 * four words in order and replicates them into k0, k1, k2 and k3.
 */
	.p2align	4
.Lsha1_rcon:
	.word		0x5a827999
	.word		0x6ed9eba1
	.word		0x8f1bbcdc
	.word		0xca62c1d6
67 | ||
/*
 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 *			  int blocks)
 *
 * Runs the SHA-1 compression function over 'blocks' consecutive 64-byte
 * blocks at 'src' and writes the updated digest back to 'sst'.
 *
 * In:   x0 = sst    (digest words at offsets 0..19, plus the 'finalize'
 *                    and 'count' fields addressed through the
 *                    sha1_ce_offsetof_* symbols)
 *       x1 = src    (advanced by 64 for every block consumed)
 *       w2 = blocks (must be >= 1: the block is loaded and the counter
 *                    decremented before the loop condition is tested)
 * Out:  digest stored to [x0]; no register return value
 * Uses: x1, w2, x4, x6, x7, x8, v0-v14
 *
 * NOTE(review): v8-v14 are used without being saved here; presumably the
 * caller brackets this call with kernel_neon_begin()/kernel_neon_end() -
 * confirm against the C glue code.
 * NOTE(review): sha1_ce_offsetof_finalize and sha1_ce_offsetof_count are
 * not defined in this file; presumably absolute symbols supplied by the C
 * glue code that give field offsets in struct sha1_ce_state - confirm.
 */
ENTRY(sha1_ce_transform)
	/* load round constants, each replicated across all four lanes */
	adr		x6, .Lsha1_rcon
	ld1r		{k0.4s}, [x6], #4
	ld1r		{k1.4s}, [x6], #4
	ld1r		{k2.4s}, [x6], #4
	ld1r		{k3.4s}, [x6]

	/*
	 * load state
	 *
	 * The digest is an array of five 32-bit words, so the first four are
	 * loaded element-wise with ld1. A 128-bit scalar 'ldr q' would put
	 * the words into the lanes in reverse order on big endian.
	 */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/* load sha1_ce_state::finalize */
	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

	/* the message words are big endian: byte swap them on LE */
CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/*
	 * Label 1 is also the entry point for the padding block built below,
	 * which is already in register word order and must not be swapped.
	 */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/* sha1c steps; the last one already pre-adds k1 for the next group */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	/* sha1p steps; the last one pre-adds k2 */
	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	/* sha1m steps; the last one pre-adds k3 */
	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	/* final sha1p steps; only the first still updates the schedule */
	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbnz		w2, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 *
	 * The padding block is built directly in v8-v11: word 0 is
	 * 0x80000000, words 1-13 are zero and words 14/15 hold the high and
	 * low halves of (count << 3). x4 is cleared before branching back so
	 * that the second pass falls through to the store below; w2 is
	 * already zero at this point.
	 */
	cbz		x4, 3f
	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8
	mov		x4, #0
	mov		v11.d[0], xzr
	mov		v11.d[1], x7
	b		1b

	/* store new state, element-wise for the same reason as the load */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	ret
ENDPROC(sha1_ce_transform)