Merge tag 'perf-core-for-mingo-20160411' of git://git.kernel.org/pub/scm/linux/kernel...
[deliverable/linux.git] / arch / arm64 / crypto / sha1-ce-core.S
CommitLineData
2c98833a
AB
1/*
2 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
14 .text
15 .arch armv8-a+crypto
16
/*
 * Register aliases used by the macros and by sha1_ce_transform below.
 */

	/* k0-k3: each holds one round constant replicated across all lanes */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	/* t0/t1: alternating scratch for (schedule words + round constant) */
	t0		.req	v4
	t1		.req	v5

	/* dga: first 16 bytes of the state at [x0]; dgb: the word at [x0, #16] */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/* dg0: per-block working copy of dga (q/s/v views of register 12) */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12

	/* dg1/dg2: alternating destinations of sha1h in add_only */
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14
36
/*
 * add_only op, ev, rc, s0, dg1
 *
 * Emit one sha1<op> step on dg0q (op is c, p or m) and prepare the
 * addend for the following step.
 *
 *   ev   literal "ev" selects the even form, anything else the odd form.
 *   rc   vector register holding the round constant to add.
 *   s0   number N of the schedule register vN that is summed with rc.
 *        Required in the even form; in the odd form it may be left blank,
 *        in which case no sum is produced.
 *   dg1  even form only: register passed as the scalar operand of
 *        sha1<op>; dg1s is used when the argument is blank.
 *
 * Even form: writes t1 and dg2s, consumes t0.
 * Odd form:  writes t0 (if s0 is given) and dg1s, consumes t1 and dg2s.
 * Even and odd invocations are therefore expected to alternate, each one
 * consuming the sum produced by its predecessor.
 */
	.macro		add_only, op, ev, rc, s0, dg1
	.ifc		\ev, ev
	add		t1.4s, v\s0\().4s, \rc\().4s
	sha1h		dg2s, dg0s
	.ifb		\dg1
	sha1\op		dg0q, dg1s, t0.4s
	.else
	sha1\op		dg0q, \dg1, t0.4s
	.endif
	.else
	.ifnb		\s0
	add		t0.4s, v\s0\().4s, \rc\().4s
	.endif
	sha1h		dg1s, dg0s
	sha1\op		dg0q, dg2s, t1.4s
	.endif
	.endm
54
/*
 * add_update op, ev, rc, s0, s1, s2, s3, dg1
 *
 * Same as add_only, but additionally updates schedule register v<s0>:
 * sha1su0 on v<s0>, v<s1>, v<s2> is issued before the add_only step and
 * sha1su1 on v<s0>, v<s3> after it.  Note that add_only is handed s1,
 * not s0, as the schedule register to sum with rc.
 */
	.macro		add_update, op, ev, rc, s0, s1, s2, s3, dg1
	sha1su0		v\s0\().4s, v\s1\().4s, v\s2\().4s
	add_only	\op, \ev, \rc, \s1, \dg1
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm
60
/*
 * The four SHA-1 round constants, stored as consecutive 32-bit words.
 * sha1_ce_transform replicates them into k0..k3, one word per register,
 * in the order they appear here.
 */
	.align		4
.Lsha1_rcon:
	.word		0x5a827999		// -> k0
	.word		0x6ed9eba1		// -> k1
	.word		0x8f1bbcdc		// -> k2
	.word		0xca62c1d6		// -> k3
67
/*
 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 *			  int blocks)
 *
 * Run the SHA-1 compression function over 'blocks' consecutive 64-byte
 * blocks at 'src', updating the five state words at the start of 'sst'.
 * If sha1_ce_state::finalize is non-zero, one extra block consisting of
 * the 0x80 padding marker, zeroes and the bit length derived from
 * sha1_ce_state::count is processed as well.
 *
 * In:     x0 = sst, x1 = src, w2 = blocks
 * Out:    state words at [x0] .. [x0, #16] updated in place
 * Clobb:  x1, w2, x4, x6, x7, x8, v0-v14
 *
 * 'blocks' must be non-zero: the counter is decremented before it is
 * tested, so a zero count would wrap around instead of ending the loop.
 *
 * NOTE(review): v8-v14 are used without being saved; presumably the
 * caller owns the whole NEON register file around this call - confirm.
 */
ENTRY(sha1_ce_transform)
	/* load round constants, each replicated across a full vector */
	adr		x6, .Lsha1_rcon
	ld1r		{k0.4s}, [x6], #4
	ld1r		{k1.4s}, [x6], #4
	ld1r		{k2.4s}, [x6], #4
	ld1r		{k3.4s}, [x6]

	/*
	 * load state
	 *
	 * The first four state words are an array of 32-bit values, so use
	 * a 4 x 32-bit element load rather than a 128-bit scalar load: the
	 * latter would reverse the word order when built for big endian.
	 */
	ld1		{dgav.4s}, [x0]
	ldr		dgb, [x0, #16]

	/* load sha1_ce_state::finalize */
	ldr		w4, [x0, #:lo12:sha1_ce_offsetof_finalize]

	/* load input */
0:	ld1		{v8.4s-v11.4s}, [x1], #64
	sub		w2, w2, #1

CPU_LE(	rev32		v8.16b, v8.16b		)
CPU_LE(	rev32		v9.16b, v9.16b		)
CPU_LE(	rev32		v10.16b, v10.16b	)
CPU_LE(	rev32		v11.16b, v11.16b	)

	/* entry point for the synthesised padding block (no load, no rev32) */
1:	add		t0.4s, v8.4s, k0.4s
	mov		dg0v.16b, dgav.16b

	/*
	 * Each step adds the round constant for the step that follows it,
	 * which is why the constant changes one line before the operation.
	 */
	add_update	c, ev, k0,  8,  9, 10, 11, dgb
	add_update	c, od, k0,  9, 10, 11,  8
	add_update	c, ev, k0, 10, 11,  8,  9
	add_update	c, od, k0, 11,  8,  9, 10
	add_update	c, ev, k1,  8,  9, 10, 11

	add_update	p, od, k1,  9, 10, 11,  8
	add_update	p, ev, k1, 10, 11,  8,  9
	add_update	p, od, k1, 11,  8,  9, 10
	add_update	p, ev, k1,  8,  9, 10, 11
	add_update	p, od, k2,  9, 10, 11,  8

	add_update	m, ev, k2, 10, 11,  8,  9
	add_update	m, od, k2, 11,  8,  9, 10
	add_update	m, ev, k2,  8,  9, 10, 11
	add_update	m, od, k2,  9, 10, 11,  8
	add_update	m, ev, k3, 10, 11,  8,  9

	add_update	p, od, k3, 11,  8,  9, 10
	add_only	p, ev, k3,  9
	add_only	p, od, k3, 10
	add_only	p, ev, k3, 11
	add_only	p, od

	/* update state */
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbnz		w2, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
	cbz		x4, 3f
	ldr		x4, [x0, #:lo12:sha1_ce_offsetof_count]
	movi		v9.2d, #0
	mov		x8, #0x80000000
	movi		v10.2d, #0
	ror		x7, x4, #29		// ror(lsl(x4, 3), 32)
	fmov		d8, x8			// v8 = { 0x80000000, 0, 0, 0 }
	mov		x4, #0			// finalize only once
	mov		v11.d[0], xzr
	mov		v11.d[1], x7		// bit count in the last two words
	b		1b

	/* store new state, again as 4 x 32-bit elements (see load above) */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	ret
ENDPROC(sha1_ce_transform)
This page took 0.104871 seconds and 5 git commands to generate.