Commit | Line | Data |
---|---|---|
0e1227d3 HY |
1 | /* |
2 | * Accelerated GHASH implementation with Intel PCLMULQDQ-NI | |
3 | * instructions. This file contains the accelerated part of the ghash | |
4 | * implementation. More information about PCLMULQDQ can be found at: | |
5 | * | |
6 | * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/ | |
7 | * | |
8 | * Copyright (c) 2009 Intel Corp. | |
9 | * Author: Huang Ying <ying.huang@intel.com> | |
10 | * Vinodh Gopal | |
11 | * Erdinc Ozturk | |
12 | * Deniz Karakoyunlu | |
13 | * | |
14 | * This program is free software; you can redistribute it and/or modify it | |
15 | * under the terms of the GNU General Public License version 2 as published | |
16 | * by the Free Software Foundation. | |
17 | */ | |
18 | ||
19 | #include <linux/linkage.h> | |
564ec0ec | 20 | #include <asm/inst.h> |
0e1227d3 | 21 | |
/*
 * 16-byte constants used by the routines in this file.
 *
 * Nothing in this file writes to them, so they are placed in .rodata
 * instead of writable .data.  They stay 16-byte aligned because they are
 * read with movaps and as direct memory operands of pcmpeqd/pand, all of
 * which require an aligned address.
 */
.section .rodata

.align 16
/* PSHUFB control word: reverses the order of the 16 bytes of a register */
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
/* value XORed into the shifted key by clmul_ghash_setkey on carry-out */
.Lpoly:
	.octa 0xc2000000000000000000000000000001
/* pcmpeqd pattern used by clmul_ghash_setkey to build the mask for .Lpoly */
.Ltwo_one:
	.octa 0x00000001000000000000000000000001
31 | ||
/*
 * Symbolic names for the SSE registers used by the routines below.
 */

/* values that are live across __clmul_gf128mul_ble */
#define DATA	%xmm0	/* operand1 on entry, product on return */
#define SHASH	%xmm1	/* operand2: the hash key */
#define BSWAP	%xmm5	/* byte-swap mask loaded from .Lbswap_mask */
#define IN1	%xmm6	/* current input block in clmul_ghash_update */

/* scratch registers clobbered by __clmul_gf128mul_ble */
#define T1	%xmm2
#define T2	%xmm3
#define T3	%xmm4

.text
41 | ||
/*
 * __clmul_gf128mul_ble: carry-less 128x128-bit multiply followed by
 * reduction.  Uses an internal register convention, not the C ABI.
 *
 *   in:       DATA  = operand1
 *             SHASH = operand2, hash_key << 1 mod poly
 *   out:      DATA  = operand1 * operand2 mod poly
 *   clobbers: T1, T2, T3
 *
 * Below, a1:a0 are the high:low qwords of operand1 and b1:b0 those of
 * operand2.
 */
__clmul_gf128mul_ble:
	/* prepare the three Karatsuba operands */
	pshufd $0x4e, DATA, T2		# T2 = operand1 with its qwords swapped
	pxor DATA, T2			# low qword of T2 = a1 + a0
	pshufd $0x4e, SHASH, T3		# T3 = operand2 with its qwords swapped
	pxor SHASH, T3			# low qword of T3 = b1 + b0
	movaps DATA, T1			# keep a copy of operand1 for a1 * b1

	PCLMULQDQ 0x11 SHASH T1		# T1 = a1 * b1
	PCLMULQDQ 0x00 SHASH DATA	# DATA = a0 * b0
	PCLMULQDQ 0x00 T3 T2		# T2 = (a1 + a0) * (b1 + b0)
	pxor T1, T2
	pxor DATA, T2			# T2 = a0 * b1 + a1 * b0

	/* add the middle term into the 256-bit product at bit offset 64 */
	movaps T2, T3
	psrldq $8, T2			# upper half of the middle term
	pslldq $8, T3			# lower half of the middle term
	pxor T2, T1
	pxor T3, DATA			# <T1:DATA> = carry-less product

	/*
	 * first phase of the reduction: per qword,
	 * T3 = (DATA << 63) ^ (DATA << 62) ^ (DATA << 57)
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	movaps T3, T2
	pslldq $8, T2			# low qword of T3 moved to the high qword
	psrldq $8, T3			# high qword of T3 moved to the low qword
	pxor T2, DATA
	pxor T3, T1

	/*
	 * second phase of the reduction: per qword,
	 * T2 = (DATA >> 1) ^ (DATA >> 2) ^ (DATA >> 7)
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA			# DATA = reduced 128-bit result
	ret
97 | ||
/*
 * void clmul_ghash_mul(char *dst, const be128 *shash)
 *
 * Multiply the 16-byte block at dst by the key at shash and store the
 * result back to dst.
 *
 *   %rdi = dst:   16 bytes, read and overwritten (unaligned access is used)
 *   %rsi = shash: 16 bytes, passed to __clmul_gf128mul_ble as SHASH, which
 *                 expects hash_key << 1 mod poly
 *
 * Clobbers DATA, SHASH, T1, T2, T3 and BSWAP.
 */
ENTRY(clmul_ghash_mul)
	movups (%rdi), DATA
	movups (%rsi), SHASH
	/* RIP-relative so that no absolute relocation is needed */
	movaps .Lbswap_mask(%rip), BSWAP
	PSHUFB_XMM BSWAP DATA		# reverse the byte order of the block
	call __clmul_gf128mul_ble
	PSHUFB_XMM BSWAP DATA		# restore the byte order before storing
	movups DATA, (%rdi)
	ret
ENDPROC(clmul_ghash_mul)
108 | ||
/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const be128 *shash);
 *
 * For every complete 16-byte block of src: XOR the block into the running
 * value taken from dst and multiply the result by the key at shash.  The
 * final value is written back to dst.
 *
 *   %rdi = dst:    16-byte running value, read and overwritten
 *   %rsi = src:    input data, advanced by 16 per block
 *   %edx = srclen: input length in bytes
 *   %rcx = shash:  16-byte key, passed to __clmul_gf128mul_ble as SHASH
 *
 * If srclen < 16 the function returns without touching dst.  A trailing
 * partial block (srclen % 16 bytes) is not consumed.
 *
 * srclen is a 32-bit argument, so only %edx is examined: the upper half of
 * %rdx is not defined by the calling convention.  The length is unsigned,
 * hence the unsigned branches (jb/jae).
 *
 * Clobbers DATA, SHASH, T1, T2, T3, BSWAP and IN1.
 */
ENTRY(clmul_ghash_update)
	cmp $16, %edx
	jb .Lupdate_just_ret		# fewer than 16 bytes: nothing to do
	/* RIP-relative so that no absolute relocation is needed */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA		# reverse the byte order of the running value
.align 4
.Lupdate_loop:
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1		# reverse the byte order of the input block
	pxor IN1, DATA
	call __clmul_gf128mul_ble	# DATA = (DATA ^ block) * key
	sub $16, %edx
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		# another complete block remains
	PSHUFB_XMM BSWAP DATA		# restore the byte order before storing
	movups DATA, (%rdi)
.Lupdate_just_ret:
	ret
ENDPROC(clmul_ghash_update)
134 | ||
/*
 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
 *
 * Calculate hash_key << 1 mod poly
 *
 *   %rdi = shash: receives the 16-byte result (stored in the byte-reversed
 *                 order used internally; it is not swapped back)
 *   %rsi = key:   16-byte hash key
 *
 * Clobbers %xmm0, %xmm1, %xmm2 and BSWAP.
 */
ENTRY(clmul_ghash_setkey)
	/* constants are addressed RIP-relative: no absolute relocations */
	movaps .Lbswap_mask(%rip), BSWAP
	movups (%rsi), %xmm0
	PSHUFB_XMM BSWAP %xmm0		# reverse the byte order of the key

	/* 128-bit shift left by one, built from two 64-bit shifts */
	movaps %xmm0, %xmm1
	psllq $1, %xmm0			# each qword << 1
	psrlq $63, %xmm1		# bit shifted out of each qword
	movaps %xmm1, %xmm2
	pslldq $8, %xmm1		# carry out of the low qword -> high qword
	psrldq $8, %xmm2		# carry out of bit 127 -> low qword
	por %xmm1, %xmm0

	/*
	 * reduction: copy the bit-127 carry into dwords 0 and 3, turn it
	 * into a mask that selects .Lpoly when the carry is set and zero
	 * otherwise, then XOR the selection into the shifted key
	 */
	pshufd $0b00100100, %xmm2, %xmm1
	pcmpeqd .Ltwo_one(%rip), %xmm1
	pand .Lpoly(%rip), %xmm1
	pxor %xmm1, %xmm0
	movups %xmm0, (%rdi)
	ret
ENDPROC(clmul_ghash_setkey)