]>
Commit | Line | Data |
---|---|---|
54b6a1bd HY |
1 | /* |
2 | * Implement AES algorithm in Intel AES-NI instructions. | |
3 | * | |
4 | * The white paper of AES-NI instructions can be downloaded from: | |
5 | * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf | |
6 | * | |
7 | * Copyright (C) 2008, Intel Corp. | |
8 | * Author: Huang Ying <ying.huang@intel.com> | |
9 | * Vinodh Gopal <vinodh.gopal@intel.com> | |
10 | * Kahraman Akdemir | |
11 | * | |
12 | * This program is free software; you can redistribute it and/or modify | |
13 | * it under the terms of the GNU General Public License as published by | |
14 | * the Free Software Foundation; either version 2 of the License, or | |
15 | * (at your option) any later version. | |
16 | */ | |
17 | ||
18 | #include <linux/linkage.h> | |
b369e521 | 19 | #include <asm/inst.h> |
54b6a1bd HY |
20 | |
21 | .text | |
22 | ||
23 | #define STATE1 %xmm0 /* block state 1; the only state in the one-block paths */ | |
24 | #define STATE2 %xmm4 /* block state 2 (four-block paths) */ | |
25 | #define STATE3 %xmm5 /* block state 3 (four-block paths) */ | |
26 | #define STATE4 %xmm6 /* block state 4 (four-block paths) */ | |
27 | #define STATE STATE1 /* alias used by the one-block paths */ | |
28 | #define IN1 %xmm1 /* copy of input block 1, kept for CBC chaining */ | |
29 | #define IN2 %xmm7 /* copy of input block 2 (CBC decrypt, four-block loop) */ | |
30 | #define IN3 %xmm8 /* copy of input block 3 (CBC decrypt, four-block loop) */ | |
31 | #define IN4 %xmm9 /* copy of input block 4 (CBC decrypt, four-block loop) */ | |
32 | #define IN IN1 /* alias used by the one-block paths */ | |
33 | #define KEY %xmm2 /* round key currently being applied */ | |
34 | #define IV %xmm3 /* CBC decrypt chaining value */ | |
35 | ||
36 | #define KEYP %rdi /* argument 1: ctx, or ctx + 240 once a decrypt path has adjusted it */ | |
37 | #define OUTP %rsi /* argument 2: destination buffer */ | |
38 | #define INP %rdx /* argument 3: source buffer */ | |
39 | #define LEN %rcx /* argument 4: remaining byte count */ | |
40 | #define IVP %r8 /* argument 5: IV buffer (CBC entry points) */ | |
41 | #define KLEN %r9d /* key length, loaded from offset 480 of ctx */ | |
42 | #define T1 %r10 /* scratch */ | |
43 | #define TKEYP T1 /* biased round-key pointer inside the round helpers */ | |
44 | #define T2 %r11 /* scratch; not referenced by the code visible in this file section */ | |
45 | ||
46 | _key_expansion_128: # in: xmm0 = previous round key, xmm1 = AESKEYGENASSIST output, xmm4 = 0, rcx = next schedule slot | |
47 | _key_expansion_256a: # second entry label for the same code; the 256-bit path uses it for the keys held in xmm0 | |
48 | pshufd $0b11111111, %xmm1, %xmm1 # broadcast dword 3 of xmm1 to all four lanes | |
49 | shufps $0b00010000, %xmm0, %xmm4 | |
50 | pxor %xmm4, %xmm0 | |
51 | shufps $0b10001100, %xmm0, %xmm4 | |
52 | pxor %xmm4, %xmm0 # dword i of xmm0 is now the XOR of incoming dwords 0..i | |
53 | pxor %xmm1, %xmm0 # fold in the broadcast word: xmm0 = new round key | |
54 | movaps %xmm0, (%rcx) # store it in the current schedule slot | |
55 | add $0x10, %rcx # advance to the next 16-byte slot | |
56 | ret | |
57 | ||
58 | _key_expansion_192a: # 192-bit step that writes two 16-byte schedule slots; in: xmm0, xmm2 = key state, xmm1 = AESKEYGENASSIST output | |
59 | pshufd $0b01010101, %xmm1, %xmm1 # broadcast dword 1 of xmm1 to all four lanes | |
60 | shufps $0b00010000, %xmm0, %xmm4 | |
61 | pxor %xmm4, %xmm0 | |
62 | shufps $0b10001100, %xmm0, %xmm4 | |
63 | pxor %xmm4, %xmm0 # dword i of xmm0 is now the XOR of incoming dwords 0..i | |
64 | pxor %xmm1, %xmm0 # fold in the broadcast word | |
65 | ||
66 | movaps %xmm2, %xmm5 | |
67 | movaps %xmm2, %xmm6 # keep the incoming xmm2 for the first store below | |
68 | pslldq $4, %xmm5 # xmm5 = incoming xmm2 shifted left by 4 bytes | |
69 | pshufd $0b11111111, %xmm0, %xmm3 # broadcast dword 3 of the updated xmm0 | |
70 | pxor %xmm3, %xmm2 | |
71 | pxor %xmm5, %xmm2 | |
72 | ||
73 | movaps %xmm0, %xmm1 | |
74 | shufps $0b01000100, %xmm0, %xmm6 # xmm6 = (low qword of incoming xmm2, low qword of updated xmm0) | |
75 | movaps %xmm6, (%rcx) | |
76 | shufps $0b01001110, %xmm2, %xmm1 # xmm1 = (high qword of updated xmm0, low qword of updated xmm2) | |
77 | movaps %xmm1, 16(%rcx) | |
78 | add $0x20, %rcx # two slots were written | |
79 | ret | |
80 | ||
81 | _key_expansion_192b: # 192-bit step that writes one 16-byte schedule slot; in: xmm0, xmm2 = key state, xmm1 = AESKEYGENASSIST output | |
82 | pshufd $0b01010101, %xmm1, %xmm1 # broadcast dword 1 of xmm1 to all four lanes | |
83 | shufps $0b00010000, %xmm0, %xmm4 | |
84 | pxor %xmm4, %xmm0 | |
85 | shufps $0b10001100, %xmm0, %xmm4 | |
86 | pxor %xmm4, %xmm0 # dword i of xmm0 is now the XOR of incoming dwords 0..i | |
87 | pxor %xmm1, %xmm0 # fold in the broadcast word | |
88 | ||
89 | movaps %xmm2, %xmm5 | |
90 | pslldq $4, %xmm5 # xmm5 = incoming xmm2 shifted left by 4 bytes | |
91 | pshufd $0b11111111, %xmm0, %xmm3 # broadcast dword 3 of the updated xmm0 | |
92 | pxor %xmm3, %xmm2 | |
93 | pxor %xmm5, %xmm2 # xmm2 is updated for the following step but not stored here | |
94 | ||
95 | movaps %xmm0, (%rcx) # only the updated xmm0 is written by this variant | |
96 | add $0x10, %rcx # advance to the next 16-byte slot | |
97 | ret | |
98 | ||
99 | _key_expansion_256b: # 256-bit step for the keys held in xmm2; in: xmm1 = AESKEYGENASSIST output, xmm4 = 0, rcx = next schedule slot | |
100 | pshufd $0b10101010, %xmm1, %xmm1 # broadcast dword 2 of xmm1 to all four lanes | |
101 | shufps $0b00010000, %xmm2, %xmm4 | |
102 | pxor %xmm4, %xmm2 | |
103 | shufps $0b10001100, %xmm2, %xmm4 | |
104 | pxor %xmm4, %xmm2 # dword i of xmm2 is now the XOR of incoming dwords 0..i | |
105 | pxor %xmm1, %xmm2 # fold in the broadcast word: xmm2 = new round key | |
106 | movaps %xmm2, (%rcx) # store it in the current schedule slot | |
107 | add $0x10, %rcx # advance to the next 16-byte slot | |
108 | ret | |
109 | ||
110 | /* Expand in_key into ctx: encryption round keys from offset 0, decryption round keys from offset 240, key_len at offset 480. Always returns 0. | |
111 | * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, | |
112 | * unsigned int key_len) | |
113 | */ | |
114 | ENTRY(aesni_set_key) | |
115 | movups (%rsi), %xmm0 # user key (first 16 bytes) | |
116 | movaps %xmm0, (%rdi) # round key 0 is the first 16 key bytes unchanged | |
117 | lea 0x10(%rdi), %rcx # key addr | |
118 | movl %edx, 480(%rdi) # record key_len at offset 480 of ctx | |
119 | pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x | |
120 | cmp $24, %dl # dispatch on key length: below 24, exactly 24, above 24 | |
121 | jb .Lenc_key128 | |
122 | je .Lenc_key192 | |
123 | movups 0x10(%rsi), %xmm2 # other user key | |
124 | movaps %xmm2, (%rcx) # round key 1 is key bytes 16..31 unchanged | |
125 | add $0x10, %rcx | |
b369e521 | 126 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
54b6a1bd | 127 | call _key_expansion_256a |
b369e521 | 128 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 |
54b6a1bd | 129 | call _key_expansion_256b |
b369e521 | 130 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
54b6a1bd | 131 | call _key_expansion_256a |
b369e521 | 132 | AESKEYGENASSIST 0x2 %xmm0 %xmm1 |
54b6a1bd | 133 | call _key_expansion_256b |
b369e521 | 134 | AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
54b6a1bd | 135 | call _key_expansion_256a |
b369e521 | 136 | AESKEYGENASSIST 0x4 %xmm0 %xmm1 |
54b6a1bd | 137 | call _key_expansion_256b |
b369e521 | 138 | AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
54b6a1bd | 139 | call _key_expansion_256a |
b369e521 | 140 | AESKEYGENASSIST 0x8 %xmm0 %xmm1 |
54b6a1bd | 141 | call _key_expansion_256b |
b369e521 | 142 | AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
54b6a1bd | 143 | call _key_expansion_256a |
b369e521 | 144 | AESKEYGENASSIST 0x10 %xmm0 %xmm1 |
54b6a1bd | 145 | call _key_expansion_256b |
b369e521 | 146 | AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
54b6a1bd | 147 | call _key_expansion_256a |
b369e521 | 148 | AESKEYGENASSIST 0x20 %xmm0 %xmm1 |
54b6a1bd | 149 | call _key_expansion_256b |
b369e521 | 150 | AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
54b6a1bd HY |
151 | call _key_expansion_256a |
152 | jmp .Ldec_key | |
153 | .Lenc_key192: | |
154 | movq 0x10(%rsi), %xmm2 # other user key | |
b369e521 | 155 | AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 |
54b6a1bd | 156 | call _key_expansion_192a |
b369e521 | 157 | AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 |
54b6a1bd | 158 | call _key_expansion_192b |
b369e521 | 159 | AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3 |
54b6a1bd | 160 | call _key_expansion_192a |
b369e521 | 161 | AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4 |
54b6a1bd | 162 | call _key_expansion_192b |
b369e521 | 163 | AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5 |
54b6a1bd | 164 | call _key_expansion_192a |
b369e521 | 165 | AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6 |
54b6a1bd | 166 | call _key_expansion_192b |
b369e521 | 167 | AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7 |
54b6a1bd | 168 | call _key_expansion_192a |
b369e521 | 169 | AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8 |
54b6a1bd HY |
170 | call _key_expansion_192b |
171 | jmp .Ldec_key | |
172 | .Lenc_key128: | |
b369e521 | 173 | AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1 |
54b6a1bd | 174 | call _key_expansion_128 |
b369e521 | 175 | AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2 |
54b6a1bd | 176 | call _key_expansion_128 |
b369e521 | 177 | AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3 |
54b6a1bd | 178 | call _key_expansion_128 |
b369e521 | 179 | AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4 |
54b6a1bd | 180 | call _key_expansion_128 |
b369e521 | 181 | AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5 |
54b6a1bd | 182 | call _key_expansion_128 |
b369e521 | 183 | AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6 |
54b6a1bd | 184 | call _key_expansion_128 |
b369e521 | 185 | AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7 |
54b6a1bd | 186 | call _key_expansion_128 |
b369e521 | 187 | AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8 |
54b6a1bd | 188 | call _key_expansion_128 |
b369e521 | 189 | AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9 |
54b6a1bd | 190 | call _key_expansion_128 |
b369e521 | 191 | AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 |
54b6a1bd HY |
192 | call _key_expansion_128 |
193 | .Ldec_key: # derive the decryption schedule, stored 240 bytes after the encryption schedule | |
194 | sub $0x10, %rcx # rcx now points at the last encryption round key | |
195 | movaps (%rdi), %xmm0 | |
196 | movaps (%rcx), %xmm1 | |
197 | movaps %xmm0, 240(%rcx) # last decryption key = first encryption key, copied as is | |
198 | movaps %xmm1, 240(%rdi) # first decryption key = last encryption key, copied as is | |
199 | add $0x10, %rdi # rdi walks forward over the inner encryption keys | |
200 | lea 240-16(%rcx), %rsi # rsi walks backward over the inner decryption slots | |
201 | .align 4 | |
202 | .Ldec_key_loop: | |
203 | movaps (%rdi), %xmm0 | |
b369e521 | 204 | AESIMC %xmm0 %xmm1 # xmm1 = AESIMC transform of xmm0 (macro presumably from asm/inst.h) |
54b6a1bd HY |
205 | movaps %xmm1, (%rsi) # inner keys land in reverse order |
206 | add $0x10, %rdi | |
207 | sub $0x10, %rsi | |
208 | cmp %rcx, %rdi | |
209 | jb .Ldec_key_loop # stop once rdi reaches the last encryption round key | |
210 | xor %rax, %rax # return 0 | |
211 | ret | |
212 | ||
213 | /* Encrypt one 16-byte block from src into dst using the encryption round keys at the start of ctx. | |
214 | * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | |
215 | */ | |
216 | ENTRY(aesni_enc) | |
217 | movl 480(KEYP), KLEN # key length | |
218 | movups (INP), STATE # input | |
219 | call _aesni_enc1 # transforms STATE in place; changes KEY and TKEYP | |
220 | movups STATE, (OUTP) # output | |
221 | ret | |
222 | ||
223 | /* | |
224 | * _aesni_enc1: internal ABI, encrypts one block held in STATE | |
225 | * input: | |
226 | * KEYP: key struct pointer | |
227 | * KLEN: key length in bytes (callers load it from offset 480 of the key struct) | |
228 | * STATE: initial state (input) | |
229 | * output: | |
230 | * STATE: final state (output) | |
231 | * changed: | |
232 | * KEY | |
233 | * TKEYP (T1) | |
234 | */ | |
235 | _aesni_enc1: | |
236 | movaps (KEYP), KEY # key | |
237 | mov KEYP, TKEYP | |
238 | pxor KEY, STATE # round 0 | |
239 | add $0x30, TKEYP # bias TKEYP so the final ten round keys sit at -0x20..0x70(TKEYP) | |
240 | cmp $24, KLEN # flags from this compare drive both the jb and the je below | |
241 | jb .Lenc128 # key shorter than 24 bytes: ten rounds | |
242 | lea 0x20(TKEYP), TKEYP # lea leaves the flags untouched, unlike add | |
243 | je .Lenc192 # key of exactly 24 bytes: twelve rounds | |
244 | add $0x20, TKEYP # longer key: fourteen rounds | |
245 | movaps -0x60(TKEYP), KEY | |
b369e521 | 246 | AESENC KEY STATE |
54b6a1bd | 247 | movaps -0x50(TKEYP), KEY |
b369e521 | 248 | AESENC KEY STATE |
54b6a1bd HY |
249 | .align 4 |
250 | .Lenc192: | |
251 | movaps -0x40(TKEYP), KEY | |
b369e521 | 252 | AESENC KEY STATE |
54b6a1bd | 253 | movaps -0x30(TKEYP), KEY |
b369e521 | 254 | AESENC KEY STATE |
54b6a1bd HY |
255 | .align 4 |
256 | .Lenc128: | |
257 | movaps -0x20(TKEYP), KEY | |
b369e521 | 258 | AESENC KEY STATE |
54b6a1bd | 259 | movaps -0x10(TKEYP), KEY |
b369e521 | 260 | AESENC KEY STATE |
54b6a1bd | 261 | movaps (TKEYP), KEY |
b369e521 | 262 | AESENC KEY STATE |
54b6a1bd | 263 | movaps 0x10(TKEYP), KEY |
b369e521 | 264 | AESENC KEY STATE |
54b6a1bd | 265 | movaps 0x20(TKEYP), KEY |
b369e521 | 266 | AESENC KEY STATE |
54b6a1bd | 267 | movaps 0x30(TKEYP), KEY |
b369e521 | 268 | AESENC KEY STATE |
54b6a1bd | 269 | movaps 0x40(TKEYP), KEY |
b369e521 | 270 | AESENC KEY STATE |
54b6a1bd | 271 | movaps 0x50(TKEYP), KEY |
b369e521 | 272 | AESENC KEY STATE |
54b6a1bd | 273 | movaps 0x60(TKEYP), KEY |
b369e521 | 274 | AESENC KEY STATE |
54b6a1bd | 275 | movaps 0x70(TKEYP), KEY |
b369e521 | 276 | AESENCLAST KEY STATE |
54b6a1bd HY |
277 | ret |
278 | ||
279 | /* | |
280 | * _aesni_enc4: internal ABI, encrypts four independent blocks with the same key schedule | |
281 | * input: | |
282 | * KEYP: key struct pointer | |
283 | * KLEN: key length in bytes (callers load it from offset 480 of the key struct) | |
284 | * STATE1: initial state (input) | |
285 | * STATE2 | |
286 | * STATE3 | |
287 | * STATE4 | |
288 | * output: | |
289 | * STATE1: final state (output) | |
290 | * STATE2 | |
291 | * STATE3 | |
292 | * STATE4 | |
293 | * changed: | |
294 | * KEY | |
295 | * TKEYP (T1) | |
296 | */ | |
297 | _aesni_enc4: | |
298 | movaps (KEYP), KEY # key | |
299 | mov KEYP, TKEYP | |
300 | pxor KEY, STATE1 # round 0 | |
301 | pxor KEY, STATE2 | |
302 | pxor KEY, STATE3 | |
303 | pxor KEY, STATE4 | |
304 | add $0x30, TKEYP # bias TKEYP so the final ten round keys sit at -0x20..0x70(TKEYP) | |
305 | cmp $24, KLEN # flags from this compare drive both the jb and the je below | |
306 | jb .L4enc128 # key shorter than 24 bytes: ten rounds | |
307 | lea 0x20(TKEYP), TKEYP # lea leaves the flags untouched, unlike add | |
308 | je .L4enc192 # key of exactly 24 bytes: twelve rounds | |
309 | add $0x20, TKEYP # longer key: fourteen rounds | |
310 | movaps -0x60(TKEYP), KEY | |
b369e521 HY |
311 | AESENC KEY STATE1 |
312 | AESENC KEY STATE2 | |
313 | AESENC KEY STATE3 | |
314 | AESENC KEY STATE4 | |
54b6a1bd | 315 | movaps -0x50(TKEYP), KEY |
b369e521 HY |
316 | AESENC KEY STATE1 |
317 | AESENC KEY STATE2 | |
318 | AESENC KEY STATE3 | |
319 | AESENC KEY STATE4 | |
54b6a1bd HY |
320 | #.align 4 |
321 | .L4enc192: | |
322 | movaps -0x40(TKEYP), KEY | |
b369e521 HY |
323 | AESENC KEY STATE1 |
324 | AESENC KEY STATE2 | |
325 | AESENC KEY STATE3 | |
326 | AESENC KEY STATE4 | |
54b6a1bd | 327 | movaps -0x30(TKEYP), KEY |
b369e521 HY |
328 | AESENC KEY STATE1 |
329 | AESENC KEY STATE2 | |
330 | AESENC KEY STATE3 | |
331 | AESENC KEY STATE4 | |
54b6a1bd HY |
332 | #.align 4 |
333 | .L4enc128: | |
334 | movaps -0x20(TKEYP), KEY | |
b369e521 HY |
335 | AESENC KEY STATE1 |
336 | AESENC KEY STATE2 | |
337 | AESENC KEY STATE3 | |
338 | AESENC KEY STATE4 | |
54b6a1bd | 339 | movaps -0x10(TKEYP), KEY |
b369e521 HY |
340 | AESENC KEY STATE1 |
341 | AESENC KEY STATE2 | |
342 | AESENC KEY STATE3 | |
343 | AESENC KEY STATE4 | |
54b6a1bd | 344 | movaps (TKEYP), KEY |
b369e521 HY |
345 | AESENC KEY STATE1 |
346 | AESENC KEY STATE2 | |
347 | AESENC KEY STATE3 | |
348 | AESENC KEY STATE4 | |
54b6a1bd | 349 | movaps 0x10(TKEYP), KEY |
b369e521 HY |
350 | AESENC KEY STATE1 |
351 | AESENC KEY STATE2 | |
352 | AESENC KEY STATE3 | |
353 | AESENC KEY STATE4 | |
54b6a1bd | 354 | movaps 0x20(TKEYP), KEY |
b369e521 HY |
355 | AESENC KEY STATE1 |
356 | AESENC KEY STATE2 | |
357 | AESENC KEY STATE3 | |
358 | AESENC KEY STATE4 | |
54b6a1bd | 359 | movaps 0x30(TKEYP), KEY |
b369e521 HY |
360 | AESENC KEY STATE1 |
361 | AESENC KEY STATE2 | |
362 | AESENC KEY STATE3 | |
363 | AESENC KEY STATE4 | |
54b6a1bd | 364 | movaps 0x40(TKEYP), KEY |
b369e521 HY |
365 | AESENC KEY STATE1 |
366 | AESENC KEY STATE2 | |
367 | AESENC KEY STATE3 | |
368 | AESENC KEY STATE4 | |
54b6a1bd | 369 | movaps 0x50(TKEYP), KEY |
b369e521 HY |
370 | AESENC KEY STATE1 |
371 | AESENC KEY STATE2 | |
372 | AESENC KEY STATE3 | |
373 | AESENC KEY STATE4 | |
54b6a1bd | 374 | movaps 0x60(TKEYP), KEY |
b369e521 HY |
375 | AESENC KEY STATE1 |
376 | AESENC KEY STATE2 | |
377 | AESENC KEY STATE3 | |
378 | AESENC KEY STATE4 | |
54b6a1bd | 379 | movaps 0x70(TKEYP), KEY |
b369e521 HY |
380 | AESENCLAST KEY STATE1 # last round |
381 | AESENCLAST KEY STATE2 | |
382 | AESENCLAST KEY STATE3 | |
383 | AESENCLAST KEY STATE4 | |
54b6a1bd HY |
384 | ret |
385 | ||
386 | /* Decrypt one 16-byte block from src into dst using the decryption round keys stored at ctx + 240. | |
387 | * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) | |
388 | */ | |
389 | ENTRY(aesni_dec) | |
390 | mov 480(KEYP), KLEN # key length | |
391 | add $240, KEYP # switch KEYP to the decryption schedule (must follow the KLEN load) | |
392 | movups (INP), STATE # input | |
393 | call _aesni_dec1 # transforms STATE in place; changes KEY and TKEYP | |
394 | movups STATE, (OUTP) #output | |
395 | ret | |
396 | ||
397 | /* | |
398 | * _aesni_dec1: internal ABI, decrypts one block held in STATE | |
399 | * input: | |
400 | * KEYP: pointer to the decryption round keys (callers in this file pass key struct + 240) | |
401 | * KLEN: key length in bytes | |
402 | * STATE: initial state (input) | |
403 | * output: | |
404 | * STATE: final state (output) | |
405 | * changed: | |
406 | * KEY | |
407 | * TKEYP (T1) | |
408 | */ | |
409 | _aesni_dec1: | |
410 | movaps (KEYP), KEY # key | |
411 | mov KEYP, TKEYP | |
412 | pxor KEY, STATE # round 0 | |
413 | add $0x30, TKEYP # bias TKEYP so the final ten round keys sit at -0x20..0x70(TKEYP) | |
414 | cmp $24, KLEN # flags from this compare drive both the jb and the je below | |
415 | jb .Ldec128 # key shorter than 24 bytes: ten rounds | |
416 | lea 0x20(TKEYP), TKEYP # lea leaves the flags untouched, unlike add | |
417 | je .Ldec192 # key of exactly 24 bytes: twelve rounds | |
418 | add $0x20, TKEYP # longer key: fourteen rounds | |
419 | movaps -0x60(TKEYP), KEY | |
b369e521 | 420 | AESDEC KEY STATE |
54b6a1bd | 421 | movaps -0x50(TKEYP), KEY |
b369e521 | 422 | AESDEC KEY STATE |
54b6a1bd HY |
423 | .align 4 |
424 | .Ldec192: | |
425 | movaps -0x40(TKEYP), KEY | |
b369e521 | 426 | AESDEC KEY STATE |
54b6a1bd | 427 | movaps -0x30(TKEYP), KEY |
b369e521 | 428 | AESDEC KEY STATE |
54b6a1bd HY |
429 | .align 4 |
430 | .Ldec128: | |
431 | movaps -0x20(TKEYP), KEY | |
b369e521 | 432 | AESDEC KEY STATE |
54b6a1bd | 433 | movaps -0x10(TKEYP), KEY |
b369e521 | 434 | AESDEC KEY STATE |
54b6a1bd | 435 | movaps (TKEYP), KEY |
b369e521 | 436 | AESDEC KEY STATE |
54b6a1bd | 437 | movaps 0x10(TKEYP), KEY |
b369e521 | 438 | AESDEC KEY STATE |
54b6a1bd | 439 | movaps 0x20(TKEYP), KEY |
b369e521 | 440 | AESDEC KEY STATE |
54b6a1bd | 441 | movaps 0x30(TKEYP), KEY |
b369e521 | 442 | AESDEC KEY STATE |
54b6a1bd | 443 | movaps 0x40(TKEYP), KEY |
b369e521 | 444 | AESDEC KEY STATE |
54b6a1bd | 445 | movaps 0x50(TKEYP), KEY |
b369e521 | 446 | AESDEC KEY STATE |
54b6a1bd | 447 | movaps 0x60(TKEYP), KEY |
b369e521 | 448 | AESDEC KEY STATE |
54b6a1bd | 449 | movaps 0x70(TKEYP), KEY |
b369e521 | 450 | AESDECLAST KEY STATE |
54b6a1bd HY |
451 | ret |
452 | ||
453 | /* | |
454 | * _aesni_dec4: internal ABI, decrypts four independent blocks with the same key schedule | |
455 | * input: | |
456 | * KEYP: pointer to the decryption round keys (callers in this file pass key struct + 240) | |
457 | * KLEN: key length in bytes | |
458 | * STATE1: initial state (input) | |
459 | * STATE2 | |
460 | * STATE3 | |
461 | * STATE4 | |
462 | * output: | |
463 | * STATE1: final state (output) | |
464 | * STATE2 | |
465 | * STATE3 | |
466 | * STATE4 | |
467 | * changed: | |
468 | * KEY | |
469 | * TKEYP (T1) | |
470 | */ | |
471 | _aesni_dec4: | |
472 | movaps (KEYP), KEY # key | |
473 | mov KEYP, TKEYP | |
474 | pxor KEY, STATE1 # round 0 | |
475 | pxor KEY, STATE2 | |
476 | pxor KEY, STATE3 | |
477 | pxor KEY, STATE4 | |
478 | add $0x30, TKEYP # bias TKEYP so the final ten round keys sit at -0x20..0x70(TKEYP) | |
479 | cmp $24, KLEN # flags from this compare drive both the jb and the je below | |
480 | jb .L4dec128 # key shorter than 24 bytes: ten rounds | |
481 | lea 0x20(TKEYP), TKEYP # lea leaves the flags untouched, unlike add | |
482 | je .L4dec192 # key of exactly 24 bytes: twelve rounds | |
483 | add $0x20, TKEYP # longer key: fourteen rounds | |
484 | movaps -0x60(TKEYP), KEY | |
b369e521 HY |
485 | AESDEC KEY STATE1 |
486 | AESDEC KEY STATE2 | |
487 | AESDEC KEY STATE3 | |
488 | AESDEC KEY STATE4 | |
54b6a1bd | 489 | movaps -0x50(TKEYP), KEY |
b369e521 HY |
490 | AESDEC KEY STATE1 |
491 | AESDEC KEY STATE2 | |
492 | AESDEC KEY STATE3 | |
493 | AESDEC KEY STATE4 | |
54b6a1bd HY |
494 | .align 4 |
495 | .L4dec192: | |
496 | movaps -0x40(TKEYP), KEY | |
b369e521 HY |
497 | AESDEC KEY STATE1 |
498 | AESDEC KEY STATE2 | |
499 | AESDEC KEY STATE3 | |
500 | AESDEC KEY STATE4 | |
54b6a1bd | 501 | movaps -0x30(TKEYP), KEY |
b369e521 HY |
502 | AESDEC KEY STATE1 |
503 | AESDEC KEY STATE2 | |
504 | AESDEC KEY STATE3 | |
505 | AESDEC KEY STATE4 | |
54b6a1bd HY |
506 | .align 4 |
507 | .L4dec128: | |
508 | movaps -0x20(TKEYP), KEY | |
b369e521 HY |
509 | AESDEC KEY STATE1 |
510 | AESDEC KEY STATE2 | |
511 | AESDEC KEY STATE3 | |
512 | AESDEC KEY STATE4 | |
54b6a1bd | 513 | movaps -0x10(TKEYP), KEY |
b369e521 HY |
514 | AESDEC KEY STATE1 |
515 | AESDEC KEY STATE2 | |
516 | AESDEC KEY STATE3 | |
517 | AESDEC KEY STATE4 | |
54b6a1bd | 518 | movaps (TKEYP), KEY |
b369e521 HY |
519 | AESDEC KEY STATE1 |
520 | AESDEC KEY STATE2 | |
521 | AESDEC KEY STATE3 | |
522 | AESDEC KEY STATE4 | |
54b6a1bd | 523 | movaps 0x10(TKEYP), KEY |
b369e521 HY |
524 | AESDEC KEY STATE1 |
525 | AESDEC KEY STATE2 | |
526 | AESDEC KEY STATE3 | |
527 | AESDEC KEY STATE4 | |
54b6a1bd | 528 | movaps 0x20(TKEYP), KEY |
b369e521 HY |
529 | AESDEC KEY STATE1 |
530 | AESDEC KEY STATE2 | |
531 | AESDEC KEY STATE3 | |
532 | AESDEC KEY STATE4 | |
54b6a1bd | 533 | movaps 0x30(TKEYP), KEY |
b369e521 HY |
534 | AESDEC KEY STATE1 |
535 | AESDEC KEY STATE2 | |
536 | AESDEC KEY STATE3 | |
537 | AESDEC KEY STATE4 | |
54b6a1bd | 538 | movaps 0x40(TKEYP), KEY |
b369e521 HY |
539 | AESDEC KEY STATE1 |
540 | AESDEC KEY STATE2 | |
541 | AESDEC KEY STATE3 | |
542 | AESDEC KEY STATE4 | |
54b6a1bd | 543 | movaps 0x50(TKEYP), KEY |
b369e521 HY |
544 | AESDEC KEY STATE1 |
545 | AESDEC KEY STATE2 | |
546 | AESDEC KEY STATE3 | |
547 | AESDEC KEY STATE4 | |
54b6a1bd | 548 | movaps 0x60(TKEYP), KEY |
b369e521 HY |
549 | AESDEC KEY STATE1 |
550 | AESDEC KEY STATE2 | |
551 | AESDEC KEY STATE3 | |
552 | AESDEC KEY STATE4 | |
54b6a1bd | 553 | movaps 0x70(TKEYP), KEY |
b369e521 HY |
554 | AESDECLAST KEY STATE1 # last round |
555 | AESDECLAST KEY STATE2 | |
556 | AESDECLAST KEY STATE3 | |
557 | AESDECLAST KEY STATE4 | |
54b6a1bd HY |
558 | ret |
559 | ||
560 | /* ECB-encrypt len bytes from src into dst, four blocks at a time while at least 64 bytes remain, then one block at a time; a trailing partial block (under 16 bytes) is left untouched. | |
561 | * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | |
562 | * size_t len) | |
563 | */ | |
564 | ENTRY(aesni_ecb_enc) | |
565 | test LEN, LEN # check length | |
566 | jz .Lecb_enc_ret | |
567 | mov 480(KEYP), KLEN # key length, consumed by the round helpers | |
568 | cmp $16, LEN | |
569 | jb .Lecb_enc_ret # less than one whole block: nothing to do | |
570 | cmp $64, LEN | |
571 | jb .Lecb_enc_loop1 | |
572 | .align 4 | |
573 | .Lecb_enc_loop4: | |
574 | movups (INP), STATE1 | |
575 | movups 0x10(INP), STATE2 | |
576 | movups 0x20(INP), STATE3 | |
577 | movups 0x30(INP), STATE4 | |
578 | call _aesni_enc4 | |
579 | movups STATE1, (OUTP) | |
580 | movups STATE2, 0x10(OUTP) | |
581 | movups STATE3, 0x20(OUTP) | |
582 | movups STATE4, 0x30(OUTP) | |
583 | sub $64, LEN | |
584 | add $64, INP | |
585 | add $64, OUTP | |
586 | cmp $64, LEN | |
587 | jae .Lecb_enc_loop4 # unsigned test: LEN is a size_t byte count, matching the jb checks | |
588 | cmp $16, LEN | |
589 | jb .Lecb_enc_ret | |
590 | .align 4 | |
591 | .Lecb_enc_loop1: | |
592 | movups (INP), STATE1 | |
593 | call _aesni_enc1 | |
594 | movups STATE1, (OUTP) | |
595 | sub $16, LEN | |
596 | add $16, INP | |
597 | add $16, OUTP | |
598 | cmp $16, LEN | |
599 | jae .Lecb_enc_loop1 # unsigned test, as above | |
600 | .Lecb_enc_ret: | |
601 | ret | |
602 | ||
603 | /* ECB-decrypt len bytes from src into dst, four blocks at a time while at least 64 bytes remain, then one block at a time; a trailing partial block (under 16 bytes) is left untouched. | |
604 | * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | |
605 | * size_t len); | |
606 | */ | |
607 | ENTRY(aesni_ecb_dec) | |
608 | test LEN, LEN | |
609 | jz .Lecb_dec_ret | |
610 | mov 480(KEYP), KLEN # key length, consumed by the round helpers | |
611 | add $240, KEYP # switch KEYP to the decryption schedule (must follow the KLEN load) | |
612 | cmp $16, LEN | |
613 | jb .Lecb_dec_ret # less than one whole block: nothing to do | |
614 | cmp $64, LEN | |
615 | jb .Lecb_dec_loop1 | |
616 | .align 4 | |
617 | .Lecb_dec_loop4: | |
618 | movups (INP), STATE1 | |
619 | movups 0x10(INP), STATE2 | |
620 | movups 0x20(INP), STATE3 | |
621 | movups 0x30(INP), STATE4 | |
622 | call _aesni_dec4 | |
623 | movups STATE1, (OUTP) | |
624 | movups STATE2, 0x10(OUTP) | |
625 | movups STATE3, 0x20(OUTP) | |
626 | movups STATE4, 0x30(OUTP) | |
627 | sub $64, LEN | |
628 | add $64, INP | |
629 | add $64, OUTP | |
630 | cmp $64, LEN | |
631 | jae .Lecb_dec_loop4 # unsigned test: LEN is a size_t byte count, matching the jb checks | |
632 | cmp $16, LEN | |
633 | jb .Lecb_dec_ret | |
634 | .align 4 | |
635 | .Lecb_dec_loop1: | |
636 | movups (INP), STATE1 | |
637 | call _aesni_dec1 | |
638 | movups STATE1, (OUTP) | |
639 | sub $16, LEN | |
640 | add $16, INP | |
641 | add $16, OUTP | |
642 | cmp $16, LEN | |
643 | jae .Lecb_dec_loop1 # unsigned test, as above | |
644 | .Lecb_dec_ret: | |
645 | ret | |
646 | ||
647 | /* CBC-encrypt the whole 16-byte blocks in len bytes from src into dst; iv supplies the initial chaining value and receives the last ciphertext block. Does nothing when len is below 16. | |
648 | * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | |
649 | * size_t len, u8 *iv) | |
650 | */ | |
651 | ENTRY(aesni_cbc_enc) | |
652 | cmp $16, LEN | |
653 | jb .Lcbc_enc_ret # less than one whole block: leave dst and iv untouched | |
654 | mov 480(KEYP), KLEN # key length, consumed by _aesni_enc1 | |
655 | movups (IVP), STATE # load iv as initial state | |
656 | .align 4 | |
657 | .Lcbc_enc_loop: | |
658 | movups (INP), IN # load input | |
659 | pxor IN, STATE # chain: STATE still holds the previous ciphertext block (or the iv) | |
660 | call _aesni_enc1 | |
661 | movups STATE, (OUTP) # store output | |
662 | sub $16, LEN | |
663 | add $16, INP | |
664 | add $16, OUTP | |
665 | cmp $16, LEN | |
666 | jae .Lcbc_enc_loop # unsigned test: LEN is a size_t byte count, matching the jb check | |
667 | movups STATE, (IVP) # hand the last ciphertext block back as the next iv | |
668 | .Lcbc_enc_ret: | |
669 | ret | |
670 | ||
671 | /* CBC-decrypt the whole 16-byte blocks in len bytes from src into dst, four blocks at a time while at least 64 bytes remain; iv supplies the initial chaining value and receives the last ciphertext block. Does nothing when len is below 16. | |
672 | * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src, | |
673 | * size_t len, u8 *iv) | |
674 | */ | |
675 | ENTRY(aesni_cbc_dec) | |
676 | cmp $16, LEN | |
e6efaa02 | 677 | jb .Lcbc_dec_just_ret # less than one whole block: leave dst and iv untouched |
54b6a1bd HY |
678 | mov 480(KEYP), KLEN # key length, consumed by the round helpers |
679 | add $240, KEYP # switch KEYP to the decryption schedule (must follow the KLEN load) | |
680 | movups (IVP), IV | |
681 | cmp $64, LEN | |
682 | jb .Lcbc_dec_loop1 | |
683 | .align 4 | |
684 | .Lcbc_dec_loop4: | |
685 | movups (INP), IN1 # keep each ciphertext block: it chains into the next plaintext | |
686 | movaps IN1, STATE1 | |
687 | movups 0x10(INP), IN2 | |
688 | movaps IN2, STATE2 | |
689 | movups 0x20(INP), IN3 | |
690 | movaps IN3, STATE3 | |
691 | movups 0x30(INP), IN4 | |
692 | movaps IN4, STATE4 | |
693 | call _aesni_dec4 | |
694 | pxor IV, STATE1 # block 1 chains from the incoming IV | |
695 | pxor IN1, STATE2 # blocks 2..4 chain from the preceding ciphertext block | |
696 | pxor IN2, STATE3 | |
697 | pxor IN3, STATE4 | |
698 | movaps IN4, IV # last ciphertext block becomes the next chaining value | |
699 | movups STATE1, (OUTP) | |
700 | movups STATE2, 0x10(OUTP) | |
701 | movups STATE3, 0x20(OUTP) | |
702 | movups STATE4, 0x30(OUTP) | |
703 | sub $64, LEN | |
704 | add $64, INP | |
705 | add $64, OUTP | |
706 | cmp $64, LEN | |
707 | jae .Lcbc_dec_loop4 # unsigned test: LEN is a size_t byte count, matching the jb checks | |
708 | cmp $16, LEN | |
709 | jb .Lcbc_dec_ret | |
710 | .align 4 | |
711 | .Lcbc_dec_loop1: | |
712 | movups (INP), IN | |
713 | movaps IN, STATE | |
714 | call _aesni_dec1 | |
715 | pxor IV, STATE | |
716 | movups STATE, (OUTP) | |
717 | movaps IN, IV # saved ciphertext becomes the next chaining value | |
718 | sub $16, LEN | |
719 | add $16, INP | |
720 | add $16, OUTP | |
721 | cmp $16, LEN | |
722 | jae .Lcbc_dec_loop1 # unsigned test, as above | |
54b6a1bd | 723 | .Lcbc_dec_ret: |
e6efaa02 HY |
724 | movups IV, (IVP) # hand the last ciphertext block back as the next iv |
725 | .Lcbc_dec_just_ret: | |
54b6a1bd | 726 | ret |