]> bbs.cooldavid.org Git - net-next-2.6.git/blame - arch/x86/crypto/aesni-intel_asm.S
crypto: aesni-intel - Use gas macro for AES-NI instructions
[net-next-2.6.git] / arch / x86 / crypto / aesni-intel_asm.S
CommitLineData
54b6a1bd
HY
1/*
2 * Implement AES algorithm in Intel AES-NI instructions.
3 *
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
6 *
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
10 * Kahraman Akdemir
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 */
17
18#include <linux/linkage.h>
b369e521 19#include <asm/inst.h>
54b6a1bd
HY
20
21.text
22
/*
 * 128-bit data registers.  STATE1..STATE4 hold up to four cipher states,
 * IN1..IN4 the matching input blocks; STATE and IN alias the first of each
 * for the single-block code paths.
 */
#define STATE1	%xmm0
#define STATE2	%xmm4
#define STATE3	%xmm5
#define STATE4	%xmm6
#define STATE	STATE1
#define IN1	%xmm1
#define IN2	%xmm7
#define IN3	%xmm8
#define IN4	%xmm9
#define IN	IN1
#define KEY	%xmm2		/* current round key */
#define IV	%xmm3		/* CBC chaining value */

/*
 * Integer registers.  KEYP, OUTP, INP, LEN and IVP are the argument
 * registers of the exported functions, in that order; KLEN, T1 and T2
 * are scratch.
 */
#define KEYP	%rdi
#define OUTP	%rsi
#define INP	%rdx
#define LEN	%rcx
#define IVP	%r8
#define KLEN	%r9d
#define T1	%r10
#define TKEYP	T1
#define T2	%r11
45
/*
 * _key_expansion_128 / _key_expansion_256a: internal ABI
 * input:
 *	%xmm0:	round key to derive the next one from
 *	%xmm1:	AESKEYGENASSIST result for this round
 *	%xmm4:	scratch, dword 0 must be zero on entry (it is still zero
 *		on return)
 *	%rcx:	address the new round key is stored to
 * output:
 *	%xmm0:	new round key (also written to the old %rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_128:
_key_expansion_256a:
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0		# each dword now holds the XOR of itself and all lower dwords
	pshufd $0xff, %xmm1, %xmm1	# broadcast dword 3 of the assist value
	pxor %xmm1, %xmm0
	movaps %xmm0, (%rcx)
	add $16, %rcx
	ret
57
/*
 * _key_expansion_192a: internal ABI
 * input:
 *	%xmm0, %xmm2:	current key state
 *	%xmm1:		AESKEYGENASSIST result for this round
 *	%xmm4:		scratch, dword 0 must be zero on entry
 *	%rcx:		address the next schedule entries are stored to
 * output:
 *	%xmm0, %xmm2:	updated key state
 *	%rcx:		advanced by 32 (two 16-byte entries are written)
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
_key_expansion_192a:
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0x55, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm6		# keep the incoming %xmm2 for the first store
	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0xff, %xmm0, %xmm3	# broadcast dword 3 of the updated %xmm0
	pxor %xmm5, %xmm2
	pxor %xmm3, %xmm2

	shufps $0x44, %xmm0, %xmm6	# low half: old %xmm2, high half: low half of new %xmm0
	movaps %xmm6, (%rcx)
	movaps %xmm0, %xmm1
	shufps $0x4e, %xmm2, %xmm1	# low half: high half of new %xmm0, high half: low half of new %xmm2
	movaps %xmm1, 16(%rcx)
	add $32, %rcx
	ret
80
/*
 * _key_expansion_192b: internal ABI
 * input:
 *	%xmm0, %xmm2:	current key state
 *	%xmm1:		AESKEYGENASSIST result for this round
 *	%xmm4:		scratch, dword 0 must be zero on entry
 *	%rcx:		address the next schedule entry is stored to
 * output:
 *	%xmm0, %xmm2:	updated key state (%xmm0 is also written to the
 *			old %rcx)
 *	%rcx:		advanced by 16
 * changed:
 *	%xmm1, %xmm3, %xmm4, %xmm5
 */
_key_expansion_192b:
	shufps $0x10, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	shufps $0x8c, %xmm0, %xmm4
	pxor %xmm4, %xmm0
	pshufd $0x55, %xmm1, %xmm1	# broadcast dword 1 of the assist value
	pxor %xmm1, %xmm0

	movaps %xmm2, %xmm5
	pslldq $4, %xmm5
	pshufd $0xff, %xmm0, %xmm3	# broadcast dword 3 of the updated %xmm0
	pxor %xmm5, %xmm2
	pxor %xmm3, %xmm2

	movaps %xmm0, (%rcx)
	add $16, %rcx
	ret
98
/*
 * _key_expansion_256b: internal ABI
 * input:
 *	%xmm2:	round key to derive the next one from
 *	%xmm1:	AESKEYGENASSIST result for this round
 *	%xmm4:	scratch, dword 0 must be zero on entry (it is still zero
 *		on return)
 *	%rcx:	address the new round key is stored to
 * output:
 *	%xmm2:	new round key (also written to the old %rcx)
 *	%rcx:	advanced by 16
 * changed:
 *	%xmm1, %xmm4
 */
_key_expansion_256b:
	shufps $0x10, %xmm2, %xmm4
	pxor %xmm4, %xmm2
	shufps $0x8c, %xmm2, %xmm4
	pxor %xmm4, %xmm2		# each dword now holds the XOR of itself and all lower dwords
	pshufd $0xaa, %xmm1, %xmm1	# broadcast dword 2 of the assist value
	pxor %xmm1, %xmm2
	movaps %xmm2, (%rcx)
	add $16, %rcx
	ret
109
/*
 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 *                   unsigned int key_len)
 *
 * Expands in_key into the encryption round keys stored from offset 0 of
 * ctx, derives the decryption round keys stored from offset 240 of ctx,
 * and records key_len at offset 480 of ctx.
 *
 * key_len is expected to be 16, 24 or 32; only its low byte is compared
 * (below 24 takes the 128-bit path, above 24 the 256-bit path).
 *
 * Always returns 0.
 */
ENTRY(aesni_set_key)
	movl %edx, 480(%rdi)		# remember the key length in the context
	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
	movups (%rsi), %xmm0		# user key (first 16 bytes)
	movaps %xmm0, (%rdi)
	lea 0x10(%rdi), %rcx		# rcx: next free slot of the key schedule
	cmp $24, %dl
	jb .Lenc_key128
	je .Lenc_key192

	/* 256-bit key: the second 16 bytes are schedule entry 1 */
	movups 0x10(%rsi), %xmm2
	movaps %xmm2, 0x10(%rdi)
	lea 0x20(%rdi), %rcx
	.irp rcon, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20
	AESKEYGENASSIST \rcon %xmm2 %xmm1
	call _key_expansion_256a
	AESKEYGENASSIST \rcon %xmm0 %xmm1
	call _key_expansion_256b
	.endr
	AESKEYGENASSIST 0x40 %xmm2 %xmm1	# round 7
	call _key_expansion_256a
	jmp .Ldec_key

.Lenc_key192:
	movq 0x10(%rsi), %xmm2		# remaining 8 bytes of the user key
	AESKEYGENASSIST 0x1 %xmm2 %xmm1
	call _key_expansion_192a
	AESKEYGENASSIST 0x2 %xmm2 %xmm1
	call _key_expansion_192b
	AESKEYGENASSIST 0x4 %xmm2 %xmm1
	call _key_expansion_192a
	AESKEYGENASSIST 0x8 %xmm2 %xmm1
	call _key_expansion_192b
	AESKEYGENASSIST 0x10 %xmm2 %xmm1
	call _key_expansion_192a
	AESKEYGENASSIST 0x20 %xmm2 %xmm1
	call _key_expansion_192b
	AESKEYGENASSIST 0x40 %xmm2 %xmm1
	call _key_expansion_192a
	AESKEYGENASSIST 0x80 %xmm2 %xmm1
	call _key_expansion_192b
	jmp .Ldec_key

.Lenc_key128:
	.irp rcon, 0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36
	AESKEYGENASSIST \rcon %xmm0 %xmm1
	call _key_expansion_128
	.endr

	/*
	 * Build the decryption schedule at ctx + 240: the encryption round
	 * keys in reverse order, with AESIMC applied to all but the first
	 * and the last one.
	 */
.Ldec_key:
	sub $0x10, %rcx			# rcx: last encryption round key
	movaps (%rdi), %xmm0
	movaps (%rcx), %xmm1
	movaps %xmm0, 240(%rcx)		# first enc key becomes the last dec key
	movaps %xmm1, 240(%rdi)		# last enc key becomes the first dec key
	add $0x10, %rdi			# rdi walks the enc keys upwards
	lea 224(%rcx), %rsi		# rsi walks the dec keys downwards
.balign 4
.Ldec_key_loop:
	movaps (%rdi), %xmm0
	AESIMC %xmm0 %xmm1
	movaps %xmm1, (%rsi)
	add $0x10, %rdi
	sub $0x10, %rsi
	cmp %rcx, %rdi
	jb .Ldec_key_loop
	xor %eax, %eax			# return 0
	ret
212
/*
 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src and stores the result at dst.
 * Neither pointer needs to be 16-byte aligned.
 */
ENTRY(aesni_enc)
	movups (INP), STATE		# input
	movl 480(KEYP), KLEN		# key length
	call _aesni_enc1
	movups STATE, (OUTP)		# output
	ret
222
/*
 * _aesni_enc1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */

/* One middle round: fetch the round key at off(TKEYP) and apply it. */
.macro AESNI_ENC1_ROUND off
	movaps \off(TKEYP), KEY
	AESENC KEY STATE
.endm

_aesni_enc1:
	movaps (KEYP), KEY		# key
	pxor KEY, STATE			# round 0
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .Lenc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je
	je .Lenc192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	AESNI_ENC1_ROUND -0x60
	AESNI_ENC1_ROUND -0x50
.balign 4
.Lenc192:
	/* two extra rounds for keys of 24 bytes and more */
	AESNI_ENC1_ROUND -0x40
	AESNI_ENC1_ROUND -0x30
.balign 4
.Lenc128:
	/* rounds common to every key length */
	.irp disp, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	AESNI_ENC1_ROUND \disp
	.endr
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE		# last round
	ret
278
/*
 * _aesni_enc4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */

/* One middle round on all four states with the key at off(TKEYP). */
.macro AESNI_ENC4_ROUND off
	movaps \off(TKEYP), KEY
	AESENC KEY STATE1
	AESENC KEY STATE2
	AESENC KEY STATE3
	AESENC KEY STATE4
.endm

_aesni_enc4:
	movaps (KEYP), KEY		# key
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .L4enc128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je
	je .L4enc192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	AESNI_ENC4_ROUND -0x60
	AESNI_ENC4_ROUND -0x50
.L4enc192:
	/* two extra rounds for keys of 24 bytes and more */
	AESNI_ENC4_ROUND -0x40
	AESNI_ENC4_ROUND -0x30
.L4enc128:
	/* rounds common to every key length */
	.irp disp, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	AESNI_ENC4_ROUND \disp
	.endr
	movaps 0x70(TKEYP), KEY
	AESENCLAST KEY STATE1		# last round
	AESENCLAST KEY STATE2
	AESENCLAST KEY STATE3
	AESENCLAST KEY STATE4
	ret
385
/*
 * void aesni_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src and stores the result at dst,
 * using the round keys that start at offset 240 of ctx.  Neither data
 * pointer needs to be 16-byte aligned.
 */
ENTRY(aesni_dec)
	movups (INP), STATE		# input
	movl 480(KEYP), KLEN		# key length
	add $240, KEYP			# switch to the decryption round keys
	call _aesni_dec1
	movups STATE, (OUTP)		# output
	ret
396
/*
 * _aesni_dec1:		internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE:		initial state (input)
 * output:
 *	STATE:		final state (output)
 * changed:
 *	KEY
 *	TKEYP (T1)
 */

/* One middle round: fetch the round key at off(TKEYP) and apply it. */
.macro AESNI_DEC1_ROUND off
	movaps \off(TKEYP), KEY
	AESDEC KEY STATE
.endm

_aesni_dec1:
	movaps (KEYP), KEY		# key
	pxor KEY, STATE			# round 0
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .Ldec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je
	je .Ldec192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	AESNI_DEC1_ROUND -0x60
	AESNI_DEC1_ROUND -0x50
.balign 4
.Ldec192:
	/* two extra rounds for keys of 24 bytes and more */
	AESNI_DEC1_ROUND -0x40
	AESNI_DEC1_ROUND -0x30
.balign 4
.Ldec128:
	/* rounds common to every key length */
	.irp disp, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	AESNI_DEC1_ROUND \disp
	.endr
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE		# last round
	ret
452
/*
 * _aesni_dec4:	internal ABI
 * input:
 *	KEYP:		key struct pointer
 *	KLEN:		key length
 *	STATE1:		initial state (input)
 *	STATE2
 *	STATE3
 *	STATE4
 * output:
 *	STATE1:		final state (output)
 *	STATE2
 *	STATE3
 *	STATE4
 * changed:
 *	KEY
 *	TKEYP (T1)
 */

/* One middle round on all four states with the key at off(TKEYP). */
.macro AESNI_DEC4_ROUND off
	movaps \off(TKEYP), KEY
	AESDEC KEY STATE1
	AESDEC KEY STATE2
	AESDEC KEY STATE3
	AESDEC KEY STATE4
.endm

_aesni_dec4:
	movaps (KEYP), KEY		# key
	pxor KEY, STATE1		# round 0
	pxor KEY, STATE2
	pxor KEY, STATE3
	pxor KEY, STATE4
	lea 0x30(KEYP), TKEYP
	cmp $24, KLEN
	jb .L4dec128
	lea 0x20(TKEYP), TKEYP		# lea keeps the flags of the cmp for the je
	je .L4dec192
	add $0x20, TKEYP
	/* two extra rounds for the longest key */
	AESNI_DEC4_ROUND -0x60
	AESNI_DEC4_ROUND -0x50
.balign 4
.L4dec192:
	/* two extra rounds for keys of 24 bytes and more */
	AESNI_DEC4_ROUND -0x40
	AESNI_DEC4_ROUND -0x30
.balign 4
.L4dec128:
	/* rounds common to every key length */
	.irp disp, -0x20, -0x10, 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60
	AESNI_DEC4_ROUND \disp
	.endr
	movaps 0x70(TKEYP), KEY
	AESDECLAST KEY STATE1		# last round
	AESDECLAST KEY STATE2
	AESDECLAST KEY STATE3
	AESDECLAST KEY STATE4
	ret
559
/*
 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len)
 *
 * ECB-encrypts every complete 16-byte block of src into dst, four blocks
 * at a time while at least 64 bytes remain and one at a time afterwards.
 * A trailing partial block (len % 16 bytes) is left untouched; nothing is
 * done when len < 16.
 *
 * len is unsigned, so all length comparisons use unsigned conditions.
 */
ENTRY(aesni_ecb_enc)
	cmp $16, LEN			# less than one block (including len == 0)?
	jb .Lecb_enc_ret
	mov 480(KEYP), KLEN
	cmp $64, LEN
	jb .Lecb_enc_loop1
.align 4
.Lecb_enc_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_enc4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_enc_loop4		# unsigned: at least four more blocks
	cmp $16, LEN
	jb .Lecb_enc_ret
.align 4
.Lecb_enc_loop1:
	movups (INP), STATE1
	call _aesni_enc1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_enc_loop1		# unsigned: at least one more block
.Lecb_enc_ret:
	ret
602
/*
 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len);
 *
 * ECB-decrypts every complete 16-byte block of src into dst with the
 * round keys that start at offset 240 of ctx, four blocks at a time while
 * at least 64 bytes remain and one at a time afterwards.  A trailing
 * partial block (len % 16 bytes) is left untouched; nothing is done when
 * len < 16.
 *
 * len is unsigned, so all length comparisons use unsigned conditions.
 */
ENTRY(aesni_ecb_dec)
	cmp $16, LEN			# less than one block (including len == 0)?
	jb .Lecb_dec_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	cmp $64, LEN
	jb .Lecb_dec_loop1
.align 4
.Lecb_dec_loop4:
	movups (INP), STATE1
	movups 0x10(INP), STATE2
	movups 0x20(INP), STATE3
	movups 0x30(INP), STATE4
	call _aesni_dec4
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lecb_dec_loop4		# unsigned: at least four more blocks
	cmp $16, LEN
	jb .Lecb_dec_ret
.align 4
.Lecb_dec_loop1:
	movups (INP), STATE1
	call _aesni_dec1
	movups STATE1, (OUTP)
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lecb_dec_loop1		# unsigned: at least one more block
.Lecb_dec_ret:
	ret
646
/*
 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-encrypts every complete 16-byte block of src into dst, starting
 * from the chaining value at iv.  On return iv holds the last ciphertext
 * block produced.  A trailing partial block (len % 16 bytes) is left
 * untouched; when len < 16 nothing is done and iv is not modified.
 *
 * len is unsigned, so all length comparisons use unsigned conditions.
 */
ENTRY(aesni_cbc_enc)
	cmp $16, LEN
	jb .Lcbc_enc_ret
	mov 480(KEYP), KLEN
	movups (IVP), STATE	# load iv as initial state
.align 4
.Lcbc_enc_loop:
	movups (INP), IN	# load input
	pxor IN, STATE		# chain: plaintext xor previous ciphertext
	call _aesni_enc1
	movups STATE, (OUTP)	# store output
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_enc_loop	# unsigned: at least one more block
	movups STATE, (IVP)	# hand the last ciphertext block back as iv
.Lcbc_enc_ret:
	ret
670
/*
 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
 *		      size_t len, u8 *iv)
 *
 * CBC-decrypts every complete 16-byte block of src into dst with the
 * round keys that start at offset 240 of ctx, starting from the chaining
 * value at iv.  Four blocks are processed at a time while at least 64
 * bytes remain, one at a time afterwards.  On return iv holds the last
 * ciphertext block consumed.  A trailing partial block (len % 16 bytes)
 * is left untouched; when len < 16 nothing is done and iv is not
 * modified.
 *
 * len is unsigned, so all length comparisons use unsigned conditions.
 */
ENTRY(aesni_cbc_dec)
	cmp $16, LEN
	jb .Lcbc_dec_just_ret
	mov 480(KEYP), KLEN
	add $240, KEYP			# switch to the decryption round keys
	movups (IVP), IV
	cmp $64, LEN
	jb .Lcbc_dec_loop1
.align 4
.Lcbc_dec_loop4:
	/* keep each ciphertext block: it chains into the next plaintext */
	movups (INP), IN1
	movaps IN1, STATE1
	movups 0x10(INP), IN2
	movaps IN2, STATE2
	movups 0x20(INP), IN3
	movaps IN3, STATE3
	movups 0x30(INP), IN4
	movaps IN4, STATE4
	call _aesni_dec4
	pxor IV, STATE1
	pxor IN1, STATE2
	pxor IN2, STATE3
	pxor IN3, STATE4
	movaps IN4, IV			# chaining value for the next group
	movups STATE1, (OUTP)
	movups STATE2, 0x10(OUTP)
	movups STATE3, 0x20(OUTP)
	movups STATE4, 0x30(OUTP)
	sub $64, LEN
	add $64, INP
	add $64, OUTP
	cmp $64, LEN
	jae .Lcbc_dec_loop4		# unsigned: at least four more blocks
	cmp $16, LEN
	jb .Lcbc_dec_ret
.align 4
.Lcbc_dec_loop1:
	movups (INP), IN
	movaps IN, STATE
	call _aesni_dec1
	pxor IV, STATE
	movups STATE, (OUTP)
	movaps IN, IV			# chaining value for the next block
	sub $16, LEN
	add $16, INP
	add $16, OUTP
	cmp $16, LEN
	jae .Lcbc_dec_loop1		# unsigned: at least one more block
.Lcbc_dec_ret:
	movups IV, (IVP)		# hand the last ciphertext block back as iv
.Lcbc_dec_just_ret:
	ret