summaryrefslogtreecommitdiff
path: root/arch/arm64/crypto/aes-ce-ccm-core.S
blob: 0132872bd780c74b59248bb9ce4fe1348fbab85f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aes-ce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd.
 * Copyright (C) 2024 Google LLC
 *
 * Author: Ard Biesheuvel <ardb@kernel.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch	armv8-a+crypto

	/*
	 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
	 * 			 u32 rounds);
	 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f
	bne	3f
	mov	v5.16b, v3.16b
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)

	.macro	aes_ccm_do_crypt,enc
	cbz	x2, 5f
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16
	bmi	1f
	bne	4f
	mov	v5.16b, v3.16b
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v6.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v6.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v6.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */

	add	x1, x1, w2, sxtw		/* rewind the input pointer (w2 < 0) */
	add	x0, x0, w2, sxtw		/* rewind the output pointer */

	adr_l	x8, .Lpermute			/* load permute vectors */
	add	x9, x8, w2, sxtw
	sub	x8, x8, w2, sxtw
	ld1	{v7.16b-v8.16b}, [x9]
	ld1	{v9.16b}, [x8]

	ld1	{v2.16b}, [x1]			/* load a full block of input */
	tbl	v1.16b, {v1.16b}, v7.16b	/* move keystream to end of register */
	.if	\enc == 1
	tbl	v7.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v7 */
	eor	v2.16b, v2.16b, v1.16b		/* encrypt partial input block */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* decrypt partial input block */
	tbl	v7.16b, {v2.16b}, v9.16b	/* copy plaintext to start of v7 */
	.endif
	eor	v0.16b, v0.16b, v7.16b		/* fold plaintext into mac */
	tbx	v2.16b, {v6.16b}, v8.16b	/* insert output from previous iteration */

	st1	{v0.16b}, [x5]			/* store mac */
	st1	{v2.16b}, [x0]			/* store output block */
	ret
	.endm

	/*
	 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
	 * 			   u8 const rk[], u32 rounds, u8 mac[],
	 * 			   u8 ctr[]);
	 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1
SYM_FUNC_END(ce_aes_ccm_encrypt)

SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0
SYM_FUNC_END(ce_aes_ccm_decrypt)

	.section ".rodata", "a"
	.align	6
	.fill	15, 1, 0xff
.Lpermute:
	.byte	0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
	.byte	0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
	.fill	15, 1, 0xff