20
20
#include <crypto/sha3.h>
21
21
#include <asm/unaligned.h>
22
22
23
/*
 * On some 32-bit architectures (mn10300 and h8300), GCC ends up using
 * over 1 KB of stack if we inline the round calculation into the loop
 * in keccakf(). On the other hand, on 64-bit architectures with plenty
 * of [64-bit wide] general purpose registers, not inlining it severely
 * hurts performance. So let's use 64-bitness as a heuristic to decide
 * whether to inline or not.
 */
#ifdef CONFIG_64BIT
#define SHA3_INLINE	inline
#else
#define SHA3_INLINE	noinline
#endif

/* the Keccak-f[1600] permutation consists of 24 rounds */
#define KECCAK_ROUNDS 24

static const u64 keccakf_rndc [24 ] = {
@@ -35,111 +49,115 @@ static const u64 keccakf_rndc[24] = {
35
49
36
50
/* update the state with given number of rounds */
37
51
38
- static void __attribute__(( __optimize__ ( "O3" ))) keccakf (u64 st [25 ])
52
+ static SHA3_INLINE void keccakf_round (u64 st [25 ])
39
53
{
40
54
u64 t [5 ], tt , bc [5 ];
41
- int round ;
42
55
43
- for (round = 0 ; round < KECCAK_ROUNDS ; round ++ ) {
56
+ /* Theta */
57
+ bc [0 ] = st [0 ] ^ st [5 ] ^ st [10 ] ^ st [15 ] ^ st [20 ];
58
+ bc [1 ] = st [1 ] ^ st [6 ] ^ st [11 ] ^ st [16 ] ^ st [21 ];
59
+ bc [2 ] = st [2 ] ^ st [7 ] ^ st [12 ] ^ st [17 ] ^ st [22 ];
60
+ bc [3 ] = st [3 ] ^ st [8 ] ^ st [13 ] ^ st [18 ] ^ st [23 ];
61
+ bc [4 ] = st [4 ] ^ st [9 ] ^ st [14 ] ^ st [19 ] ^ st [24 ];
62
+
63
+ t [0 ] = bc [4 ] ^ rol64 (bc [1 ], 1 );
64
+ t [1 ] = bc [0 ] ^ rol64 (bc [2 ], 1 );
65
+ t [2 ] = bc [1 ] ^ rol64 (bc [3 ], 1 );
66
+ t [3 ] = bc [2 ] ^ rol64 (bc [4 ], 1 );
67
+ t [4 ] = bc [3 ] ^ rol64 (bc [0 ], 1 );
68
+
69
+ st [0 ] ^= t [0 ];
70
+
71
+ /* Rho Pi */
72
+ tt = st [1 ];
73
+ st [ 1 ] = rol64 (st [ 6 ] ^ t [1 ], 44 );
74
+ st [ 6 ] = rol64 (st [ 9 ] ^ t [4 ], 20 );
75
+ st [ 9 ] = rol64 (st [22 ] ^ t [2 ], 61 );
76
+ st [22 ] = rol64 (st [14 ] ^ t [4 ], 39 );
77
+ st [14 ] = rol64 (st [20 ] ^ t [0 ], 18 );
78
+ st [20 ] = rol64 (st [ 2 ] ^ t [2 ], 62 );
79
+ st [ 2 ] = rol64 (st [12 ] ^ t [2 ], 43 );
80
+ st [12 ] = rol64 (st [13 ] ^ t [3 ], 25 );
81
+ st [13 ] = rol64 (st [19 ] ^ t [4 ], 8 );
82
+ st [19 ] = rol64 (st [23 ] ^ t [3 ], 56 );
83
+ st [23 ] = rol64 (st [15 ] ^ t [0 ], 41 );
84
+ st [15 ] = rol64 (st [ 4 ] ^ t [4 ], 27 );
85
+ st [ 4 ] = rol64 (st [24 ] ^ t [4 ], 14 );
86
+ st [24 ] = rol64 (st [21 ] ^ t [1 ], 2 );
87
+ st [21 ] = rol64 (st [ 8 ] ^ t [3 ], 55 );
88
+ st [ 8 ] = rol64 (st [16 ] ^ t [1 ], 45 );
89
+ st [16 ] = rol64 (st [ 5 ] ^ t [0 ], 36 );
90
+ st [ 5 ] = rol64 (st [ 3 ] ^ t [3 ], 28 );
91
+ st [ 3 ] = rol64 (st [18 ] ^ t [3 ], 21 );
92
+ st [18 ] = rol64 (st [17 ] ^ t [2 ], 15 );
93
+ st [17 ] = rol64 (st [11 ] ^ t [1 ], 10 );
94
+ st [11 ] = rol64 (st [ 7 ] ^ t [2 ], 6 );
95
+ st [ 7 ] = rol64 (st [10 ] ^ t [0 ], 3 );
96
+ st [10 ] = rol64 ( tt ^ t [1 ], 1 );
97
+
98
+ /* Chi */
99
+ bc [ 0 ] = ~st [ 1 ] & st [ 2 ];
100
+ bc [ 1 ] = ~st [ 2 ] & st [ 3 ];
101
+ bc [ 2 ] = ~st [ 3 ] & st [ 4 ];
102
+ bc [ 3 ] = ~st [ 4 ] & st [ 0 ];
103
+ bc [ 4 ] = ~st [ 0 ] & st [ 1 ];
104
+ st [ 0 ] ^= bc [ 0 ];
105
+ st [ 1 ] ^= bc [ 1 ];
106
+ st [ 2 ] ^= bc [ 2 ];
107
+ st [ 3 ] ^= bc [ 3 ];
108
+ st [ 4 ] ^= bc [ 4 ];
109
+
110
+ bc [ 0 ] = ~st [ 6 ] & st [ 7 ];
111
+ bc [ 1 ] = ~st [ 7 ] & st [ 8 ];
112
+ bc [ 2 ] = ~st [ 8 ] & st [ 9 ];
113
+ bc [ 3 ] = ~st [ 9 ] & st [ 5 ];
114
+ bc [ 4 ] = ~st [ 5 ] & st [ 6 ];
115
+ st [ 5 ] ^= bc [ 0 ];
116
+ st [ 6 ] ^= bc [ 1 ];
117
+ st [ 7 ] ^= bc [ 2 ];
118
+ st [ 8 ] ^= bc [ 3 ];
119
+ st [ 9 ] ^= bc [ 4 ];
120
+
121
+ bc [ 0 ] = ~st [11 ] & st [12 ];
122
+ bc [ 1 ] = ~st [12 ] & st [13 ];
123
+ bc [ 2 ] = ~st [13 ] & st [14 ];
124
+ bc [ 3 ] = ~st [14 ] & st [10 ];
125
+ bc [ 4 ] = ~st [10 ] & st [11 ];
126
+ st [10 ] ^= bc [ 0 ];
127
+ st [11 ] ^= bc [ 1 ];
128
+ st [12 ] ^= bc [ 2 ];
129
+ st [13 ] ^= bc [ 3 ];
130
+ st [14 ] ^= bc [ 4 ];
131
+
132
+ bc [ 0 ] = ~st [16 ] & st [17 ];
133
+ bc [ 1 ] = ~st [17 ] & st [18 ];
134
+ bc [ 2 ] = ~st [18 ] & st [19 ];
135
+ bc [ 3 ] = ~st [19 ] & st [15 ];
136
+ bc [ 4 ] = ~st [15 ] & st [16 ];
137
+ st [15 ] ^= bc [ 0 ];
138
+ st [16 ] ^= bc [ 1 ];
139
+ st [17 ] ^= bc [ 2 ];
140
+ st [18 ] ^= bc [ 3 ];
141
+ st [19 ] ^= bc [ 4 ];
142
+
143
+ bc [ 0 ] = ~st [21 ] & st [22 ];
144
+ bc [ 1 ] = ~st [22 ] & st [23 ];
145
+ bc [ 2 ] = ~st [23 ] & st [24 ];
146
+ bc [ 3 ] = ~st [24 ] & st [20 ];
147
+ bc [ 4 ] = ~st [20 ] & st [21 ];
148
+ st [20 ] ^= bc [ 0 ];
149
+ st [21 ] ^= bc [ 1 ];
150
+ st [22 ] ^= bc [ 2 ];
151
+ st [23 ] ^= bc [ 3 ];
152
+ st [24 ] ^= bc [ 4 ];
153
+ }
44
154
45
- /* Theta */
46
- bc [0 ] = st [0 ] ^ st [5 ] ^ st [10 ] ^ st [15 ] ^ st [20 ];
47
- bc [1 ] = st [1 ] ^ st [6 ] ^ st [11 ] ^ st [16 ] ^ st [21 ];
48
- bc [2 ] = st [2 ] ^ st [7 ] ^ st [12 ] ^ st [17 ] ^ st [22 ];
49
- bc [3 ] = st [3 ] ^ st [8 ] ^ st [13 ] ^ st [18 ] ^ st [23 ];
50
- bc [4 ] = st [4 ] ^ st [9 ] ^ st [14 ] ^ st [19 ] ^ st [24 ];
51
-
52
- t [0 ] = bc [4 ] ^ rol64 (bc [1 ], 1 );
53
- t [1 ] = bc [0 ] ^ rol64 (bc [2 ], 1 );
54
- t [2 ] = bc [1 ] ^ rol64 (bc [3 ], 1 );
55
- t [3 ] = bc [2 ] ^ rol64 (bc [4 ], 1 );
56
- t [4 ] = bc [3 ] ^ rol64 (bc [0 ], 1 );
57
-
58
- st [0 ] ^= t [0 ];
59
-
60
- /* Rho Pi */
61
- tt = st [1 ];
62
- st [ 1 ] = rol64 (st [ 6 ] ^ t [1 ], 44 );
63
- st [ 6 ] = rol64 (st [ 9 ] ^ t [4 ], 20 );
64
- st [ 9 ] = rol64 (st [22 ] ^ t [2 ], 61 );
65
- st [22 ] = rol64 (st [14 ] ^ t [4 ], 39 );
66
- st [14 ] = rol64 (st [20 ] ^ t [0 ], 18 );
67
- st [20 ] = rol64 (st [ 2 ] ^ t [2 ], 62 );
68
- st [ 2 ] = rol64 (st [12 ] ^ t [2 ], 43 );
69
- st [12 ] = rol64 (st [13 ] ^ t [3 ], 25 );
70
- st [13 ] = rol64 (st [19 ] ^ t [4 ], 8 );
71
- st [19 ] = rol64 (st [23 ] ^ t [3 ], 56 );
72
- st [23 ] = rol64 (st [15 ] ^ t [0 ], 41 );
73
- st [15 ] = rol64 (st [ 4 ] ^ t [4 ], 27 );
74
- st [ 4 ] = rol64 (st [24 ] ^ t [4 ], 14 );
75
- st [24 ] = rol64 (st [21 ] ^ t [1 ], 2 );
76
- st [21 ] = rol64 (st [ 8 ] ^ t [3 ], 55 );
77
- st [ 8 ] = rol64 (st [16 ] ^ t [1 ], 45 );
78
- st [16 ] = rol64 (st [ 5 ] ^ t [0 ], 36 );
79
- st [ 5 ] = rol64 (st [ 3 ] ^ t [3 ], 28 );
80
- st [ 3 ] = rol64 (st [18 ] ^ t [3 ], 21 );
81
- st [18 ] = rol64 (st [17 ] ^ t [2 ], 15 );
82
- st [17 ] = rol64 (st [11 ] ^ t [1 ], 10 );
83
- st [11 ] = rol64 (st [ 7 ] ^ t [2 ], 6 );
84
- st [ 7 ] = rol64 (st [10 ] ^ t [0 ], 3 );
85
- st [10 ] = rol64 ( tt ^ t [1 ], 1 );
86
-
87
- /* Chi */
88
- bc [ 0 ] = ~st [ 1 ] & st [ 2 ];
89
- bc [ 1 ] = ~st [ 2 ] & st [ 3 ];
90
- bc [ 2 ] = ~st [ 3 ] & st [ 4 ];
91
- bc [ 3 ] = ~st [ 4 ] & st [ 0 ];
92
- bc [ 4 ] = ~st [ 0 ] & st [ 1 ];
93
- st [ 0 ] ^= bc [ 0 ];
94
- st [ 1 ] ^= bc [ 1 ];
95
- st [ 2 ] ^= bc [ 2 ];
96
- st [ 3 ] ^= bc [ 3 ];
97
- st [ 4 ] ^= bc [ 4 ];
98
-
99
- bc [ 0 ] = ~st [ 6 ] & st [ 7 ];
100
- bc [ 1 ] = ~st [ 7 ] & st [ 8 ];
101
- bc [ 2 ] = ~st [ 8 ] & st [ 9 ];
102
- bc [ 3 ] = ~st [ 9 ] & st [ 5 ];
103
- bc [ 4 ] = ~st [ 5 ] & st [ 6 ];
104
- st [ 5 ] ^= bc [ 0 ];
105
- st [ 6 ] ^= bc [ 1 ];
106
- st [ 7 ] ^= bc [ 2 ];
107
- st [ 8 ] ^= bc [ 3 ];
108
- st [ 9 ] ^= bc [ 4 ];
109
-
110
- bc [ 0 ] = ~st [11 ] & st [12 ];
111
- bc [ 1 ] = ~st [12 ] & st [13 ];
112
- bc [ 2 ] = ~st [13 ] & st [14 ];
113
- bc [ 3 ] = ~st [14 ] & st [10 ];
114
- bc [ 4 ] = ~st [10 ] & st [11 ];
115
- st [10 ] ^= bc [ 0 ];
116
- st [11 ] ^= bc [ 1 ];
117
- st [12 ] ^= bc [ 2 ];
118
- st [13 ] ^= bc [ 3 ];
119
- st [14 ] ^= bc [ 4 ];
120
-
121
- bc [ 0 ] = ~st [16 ] & st [17 ];
122
- bc [ 1 ] = ~st [17 ] & st [18 ];
123
- bc [ 2 ] = ~st [18 ] & st [19 ];
124
- bc [ 3 ] = ~st [19 ] & st [15 ];
125
- bc [ 4 ] = ~st [15 ] & st [16 ];
126
- st [15 ] ^= bc [ 0 ];
127
- st [16 ] ^= bc [ 1 ];
128
- st [17 ] ^= bc [ 2 ];
129
- st [18 ] ^= bc [ 3 ];
130
- st [19 ] ^= bc [ 4 ];
131
-
132
- bc [ 0 ] = ~st [21 ] & st [22 ];
133
- bc [ 1 ] = ~st [22 ] & st [23 ];
134
- bc [ 2 ] = ~st [23 ] & st [24 ];
135
- bc [ 3 ] = ~st [24 ] & st [20 ];
136
- bc [ 4 ] = ~st [20 ] & st [21 ];
137
- st [20 ] ^= bc [ 0 ];
138
- st [21 ] ^= bc [ 1 ];
139
- st [22 ] ^= bc [ 2 ];
140
- st [23 ] ^= bc [ 3 ];
141
- st [24 ] ^= bc [ 4 ];
155
+ static void __optimize ("O3" ) keccakf (u64 st [25 ])
156
+ {
157
+ int round ;
142
158
159
+ for (round = 0 ; round < KECCAK_ROUNDS ; round ++ ) {
160
+ keccakf_round (st );
143
161
/* Iota */
144
162
st [0 ] ^= keccakf_rndc [round ];
145
163
}
0 commit comments