// Copyright (c) 2014-2019 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

            
5
#include <crypto/sha256.h>
6
#include <crypto/common.h>
7

            
8
#include <assert.h>
9
#include <string.h>
10

            
11
#include <compat/cpuid.h>
12

            
13
#if defined(__linux__) && defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
14
#include <sys/auxv.h>
15
#include <asm/hwcap.h>
16
#endif
17

            
18
#if defined(MAC_OSX) && defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
19
#include <sys/types.h>
20
#include <sys/sysctl.h>
21
#endif
22

            
23
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
24
#if defined(USE_ASM)
25
/**
 * Forward declaration of an externally defined SHA-256 block transform.
 * Only declared when building for x86 with USE_ASM (see the enclosing #if).
 * NOTE(review): the name suggests an SSE4 assembly implementation — confirm
 * against the defining translation unit.
 */
namespace sha256_sse4
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}
29
#endif
30
#endif
31

            
32
/**
 * Forward declaration of an externally defined routine with the same
 * (out, in) signature as TransformD64Type.
 * NOTE(review): the name suggests four 64-byte inputs hashed in parallel
 * with SSE4.1 — confirm against the defining translation unit.
 */
namespace sha256d64_sse41
{
void Transform_4way(unsigned char* out, const unsigned char* in);
}
36

            
37
/**
 * Forward declaration of an externally defined routine with the same
 * (out, in) signature as TransformD64Type.
 * NOTE(review): the name suggests eight 64-byte inputs hashed in parallel
 * with AVX2 — confirm against the defining translation unit.
 */
namespace sha256d64_avx2
{
void Transform_8way(unsigned char* out, const unsigned char* in);
}
41

            
42
/**
 * Forward declaration of an externally defined routine with the same
 * (out, in) signature as TransformD64Type.
 * NOTE(review): the name suggests two 64-byte inputs hashed in parallel
 * with the x86 SHA extensions — confirm against the defining translation unit.
 */
namespace sha256d64_x86_shani
{
void Transform_2way(unsigned char* out, const unsigned char* in);
}
46

            
47
/**
 * Forward declaration of an externally defined SHA-256 block transform with
 * the same (state, data, block count) signature as TransformType.
 * NOTE(review): the name suggests an x86 SHA-extension implementation —
 * confirm against the defining translation unit.
 */
namespace sha256_x86_shani
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}
51

            
52
/**
 * Forward declaration of an externally defined SHA-256 block transform with
 * the same (state, data, block count) signature as TransformType.
 * NOTE(review): the name suggests an ARM SHA-extension implementation —
 * confirm against the defining translation unit.
 */
namespace sha256_arm_shani
{
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks);
}
56

            
57
/**
 * Forward declaration of an externally defined routine with the same
 * (out, in) signature as TransformD64Type.
 * NOTE(review): the name suggests two 64-byte inputs hashed in parallel
 * with the ARM SHA extensions — confirm against the defining translation unit.
 */
namespace sha256d64_arm_shani
{
void Transform_2way(unsigned char* out, const unsigned char* in);
}
61

            
62
// Internal implementation code.
63
namespace
64
{
65
/// Internal SHA-256 implementation.
66
namespace sha256
67
{
68
2728960
/** Bitwise "choose": each result bit comes from y where x has a 1 and from z where x has a 0. */
inline uint32_t Ch(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t differing = y ^ z;
    return (differing & x) ^ z;
}
69
2728960
/** Bitwise majority: each result bit is 1 when at least two of the three input bits are 1. */
inline uint32_t Maj(uint32_t x, uint32_t y, uint32_t z)
{
    const uint32_t both = x & y;
    const uint32_t either = x | y;
    return both | (either & z);
}
70
2728960
/** XOR of x rotated right by 2, 13 and 22 bits. */
inline uint32_t Sigma0(uint32_t x)
{
    const uint32_t rot2 = (x << 30) | (x >> 2);
    const uint32_t rot13 = (x << 19) | (x >> 13);
    const uint32_t rot22 = (x << 10) | (x >> 22);
    return rot2 ^ rot13 ^ rot22;
}
71
2728960
/** XOR of x rotated right by 6, 11 and 25 bits. */
inline uint32_t Sigma1(uint32_t x)
{
    const uint32_t rot6 = (x << 26) | (x >> 6);
    const uint32_t rot11 = (x << 21) | (x >> 11);
    const uint32_t rot25 = (x << 7) | (x >> 25);
    return rot6 ^ rot11 ^ rot25;
}
72
2046720
/** XOR of x rotated right by 7 bits, x rotated right by 18 bits, and x shifted right by 3 bits. */
inline uint32_t sigma0(uint32_t x)
{
    const uint32_t rot7 = (x << 25) | (x >> 7);
    const uint32_t rot18 = (x << 14) | (x >> 18);
    const uint32_t shr3 = x >> 3;
    return rot7 ^ rot18 ^ shr3;
}
73
2046720
/** XOR of x rotated right by 17 bits, x rotated right by 19 bits, and x shifted right by 10 bits. */
inline uint32_t sigma1(uint32_t x)
{
    const uint32_t rot17 = (x << 15) | (x >> 17);
    const uint32_t rot19 = (x << 13) | (x >> 19);
    const uint32_t shr10 = x >> 10;
    return rot17 ^ rot19 ^ shr10;
}
74

            
75
/** One round of SHA-256. */
76
void inline Round(uint32_t a, uint32_t b, uint32_t c, uint32_t& d, uint32_t e, uint32_t f, uint32_t g, uint32_t& h, uint32_t k)
77
2728960
{
78
2728960
    uint32_t t1 = h + Sigma1(e) + Ch(e, f, g) + k;
79
2728960
    uint32_t t2 = Sigma0(a) + Maj(a, b, c);
80
2728960
    d += t1;
81
2728960
    h = t1 + t2;
82
2728960
}
83

            
84
/**
 * Initialize SHA-256 state.
 *
 * @param s Output array of at least eight words; s[0..7] are overwritten with
 *          the fixed SHA-256 initial hash values.
 */
void inline Initialize(uint32_t* s)
{
    s[0] = 0x6a09e667ul;
    s[1] = 0xbb67ae85ul;
    s[2] = 0x3c6ef372ul;
    s[3] = 0xa54ff53aul;
    s[4] = 0x510e527ful;
    s[5] = 0x9b05688cul;
    s[6] = 0x1f83d9abul;
    s[7] = 0x5be0cd19ul;
}
96

            
97
/** Perform a number of SHA-256 transformations, processing 64-byte chunks. */
98
void Transform(uint32_t* s, const unsigned char* chunk, size_t blocks)
99
17095
{
100
59735
    while (blocks--) {
101
42640
        uint32_t a = s[0], b = s[1], c = s[2], d = s[3], e = s[4], f = s[5], g = s[6], h = s[7];
102
42640
        uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
103

            
104
42640
        Round(a, b, c, d, e, f, g, h, 0x428a2f98 + (w0 = ReadBE32(chunk + 0)));
105
42640
        Round(h, a, b, c, d, e, f, g, 0x71374491 + (w1 = ReadBE32(chunk + 4)));
106
42640
        Round(g, h, a, b, c, d, e, f, 0xb5c0fbcf + (w2 = ReadBE32(chunk + 8)));
107
42640
        Round(f, g, h, a, b, c, d, e, 0xe9b5dba5 + (w3 = ReadBE32(chunk + 12)));
108
42640
        Round(e, f, g, h, a, b, c, d, 0x3956c25b + (w4 = ReadBE32(chunk + 16)));
109
42640
        Round(d, e, f, g, h, a, b, c, 0x59f111f1 + (w5 = ReadBE32(chunk + 20)));
110
42640
        Round(c, d, e, f, g, h, a, b, 0x923f82a4 + (w6 = ReadBE32(chunk + 24)));
111
42640
        Round(b, c, d, e, f, g, h, a, 0xab1c5ed5 + (w7 = ReadBE32(chunk + 28)));
112
42640
        Round(a, b, c, d, e, f, g, h, 0xd807aa98 + (w8 = ReadBE32(chunk + 32)));
113
42640
        Round(h, a, b, c, d, e, f, g, 0x12835b01 + (w9 = ReadBE32(chunk + 36)));
114
42640
        Round(g, h, a, b, c, d, e, f, 0x243185be + (w10 = ReadBE32(chunk + 40)));
115
42640
        Round(f, g, h, a, b, c, d, e, 0x550c7dc3 + (w11 = ReadBE32(chunk + 44)));
116
42640
        Round(e, f, g, h, a, b, c, d, 0x72be5d74 + (w12 = ReadBE32(chunk + 48)));
117
42640
        Round(d, e, f, g, h, a, b, c, 0x80deb1fe + (w13 = ReadBE32(chunk + 52)));
118
42640
        Round(c, d, e, f, g, h, a, b, 0x9bdc06a7 + (w14 = ReadBE32(chunk + 56)));
119
42640
        Round(b, c, d, e, f, g, h, a, 0xc19bf174 + (w15 = ReadBE32(chunk + 60)));
120

            
121
42640
        Round(a, b, c, d, e, f, g, h, 0xe49b69c1 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
122
42640
        Round(h, a, b, c, d, e, f, g, 0xefbe4786 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
123
42640
        Round(g, h, a, b, c, d, e, f, 0x0fc19dc6 + (w2 += sigma1(w0) + w11 + sigma0(w3)));
124
42640
        Round(f, g, h, a, b, c, d, e, 0x240ca1cc + (w3 += sigma1(w1) + w12 + sigma0(w4)));
125
42640
        Round(e, f, g, h, a, b, c, d, 0x2de92c6f + (w4 += sigma1(w2) + w13 + sigma0(w5)));
126
42640
        Round(d, e, f, g, h, a, b, c, 0x4a7484aa + (w5 += sigma1(w3) + w14 + sigma0(w6)));
127
42640
        Round(c, d, e, f, g, h, a, b, 0x5cb0a9dc + (w6 += sigma1(w4) + w15 + sigma0(w7)));
128
42640
        Round(b, c, d, e, f, g, h, a, 0x76f988da + (w7 += sigma1(w5) + w0 + sigma0(w8)));
129
42640
        Round(a, b, c, d, e, f, g, h, 0x983e5152 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
130
42640
        Round(h, a, b, c, d, e, f, g, 0xa831c66d + (w9 += sigma1(w7) + w2 + sigma0(w10)));
131
42640
        Round(g, h, a, b, c, d, e, f, 0xb00327c8 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
132
42640
        Round(f, g, h, a, b, c, d, e, 0xbf597fc7 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
133
42640
        Round(e, f, g, h, a, b, c, d, 0xc6e00bf3 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
134
42640
        Round(d, e, f, g, h, a, b, c, 0xd5a79147 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
135
42640
        Round(c, d, e, f, g, h, a, b, 0x06ca6351 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
136
42640
        Round(b, c, d, e, f, g, h, a, 0x14292967 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
137

            
138
42640
        Round(a, b, c, d, e, f, g, h, 0x27b70a85 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
139
42640
        Round(h, a, b, c, d, e, f, g, 0x2e1b2138 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
140
42640
        Round(g, h, a, b, c, d, e, f, 0x4d2c6dfc + (w2 += sigma1(w0) + w11 + sigma0(w3)));
141
42640
        Round(f, g, h, a, b, c, d, e, 0x53380d13 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
142
42640
        Round(e, f, g, h, a, b, c, d, 0x650a7354 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
143
42640
        Round(d, e, f, g, h, a, b, c, 0x766a0abb + (w5 += sigma1(w3) + w14 + sigma0(w6)));
144
42640
        Round(c, d, e, f, g, h, a, b, 0x81c2c92e + (w6 += sigma1(w4) + w15 + sigma0(w7)));
145
42640
        Round(b, c, d, e, f, g, h, a, 0x92722c85 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
146
42640
        Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1 + (w8 += sigma1(w6) + w1 + sigma0(w9)));
147
42640
        Round(h, a, b, c, d, e, f, g, 0xa81a664b + (w9 += sigma1(w7) + w2 + sigma0(w10)));
148
42640
        Round(g, h, a, b, c, d, e, f, 0xc24b8b70 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
149
42640
        Round(f, g, h, a, b, c, d, e, 0xc76c51a3 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
150
42640
        Round(e, f, g, h, a, b, c, d, 0xd192e819 + (w12 += sigma1(w10) + w5 + sigma0(w13)));
151
42640
        Round(d, e, f, g, h, a, b, c, 0xd6990624 + (w13 += sigma1(w11) + w6 + sigma0(w14)));
152
42640
        Round(c, d, e, f, g, h, a, b, 0xf40e3585 + (w14 += sigma1(w12) + w7 + sigma0(w15)));
153
42640
        Round(b, c, d, e, f, g, h, a, 0x106aa070 + (w15 += sigma1(w13) + w8 + sigma0(w0)));
154

            
155
42640
        Round(a, b, c, d, e, f, g, h, 0x19a4c116 + (w0 += sigma1(w14) + w9 + sigma0(w1)));
156
42640
        Round(h, a, b, c, d, e, f, g, 0x1e376c08 + (w1 += sigma1(w15) + w10 + sigma0(w2)));
157
42640
        Round(g, h, a, b, c, d, e, f, 0x2748774c + (w2 += sigma1(w0) + w11 + sigma0(w3)));
158
42640
        Round(f, g, h, a, b, c, d, e, 0x34b0bcb5 + (w3 += sigma1(w1) + w12 + sigma0(w4)));
159
42640
        Round(e, f, g, h, a, b, c, d, 0x391c0cb3 + (w4 += sigma1(w2) + w13 + sigma0(w5)));
160
42640
        Round(d, e, f, g, h, a, b, c, 0x4ed8aa4a + (w5 += sigma1(w3) + w14 + sigma0(w6)));
161
42640
        Round(c, d, e, f, g, h, a, b, 0x5b9cca4f + (w6 += sigma1(w4) + w15 + sigma0(w7)));
162
42640
        Round(b, c, d, e, f, g, h, a, 0x682e6ff3 + (w7 += sigma1(w5) + w0 + sigma0(w8)));
163
42640
        Round(a, b, c, d, e, f, g, h, 0x748f82ee + (w8 += sigma1(w6) + w1 + sigma0(w9)));
164
42640
        Round(h, a, b, c, d, e, f, g, 0x78a5636f + (w9 += sigma1(w7) + w2 + sigma0(w10)));
165
42640
        Round(g, h, a, b, c, d, e, f, 0x84c87814 + (w10 += sigma1(w8) + w3 + sigma0(w11)));
166
42640
        Round(f, g, h, a, b, c, d, e, 0x8cc70208 + (w11 += sigma1(w9) + w4 + sigma0(w12)));
167
42640
        Round(e, f, g, h, a, b, c, d, 0x90befffa + (w12 += sigma1(w10) + w5 + sigma0(w13)));
168
42640
        Round(d, e, f, g, h, a, b, c, 0xa4506ceb + (w13 += sigma1(w11) + w6 + sigma0(w14)));
169
42640
        Round(c, d, e, f, g, h, a, b, 0xbef9a3f7 + (w14 + sigma1(w12) + w7 + sigma0(w15)));
170
42640
        Round(b, c, d, e, f, g, h, a, 0xc67178f2 + (w15 + sigma1(w13) + w8 + sigma0(w0)));
171

            
172
42640
        s[0] += a;
173
42640
        s[1] += b;
174
42640
        s[2] += c;
175
42640
        s[3] += d;
176
42640
        s[4] += e;
177
42640
        s[5] += f;
178
42640
        s[6] += g;
179
42640
        s[7] += h;
180
42640
        chunk += 64;
181
42640
    }
182
17095
}
183

            
184
void TransformD64(unsigned char* out, const unsigned char* in)
185
{
186
    // Transform 1
187
    uint32_t a = 0x6a09e667ul;
188
    uint32_t b = 0xbb67ae85ul;
189
    uint32_t c = 0x3c6ef372ul;
190
    uint32_t d = 0xa54ff53aul;
191
    uint32_t e = 0x510e527ful;
192
    uint32_t f = 0x9b05688cul;
193
    uint32_t g = 0x1f83d9abul;
194
    uint32_t h = 0x5be0cd19ul;
195

            
196
    uint32_t w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15;
197

            
198
    Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + (w0 = ReadBE32(in + 0)));
199
    Round(h, a, b, c, d, e, f, g, 0x71374491ul + (w1 = ReadBE32(in + 4)));
200
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + (w2 = ReadBE32(in + 8)));
201
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + (w3 = ReadBE32(in + 12)));
202
    Round(e, f, g, h, a, b, c, d, 0x3956c25bul + (w4 = ReadBE32(in + 16)));
203
    Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + (w5 = ReadBE32(in + 20)));
204
    Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + (w6 = ReadBE32(in + 24)));
205
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + (w7 = ReadBE32(in + 28)));
206
    Round(a, b, c, d, e, f, g, h, 0xd807aa98ul + (w8 = ReadBE32(in + 32)));
207
    Round(h, a, b, c, d, e, f, g, 0x12835b01ul + (w9 = ReadBE32(in + 36)));
208
    Round(g, h, a, b, c, d, e, f, 0x243185beul + (w10 = ReadBE32(in + 40)));
209
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul + (w11 = ReadBE32(in + 44)));
210
    Round(e, f, g, h, a, b, c, d, 0x72be5d74ul + (w12 = ReadBE32(in + 48)));
211
    Round(d, e, f, g, h, a, b, c, 0x80deb1feul + (w13 = ReadBE32(in + 52)));
212
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul + (w14 = ReadBE32(in + 56)));
213
    Round(b, c, d, e, f, g, h, a, 0xc19bf174ul + (w15 = ReadBE32(in + 60)));
214
    Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
215
    Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
216
    Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
217
    Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
218
    Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + w13 + sigma0(w5)));
219
    Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
220
    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
221
    Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
222
    Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
223
    Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
224
    Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
225
    Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
226
    Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
227
    Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
228
    Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
229
    Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
230
    Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
231
    Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
232
    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
233
    Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
234
    Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
235
    Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
236
    Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
237
    Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
238
    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
239
    Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
240
    Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
241
    Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
242
    Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
243
    Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
244
    Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
245
    Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
246
    Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
247
    Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
248
    Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
249
    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
250
    Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
251
    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
252
    Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
253
    Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
254
    Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
255
    Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
256
    Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
257
    Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
258
    Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
259
    Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
260
    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
261
    Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
262

            
263
    a += 0x6a09e667ul;
264
    b += 0xbb67ae85ul;
265
    c += 0x3c6ef372ul;
266
    d += 0xa54ff53aul;
267
    e += 0x510e527ful;
268
    f += 0x9b05688cul;
269
    g += 0x1f83d9abul;
270
    h += 0x5be0cd19ul;
271

            
272
    uint32_t t0 = a, t1 = b, t2 = c, t3 = d, t4 = e, t5 = f, t6 = g, t7 = h;
273

            
274
    // Transform 2
275
    Round(a, b, c, d, e, f, g, h, 0xc28a2f98ul);
276
    Round(h, a, b, c, d, e, f, g, 0x71374491ul);
277
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful);
278
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul);
279
    Round(e, f, g, h, a, b, c, d, 0x3956c25bul);
280
    Round(d, e, f, g, h, a, b, c, 0x59f111f1ul);
281
    Round(c, d, e, f, g, h, a, b, 0x923f82a4ul);
282
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul);
283
    Round(a, b, c, d, e, f, g, h, 0xd807aa98ul);
284
    Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
285
    Round(g, h, a, b, c, d, e, f, 0x243185beul);
286
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
287
    Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
288
    Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
289
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
290
    Round(b, c, d, e, f, g, h, a, 0xc19bf374ul);
291
    Round(a, b, c, d, e, f, g, h, 0x649b69c1ul);
292
    Round(h, a, b, c, d, e, f, g, 0xf0fe4786ul);
293
    Round(g, h, a, b, c, d, e, f, 0x0fe1edc6ul);
294
    Round(f, g, h, a, b, c, d, e, 0x240cf254ul);
295
    Round(e, f, g, h, a, b, c, d, 0x4fe9346ful);
296
    Round(d, e, f, g, h, a, b, c, 0x6cc984beul);
297
    Round(c, d, e, f, g, h, a, b, 0x61b9411eul);
298
    Round(b, c, d, e, f, g, h, a, 0x16f988faul);
299
    Round(a, b, c, d, e, f, g, h, 0xf2c65152ul);
300
    Round(h, a, b, c, d, e, f, g, 0xa88e5a6dul);
301
    Round(g, h, a, b, c, d, e, f, 0xb019fc65ul);
302
    Round(f, g, h, a, b, c, d, e, 0xb9d99ec7ul);
303
    Round(e, f, g, h, a, b, c, d, 0x9a1231c3ul);
304
    Round(d, e, f, g, h, a, b, c, 0xe70eeaa0ul);
305
    Round(c, d, e, f, g, h, a, b, 0xfdb1232bul);
306
    Round(b, c, d, e, f, g, h, a, 0xc7353eb0ul);
307
    Round(a, b, c, d, e, f, g, h, 0x3069bad5ul);
308
    Round(h, a, b, c, d, e, f, g, 0xcb976d5ful);
309
    Round(g, h, a, b, c, d, e, f, 0x5a0f118ful);
310
    Round(f, g, h, a, b, c, d, e, 0xdc1eeefdul);
311
    Round(e, f, g, h, a, b, c, d, 0x0a35b689ul);
312
    Round(d, e, f, g, h, a, b, c, 0xde0b7a04ul);
313
    Round(c, d, e, f, g, h, a, b, 0x58f4ca9dul);
314
    Round(b, c, d, e, f, g, h, a, 0xe15d5b16ul);
315
    Round(a, b, c, d, e, f, g, h, 0x007f3e86ul);
316
    Round(h, a, b, c, d, e, f, g, 0x37088980ul);
317
    Round(g, h, a, b, c, d, e, f, 0xa507ea32ul);
318
    Round(f, g, h, a, b, c, d, e, 0x6fab9537ul);
319
    Round(e, f, g, h, a, b, c, d, 0x17406110ul);
320
    Round(d, e, f, g, h, a, b, c, 0x0d8cd6f1ul);
321
    Round(c, d, e, f, g, h, a, b, 0xcdaa3b6dul);
322
    Round(b, c, d, e, f, g, h, a, 0xc0bbbe37ul);
323
    Round(a, b, c, d, e, f, g, h, 0x83613bdaul);
324
    Round(h, a, b, c, d, e, f, g, 0xdb48a363ul);
325
    Round(g, h, a, b, c, d, e, f, 0x0b02e931ul);
326
    Round(f, g, h, a, b, c, d, e, 0x6fd15ca7ul);
327
    Round(e, f, g, h, a, b, c, d, 0x521afacaul);
328
    Round(d, e, f, g, h, a, b, c, 0x31338431ul);
329
    Round(c, d, e, f, g, h, a, b, 0x6ed41a95ul);
330
    Round(b, c, d, e, f, g, h, a, 0x6d437890ul);
331
    Round(a, b, c, d, e, f, g, h, 0xc39c91f2ul);
332
    Round(h, a, b, c, d, e, f, g, 0x9eccabbdul);
333
    Round(g, h, a, b, c, d, e, f, 0xb5c9a0e6ul);
334
    Round(f, g, h, a, b, c, d, e, 0x532fb63cul);
335
    Round(e, f, g, h, a, b, c, d, 0xd2c741c6ul);
336
    Round(d, e, f, g, h, a, b, c, 0x07237ea3ul);
337
    Round(c, d, e, f, g, h, a, b, 0xa4954b68ul);
338
    Round(b, c, d, e, f, g, h, a, 0x4c191d76ul);
339

            
340
    w0 = t0 + a;
341
    w1 = t1 + b;
342
    w2 = t2 + c;
343
    w3 = t3 + d;
344
    w4 = t4 + e;
345
    w5 = t5 + f;
346
    w6 = t6 + g;
347
    w7 = t7 + h;
348

            
349
    // Transform 3
350
    a = 0x6a09e667ul;
351
    b = 0xbb67ae85ul;
352
    c = 0x3c6ef372ul;
353
    d = 0xa54ff53aul;
354
    e = 0x510e527ful;
355
    f = 0x9b05688cul;
356
    g = 0x1f83d9abul;
357
    h = 0x5be0cd19ul;
358

            
359
    Round(a, b, c, d, e, f, g, h, 0x428a2f98ul + w0);
360
    Round(h, a, b, c, d, e, f, g, 0x71374491ul + w1);
361
    Round(g, h, a, b, c, d, e, f, 0xb5c0fbcful + w2);
362
    Round(f, g, h, a, b, c, d, e, 0xe9b5dba5ul + w3);
363
    Round(e, f, g, h, a, b, c, d, 0x3956c25bul + w4);
364
    Round(d, e, f, g, h, a, b, c, 0x59f111f1ul + w5);
365
    Round(c, d, e, f, g, h, a, b, 0x923f82a4ul + w6);
366
    Round(b, c, d, e, f, g, h, a, 0xab1c5ed5ul + w7);
367
    Round(a, b, c, d, e, f, g, h, 0x5807aa98ul);
368
    Round(h, a, b, c, d, e, f, g, 0x12835b01ul);
369
    Round(g, h, a, b, c, d, e, f, 0x243185beul);
370
    Round(f, g, h, a, b, c, d, e, 0x550c7dc3ul);
371
    Round(e, f, g, h, a, b, c, d, 0x72be5d74ul);
372
    Round(d, e, f, g, h, a, b, c, 0x80deb1feul);
373
    Round(c, d, e, f, g, h, a, b, 0x9bdc06a7ul);
374
    Round(b, c, d, e, f, g, h, a, 0xc19bf274ul);
375
    Round(a, b, c, d, e, f, g, h, 0xe49b69c1ul + (w0 += sigma0(w1)));
376
    Round(h, a, b, c, d, e, f, g, 0xefbe4786ul + (w1 += 0xa00000ul + sigma0(w2)));
377
    Round(g, h, a, b, c, d, e, f, 0x0fc19dc6ul + (w2 += sigma1(w0) + sigma0(w3)));
378
    Round(f, g, h, a, b, c, d, e, 0x240ca1ccul + (w3 += sigma1(w1) + sigma0(w4)));
379
    Round(e, f, g, h, a, b, c, d, 0x2de92c6ful + (w4 += sigma1(w2) + sigma0(w5)));
380
    Round(d, e, f, g, h, a, b, c, 0x4a7484aaul + (w5 += sigma1(w3) + sigma0(w6)));
381
    Round(c, d, e, f, g, h, a, b, 0x5cb0a9dcul + (w6 += sigma1(w4) + 0x100ul + sigma0(w7)));
382
    Round(b, c, d, e, f, g, h, a, 0x76f988daul + (w7 += sigma1(w5) + w0 + 0x11002000ul));
383
    Round(a, b, c, d, e, f, g, h, 0x983e5152ul + (w8 = 0x80000000ul + sigma1(w6) + w1));
384
    Round(h, a, b, c, d, e, f, g, 0xa831c66dul + (w9 = sigma1(w7) + w2));
385
    Round(g, h, a, b, c, d, e, f, 0xb00327c8ul + (w10 = sigma1(w8) + w3));
386
    Round(f, g, h, a, b, c, d, e, 0xbf597fc7ul + (w11 = sigma1(w9) + w4));
387
    Round(e, f, g, h, a, b, c, d, 0xc6e00bf3ul + (w12 = sigma1(w10) + w5));
388
    Round(d, e, f, g, h, a, b, c, 0xd5a79147ul + (w13 = sigma1(w11) + w6));
389
    Round(c, d, e, f, g, h, a, b, 0x06ca6351ul + (w14 = sigma1(w12) + w7 + 0x400022ul));
390
    Round(b, c, d, e, f, g, h, a, 0x14292967ul + (w15 = 0x100ul + sigma1(w13) + w8 + sigma0(w0)));
391
    Round(a, b, c, d, e, f, g, h, 0x27b70a85ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
392
    Round(h, a, b, c, d, e, f, g, 0x2e1b2138ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
393
    Round(g, h, a, b, c, d, e, f, 0x4d2c6dfcul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
394
    Round(f, g, h, a, b, c, d, e, 0x53380d13ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
395
    Round(e, f, g, h, a, b, c, d, 0x650a7354ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
396
    Round(d, e, f, g, h, a, b, c, 0x766a0abbul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
397
    Round(c, d, e, f, g, h, a, b, 0x81c2c92eul + (w6 += sigma1(w4) + w15 + sigma0(w7)));
398
    Round(b, c, d, e, f, g, h, a, 0x92722c85ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
399
    Round(a, b, c, d, e, f, g, h, 0xa2bfe8a1ul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
400
    Round(h, a, b, c, d, e, f, g, 0xa81a664bul + (w9 += sigma1(w7) + w2 + sigma0(w10)));
401
    Round(g, h, a, b, c, d, e, f, 0xc24b8b70ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
402
    Round(f, g, h, a, b, c, d, e, 0xc76c51a3ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
403
    Round(e, f, g, h, a, b, c, d, 0xd192e819ul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
404
    Round(d, e, f, g, h, a, b, c, 0xd6990624ul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
405
    Round(c, d, e, f, g, h, a, b, 0xf40e3585ul + (w14 += sigma1(w12) + w7 + sigma0(w15)));
406
    Round(b, c, d, e, f, g, h, a, 0x106aa070ul + (w15 += sigma1(w13) + w8 + sigma0(w0)));
407
    Round(a, b, c, d, e, f, g, h, 0x19a4c116ul + (w0 += sigma1(w14) + w9 + sigma0(w1)));
408
    Round(h, a, b, c, d, e, f, g, 0x1e376c08ul + (w1 += sigma1(w15) + w10 + sigma0(w2)));
409
    Round(g, h, a, b, c, d, e, f, 0x2748774cul + (w2 += sigma1(w0) + w11 + sigma0(w3)));
410
    Round(f, g, h, a, b, c, d, e, 0x34b0bcb5ul + (w3 += sigma1(w1) + w12 + sigma0(w4)));
411
    Round(e, f, g, h, a, b, c, d, 0x391c0cb3ul + (w4 += sigma1(w2) + w13 + sigma0(w5)));
412
    Round(d, e, f, g, h, a, b, c, 0x4ed8aa4aul + (w5 += sigma1(w3) + w14 + sigma0(w6)));
413
    Round(c, d, e, f, g, h, a, b, 0x5b9cca4ful + (w6 += sigma1(w4) + w15 + sigma0(w7)));
414
    Round(b, c, d, e, f, g, h, a, 0x682e6ff3ul + (w7 += sigma1(w5) + w0 + sigma0(w8)));
415
    Round(a, b, c, d, e, f, g, h, 0x748f82eeul + (w8 += sigma1(w6) + w1 + sigma0(w9)));
416
    Round(h, a, b, c, d, e, f, g, 0x78a5636ful + (w9 += sigma1(w7) + w2 + sigma0(w10)));
417
    Round(g, h, a, b, c, d, e, f, 0x84c87814ul + (w10 += sigma1(w8) + w3 + sigma0(w11)));
418
    Round(f, g, h, a, b, c, d, e, 0x8cc70208ul + (w11 += sigma1(w9) + w4 + sigma0(w12)));
419
    Round(e, f, g, h, a, b, c, d, 0x90befffaul + (w12 += sigma1(w10) + w5 + sigma0(w13)));
420
    Round(d, e, f, g, h, a, b, c, 0xa4506cebul + (w13 += sigma1(w11) + w6 + sigma0(w14)));
421
    Round(c, d, e, f, g, h, a, b, 0xbef9a3f7ul + (w14 + sigma1(w12) + w7 + sigma0(w15)));
422
    Round(b, c, d, e, f, g, h, a, 0xc67178f2ul + (w15 + sigma1(w13) + w8 + sigma0(w0)));
423

            
424
    // Output
425
    WriteBE32(out + 0, a + 0x6a09e667ul);
426
    WriteBE32(out + 4, b + 0xbb67ae85ul);
427
    WriteBE32(out + 8, c + 0x3c6ef372ul);
428
    WriteBE32(out + 12, d + 0xa54ff53aul);
429
    WriteBE32(out + 16, e + 0x510e527ful);
430
    WriteBE32(out + 20, f + 0x9b05688cul);
431
    WriteBE32(out + 24, g + 0x1f83d9abul);
432
    WriteBE32(out + 28, h + 0x5be0cd19ul);
433
}
434

            
435
} // namespace sha256
436

            
437
/** Pointer to a block transform taking (state, input data, number of 64-byte blocks). */
typedef void (*TransformType)(uint32_t*, const unsigned char*, size_t);
/** Pointer to a routine taking (output buffer, input buffer), as sha256::TransformD64 does. */
typedef void (*TransformD64Type)(unsigned char*, const unsigned char*);
439

            
440
template<TransformType tr>
441
void TransformD64Wrapper(unsigned char* out, const unsigned char* in)
442
{
443
    uint32_t s[8];
444
    static const unsigned char padding1[64] = {
445
        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
446
        0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
447
        0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
448
        0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0
449
    };
450
    unsigned char buffer2[64] = {
451
        0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
452
        0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
453
        0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
454
        0,    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0
455
    };
456
    sha256::Initialize(s);
457
    tr(s, in, 1);
458
    tr(s, padding1, 1);
459
    WriteBE32(buffer2 + 0, s[0]);
460
    WriteBE32(buffer2 + 4, s[1]);
461
    WriteBE32(buffer2 + 8, s[2]);
462
    WriteBE32(buffer2 + 12, s[3]);
463
    WriteBE32(buffer2 + 16, s[4]);
464
    WriteBE32(buffer2 + 20, s[5]);
465
    WriteBE32(buffer2 + 24, s[6]);
466
    WriteBE32(buffer2 + 28, s[7]);
467
    sha256::Initialize(s);
468
    tr(s, buffer2, 1);
469
    WriteBE32(out + 0, s[0]);
470
    WriteBE32(out + 4, s[1]);
471
    WriteBE32(out + 8, s[2]);
472
    WriteBE32(out + 12, s[3]);
473
    WriteBE32(out + 16, s[4]);
474
    WriteBE32(out + 20, s[5]);
475
    WriteBE32(out + 24, s[6]);
476
    WriteBE32(out + 28, s[7]);
477
}
478

            
479
TransformType Transform = sha256::Transform;
480
TransformD64Type TransformD64 = sha256::TransformD64;
481
TransformD64Type TransformD64_2way = nullptr;
482
TransformD64Type TransformD64_4way = nullptr;
483
TransformD64Type TransformD64_8way = nullptr;
484

            
485
bool SelfTest() {
486
    // Input state (equal to the initial SHA256 state)
487
    static const uint32_t init[8] = {
488
        0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul
489
    };
490
    // Some random input data to test with
491
    static const unsigned char data[641] = "-" // Intentionally not aligned
492
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do "
493
        "eiusmod tempor incididunt ut labore et dolore magna aliqua. Et m"
494
        "olestie ac feugiat sed lectus vestibulum mattis ullamcorper. Mor"
495
        "bi blandit cursus risus at ultrices mi tempus imperdiet nulla. N"
496
        "unc congue nisi vita suscipit tellus mauris. Imperdiet proin fer"
497
        "mentum leo vel orci. Massa tempor nec feugiat nisl pretium fusce"
498
        " id velit. Telus in metus vulputate eu scelerisque felis. Mi tem"
499
        "pus imperdiet nulla malesuada pellentesque. Tristique magna sit.";
500
    // Expected output state for hashing the i*64 first input bytes above (excluding SHA256 padding).
501
    static const uint32_t result[9][8] = {
502
        {0x6a09e667ul, 0xbb67ae85ul, 0x3c6ef372ul, 0xa54ff53aul, 0x510e527ful, 0x9b05688cul, 0x1f83d9abul, 0x5be0cd19ul},
503
        {0x91f8ec6bul, 0x4da10fe3ul, 0x1c9c292cul, 0x45e18185ul, 0x435cc111ul, 0x3ca26f09ul, 0xeb954caeul, 0x402a7069ul},
504
        {0xcabea5acul, 0x374fb97cul, 0x182ad996ul, 0x7bd69cbful, 0x450ff900ul, 0xc1d2be8aul, 0x6a41d505ul, 0xe6212dc3ul},
505
        {0xbcff09d6ul, 0x3e76f36eul, 0x3ecb2501ul, 0x78866e97ul, 0xe1c1e2fdul, 0x32f4eafful, 0x8aa6c4e5ul, 0xdfc024bcul},
506
        {0xa08c5d94ul, 0x0a862f93ul, 0x6b7f2f40ul, 0x8f9fae76ul, 0x6d40439ful, 0x79dcee0cul, 0x3e39ff3aul, 0xdc3bdbb1ul},
507
        {0x216a0895ul, 0x9f1a3662ul, 0xe99946f9ul, 0x87ba4364ul, 0x0fb5db2cul, 0x12bed3d3ul, 0x6689c0c7ul, 0x292f1b04ul},
508
        {0xca3067f8ul, 0xbc8c2656ul, 0x37cb7e0dul, 0x9b6b8b0ful, 0x46dc380bul, 0xf1287f57ul, 0xc42e4b23ul, 0x3fefe94dul},
509
        {0x3e4c4039ul, 0xbb6fca8cul, 0x6f27d2f7ul, 0x301e44a4ul, 0x8352ba14ul, 0x5769ce37ul, 0x48a1155ful, 0xc0e1c4c6ul},
510
        {0xfe2fa9ddul, 0x69d0862bul, 0x1ae0db23ul, 0x471f9244ul, 0xf55c0145ul, 0xc30f9c3bul, 0x40a84ea0ul, 0x5b8a266cul},
511
    };
512
    // Expected output for each of the individual 8 64-byte messages under full double SHA256 (including padding).
513
    static const unsigned char result_d64[256] = {
514
        0x09, 0x3a, 0xc4, 0xd0, 0x0f, 0xf7, 0x57, 0xe1, 0x72, 0x85, 0x79, 0x42, 0xfe, 0xe7, 0xe0, 0xa0,
515
        0xfc, 0x52, 0xd7, 0xdb, 0x07, 0x63, 0x45, 0xfb, 0x53, 0x14, 0x7d, 0x17, 0x22, 0x86, 0xf0, 0x52,
516
        0x48, 0xb6, 0x11, 0x9e, 0x6e, 0x48, 0x81, 0x6d, 0xcc, 0x57, 0x1f, 0xb2, 0x97, 0xa8, 0xd5, 0x25,
517
        0x9b, 0x82, 0xaa, 0x89, 0xe2, 0xfd, 0x2d, 0x56, 0xe8, 0x28, 0x83, 0x0b, 0xe2, 0xfa, 0x53, 0xb7,
518
        0xd6, 0x6b, 0x07, 0x85, 0x83, 0xb0, 0x10, 0xa2, 0xf5, 0x51, 0x3c, 0xf9, 0x60, 0x03, 0xab, 0x45,
519
        0x6c, 0x15, 0x6e, 0xef, 0xb5, 0xac, 0x3e, 0x6c, 0xdf, 0xb4, 0x92, 0x22, 0x2d, 0xce, 0xbf, 0x3e,
520
        0xe9, 0xe5, 0xf6, 0x29, 0x0e, 0x01, 0x4f, 0xd2, 0xd4, 0x45, 0x65, 0xb3, 0xbb, 0xf2, 0x4c, 0x16,
521
        0x37, 0x50, 0x3c, 0x6e, 0x49, 0x8c, 0x5a, 0x89, 0x2b, 0x1b, 0xab, 0xc4, 0x37, 0xd1, 0x46, 0xe9,
522
        0x3d, 0x0e, 0x85, 0xa2, 0x50, 0x73, 0xa1, 0x5e, 0x54, 0x37, 0xd7, 0x94, 0x17, 0x56, 0xc2, 0xd8,
523
        0xe5, 0x9f, 0xed, 0x4e, 0xae, 0x15, 0x42, 0x06, 0x0d, 0x74, 0x74, 0x5e, 0x24, 0x30, 0xce, 0xd1,
524
        0x9e, 0x50, 0xa3, 0x9a, 0xb8, 0xf0, 0x4a, 0x57, 0x69, 0x78, 0x67, 0x12, 0x84, 0x58, 0xbe, 0xc7,
525
        0x36, 0xaa, 0xee, 0x7c, 0x64, 0xa3, 0x76, 0xec, 0xff, 0x55, 0x41, 0x00, 0x2a, 0x44, 0x68, 0x4d,
526
        0xb6, 0x53, 0x9e, 0x1c, 0x95, 0xb7, 0xca, 0xdc, 0x7f, 0x7d, 0x74, 0x27, 0x5c, 0x8e, 0xa6, 0x84,
527
        0xb5, 0xac, 0x87, 0xa9, 0xf3, 0xff, 0x75, 0xf2, 0x34, 0xcd, 0x1a, 0x3b, 0x82, 0x2c, 0x2b, 0x4e,
528
        0x6a, 0x46, 0x30, 0xa6, 0x89, 0x86, 0x23, 0xac, 0xf8, 0xa5, 0x15, 0xe9, 0x0a, 0xaa, 0x1e, 0x9a,
529
        0xd7, 0x93, 0x6b, 0x28, 0xe4, 0x3b, 0xfd, 0x59, 0xc6, 0xed, 0x7c, 0x5f, 0xa5, 0x41, 0xcb, 0x51
530
    };
531

            
532

            
533
    // Test Transform() for 0 through 8 transformations.
534
    for (size_t i = 0; i <= 8; ++i) {
535
        uint32_t state[8];
536
        std::copy(init, init + 8, state);
537
        Transform(state, data + 1, i);
538
        if (!std::equal(state, state + 8, result[i])) return false;
539
    }
540

            
541
    // Test TransformD64
542
    unsigned char out[32];
543
    TransformD64(out, data + 1);
544
    if (!std::equal(out, out + 32, result_d64)) return false;
545

            
546
    // Test TransformD64_2way, if available.
547
    if (TransformD64_2way) {
548
        unsigned char out[64];
549
        TransformD64_2way(out, data + 1);
550
        if (!std::equal(out, out + 64, result_d64)) return false;
551
    }
552

            
553
    // Test TransformD64_4way, if available.
554
    if (TransformD64_4way) {
555
        unsigned char out[128];
556
        TransformD64_4way(out, data + 1);
557
        if (!std::equal(out, out + 128, result_d64)) return false;
558
    }
559

            
560
    // Test TransformD64_8way, if available.
561
    if (TransformD64_8way) {
562
        unsigned char out[256];
563
        TransformD64_8way(out, data + 1);
564
        if (!std::equal(out, out + 256, result_d64)) return false;
565
    }
566

            
567
    return true;
568
}
569

            
570
#if defined(USE_ASM) && (defined(__x86_64__) || defined(__amd64__) || defined(__i386__))
/**
 * Check whether the OS has enabled AVX registers.
 *
 * Executes XGETBV with ECX = 0 and requires bits 1 and 2 of the low result
 * word (EAX) to both be set. The high word (EDX) is read but not inspected.
 */
bool AVXEnabled()
{
    uint32_t low_word, high_word;
    __asm__("xgetbv" : "=a"(low_word), "=d"(high_word) : "c"(0));
    const uint32_t required_bits = 6; // bit 1 | bit 2
    return (low_word & required_bits) == required_bits;
}
#endif
579
} // namespace
580

            
581

            
582
std::string SHA256AutoDetect()
583
{
584
    std::string ret = "standard";
585
#if defined(USE_ASM) && defined(HAVE_GETCPUID)
586
    bool have_sse4 = false;
587
    bool have_xsave = false;
588
    bool have_avx = false;
589
    bool have_avx2 = false;
590
    bool have_x86_shani = false;
591
    bool enabled_avx = false;
592

            
593
    (void)AVXEnabled;
594
    (void)have_sse4;
595
    (void)have_avx;
596
    (void)have_xsave;
597
    (void)have_avx2;
598
    (void)have_x86_shani;
599
    (void)enabled_avx;
600

            
601
    uint32_t eax, ebx, ecx, edx;
602
    GetCPUID(1, 0, eax, ebx, ecx, edx);
603
    have_sse4 = (ecx >> 19) & 1;
604
    have_xsave = (ecx >> 27) & 1;
605
    have_avx = (ecx >> 28) & 1;
606
    if (have_xsave && have_avx) {
607
        enabled_avx = AVXEnabled();
608
    }
609
    if (have_sse4) {
610
        GetCPUID(7, 0, eax, ebx, ecx, edx);
611
        have_avx2 = (ebx >> 5) & 1;
612
        have_x86_shani = (ebx >> 29) & 1;
613
    }
614

            
615
#if defined(ENABLE_X86_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
616
    if (have_x86_shani) {
617
        Transform = sha256_x86_shani::Transform;
618
        TransformD64 = TransformD64Wrapper<sha256_x86_shani::Transform>;
619
        TransformD64_2way = sha256d64_x86_shani::Transform_2way;
620
        ret = "x86_shani(1way,2way)";
621
        have_sse4 = false; // Disable SSE4/AVX2;
622
        have_avx2 = false;
623
    }
624
#endif
625

            
626
    if (have_sse4) {
627
#if defined(__x86_64__) || defined(__amd64__)
628
        Transform = sha256_sse4::Transform;
629
        TransformD64 = TransformD64Wrapper<sha256_sse4::Transform>;
630
        ret = "sse4(1way)";
631
#endif
632
#if defined(ENABLE_SSE41) && !defined(BUILD_BITCOIN_INTERNAL)
633
        TransformD64_4way = sha256d64_sse41::Transform_4way;
634
        ret += ",sse41(4way)";
635
#endif
636
    }
637

            
638
#if defined(ENABLE_AVX2) && !defined(BUILD_BITCOIN_INTERNAL)
639
    if (have_avx2 && have_avx && enabled_avx) {
640
        TransformD64_8way = sha256d64_avx2::Transform_8way;
641
        ret += ",avx2(8way)";
642
    }
643
#endif
644
#endif
645

            
646
#if defined(ENABLE_ARM_SHANI) && !defined(BUILD_BITCOIN_INTERNAL)
647
    bool have_arm_shani = false;
648

            
649
#if defined(__linux__)
650
#if defined(__arm__) // 32-bit
651
    if (getauxval(AT_HWCAP2) & HWCAP2_SHA2) {
652
        have_arm_shani = true;
653
    }
654
#endif
655
#if defined(__aarch64__) // 64-bit
656
    if (getauxval(AT_HWCAP) & HWCAP_SHA2) {
657
        have_arm_shani = true;
658
    }
659
#endif
660
#endif
661

            
662
#if defined(MAC_OSX)
663
    int val = 0;
664
    size_t len = sizeof(val);
665
    if (sysctlbyname("hw.optional.arm.FEAT_SHA256", &val, &len, nullptr, 0) == 0) {
666
        have_arm_shani = val != 0;
667
    }
668
#endif
669

            
670
    if (have_arm_shani) {
671
        Transform = sha256_arm_shani::Transform;
672
        TransformD64 = TransformD64Wrapper<sha256_arm_shani::Transform>;
673
        TransformD64_2way = sha256d64_arm_shani::Transform_2way;
674
        ret = "arm_shani(1way,2way)";
675
    }
676
#endif
677

            
678
    assert(SelfTest());
679
    return ret;
680
}
681

            
682
////// SHA-256
683

            
684
CSHA256::CSHA256() : bytes(0)
685
13499
{
686
13499
    sha256::Initialize(s);
687
13499
}
688

            
689
/**
 * Absorb `len` bytes starting at `data` into the hash.
 *
 * Whole 64-byte blocks are passed to Transform; any remainder is kept in
 * `buf` until a later call completes the block.
 *
 * @return *this, so calls can be chained.
 */
CSHA256& CSHA256::Write(const unsigned char* data, size_t len)
{
    const unsigned char* const input_end = data + len;
    size_t buffered = bytes % 64;

    // A partially filled buffer that this call can complete: top it up and hash it.
    if (buffered != 0 && buffered + len >= 64) {
        const size_t fill = 64 - buffered;
        memcpy(buf + buffered, data, fill);
        bytes += fill;
        data += fill;
        Transform(s, buf, 1);
        buffered = 0;
    }

    // Hash as many complete blocks as possible straight from the caller's memory.
    const size_t whole_blocks = static_cast<size_t>(input_end - data) / 64;
    if (whole_blocks > 0) {
        Transform(s, data, whole_blocks);
        data += 64 * whole_blocks;
        bytes += 64 * whole_blocks;
    }

    // Keep whatever is left over for the next call.
    const size_t leftover = static_cast<size_t>(input_end - data);
    if (leftover > 0) {
        memcpy(buf + buffered, data, leftover);
        bytes += leftover;
    }
    return *this;
}
714

            
715
void CSHA256::Finalize(unsigned char hash[OUTPUT_SIZE])
716
14140
{
717
14140
    static const unsigned char pad[64] = {0x80};
718
14140
    unsigned char sizedesc[8];
719
14140
    WriteBE64(sizedesc, bytes << 3);
720
14140
    Write(pad, 1 + ((119 - (bytes % 64)) % 64));
721
14140
    Write(sizedesc, 8);
722
14140
    WriteBE32(hash, s[0]);
723
14140
    WriteBE32(hash + 4, s[1]);
724
14140
    WriteBE32(hash + 8, s[2]);
725
14140
    WriteBE32(hash + 12, s[3]);
726
14140
    WriteBE32(hash + 16, s[4]);
727
14140
    WriteBE32(hash + 20, s[5]);
728
14140
    WriteBE32(hash + 24, s[6]);
729
14140
    WriteBE32(hash + 28, s[7]);
730
14140
}
731

            
732
/**
 * Return the hasher to its just-constructed condition.
 *
 * @return *this, so calls can be chained.
 */
CSHA256& CSHA256::Reset()
{
    sha256::Initialize(s);
    bytes = 0;
    return *this;
}
738

            
739
void SHA256D64(unsigned char* out, const unsigned char* in, size_t blocks)
740
{
741
    if (TransformD64_8way) {
742
        while (blocks >= 8) {
743
            TransformD64_8way(out, in);
744
            out += 256;
745
            in += 512;
746
            blocks -= 8;
747
        }
748
    }
749
    if (TransformD64_4way) {
750
        while (blocks >= 4) {
751
            TransformD64_4way(out, in);
752
            out += 128;
753
            in += 256;
754
            blocks -= 4;
755
        }
756
    }
757
    if (TransformD64_2way) {
758
        while (blocks >= 2) {
759
            TransformD64_2way(out, in);
760
            out += 64;
761
            in += 128;
762
            blocks -= 2;
763
        }
764
    }
765
    while (blocks) {
766
        TransformD64(out, in);
767
        out += 32;
768
        in += 64;
769
        --blocks;
770
    }
771
}