Crypto++ 8.5
Free C++ class library of cryptographic schemes
gcm.cpp
1// gcm.cpp - originally written and placed in the public domain by Wei Dai.
2// ARM and Aarch64 added by Jeffrey Walton. The ARM carryless
3// multiply routines are less efficient because they shadow x86.
4// The precomputed key table integration makes it tricky to use the
5// more efficient ARMv8 implementation of the multiply and reduce.
6
7// use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM gcm.cpp" to generate MASM code
8
9#include "pch.h"
10#include "config.h"
11
12#ifndef CRYPTOPP_IMPORTS
13#ifndef CRYPTOPP_GENERATE_X64_MASM
14
15// Visual Studio .Net 2003 compiler crash
16#if defined(_MSC_VER) && (_MSC_VER < 1400)
17# pragma optimize("", off)
18#endif
19
20#include "gcm.h"
21#include "cpu.h"
22
23#if defined(CRYPTOPP_DISABLE_GCM_ASM)
24# undef CRYPTOPP_X86_ASM_AVAILABLE
25# undef CRYPTOPP_X32_ASM_AVAILABLE
26# undef CRYPTOPP_X64_ASM_AVAILABLE
27# undef CRYPTOPP_SSE2_ASM_AVAILABLE
28#endif
29
30NAMESPACE_BEGIN(CryptoPP)
31
32#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
33// Different assemblers accept different mnemonics: 'movd eax, xmm0' vs
34// 'movd rax, xmm0' vs 'mov eax, xmm0' vs 'mov rax, xmm0'
35#if defined(CRYPTOPP_DISABLE_MIXED_ASM)
36// 'movd eax, xmm0' only. REG_WORD() macro not used. Clang path.
37# define USE_MOVD_REG32 1
38#elif defined(__GNUC__) || defined(_MSC_VER)
39// 'movd eax, xmm0' or 'movd rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
40# define USE_MOVD_REG32_OR_REG64 1
41#else
42// 'mov eax, xmm0' or 'mov rax, xmm0'. REG_WORD() macro supplies REG32 or REG64.
43# define USE_MOV_REG32_OR_REG64 1
44#endif
45#endif // CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
46
47// Clang intrinsic casts, http://bugs.llvm.org/show_bug.cgi?id=20670
48#define M128_CAST(x) ((__m128i *)(void *)(x))
49#define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
50
// Reduction table shared by every GCM_Base object. SetKeyWithoutResync fills it in
// the first time it builds the 2K tables; the 2K paths of AuthenticateBlocks read it.
51word16 GCM_Base::s_reductionTable[256];
// Set to true once s_reductionTable has been populated.
// NOTE(review): the flag is only 'volatile' and nothing visible here serialises
// concurrent first-time initialisation - confirm that callers tolerate this.
52volatile bool GCM_Base::s_reductionTableInitialized = false;
53
54void GCM_Base::GCTR::IncrementCounterBy256()
55{
56 IncrementCounterByOne(m_counterArray+BlockSize()-4, 3);
57}
58
59static inline void Xor16(byte *a, const byte *b, const byte *c)
60{
61 CRYPTOPP_ASSERT(IsAlignedOn(a,GetAlignmentOf<word64>()));
62 CRYPTOPP_ASSERT(IsAlignedOn(b,GetAlignmentOf<word64>()));
63 CRYPTOPP_ASSERT(IsAlignedOn(c,GetAlignmentOf<word64>()));
64 ((word64 *)(void *)a)[0] = ((word64 *)(void *)b)[0] ^ ((word64 *)(void *)c)[0];
65 ((word64 *)(void *)a)[1] = ((word64 *)(void *)b)[1] ^ ((word64 *)(void *)c)[1];
66}
67
// Declarations of helpers that are implemented outside this file. Each group is only
// declared when the matching feature macro is enabled; the callers in this file
// additionally test the CPU at run time (HasSSE2(), HasNEON(), HasCLMUL(), ...).
68#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
69// SunCC 5.10-5.11 compiler crash. Move GCM_Xor16_SSE2 out-of-line, and place in
70// a source file with a SSE architecture switch. Also see GH #226 and GH #284.
71extern void GCM_Xor16_SSE2(byte *a, const byte *b, const byte *c);
72#endif // SSE2
73
// NEON counterpart of GCM_Xor16_SSE2; called with the same (a, b, c) argument order.
74#if CRYPTOPP_ARM_NEON_AVAILABLE
75extern void GCM_Xor16_NEON(byte *a, const byte *b, const byte *c);
76#endif
77
// POWER8 counterpart of GCM_Xor16_SSE2; called with the same (a, b, c) argument order.
78#if CRYPTOPP_POWER8_AVAILABLE
79extern void GCM_Xor16_POWER8(byte *a, const byte *b, const byte *c);
80#endif
81
// x86 carryless-multiply (CLMUL) backend: key setup, block authentication and the
// hash buffer fix-up used by ReverseHashBufferIfNeeded().
// s_cltableSizeInBlocks is the key table size in blocks; SetKeyWithoutResync
// multiplies it by the cipher block size to get the table size in bytes.
82#if CRYPTOPP_CLMUL_AVAILABLE
83extern void GCM_SetKeyWithoutResync_CLMUL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
84extern size_t GCM_AuthenticateBlocks_CLMUL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
85const unsigned int s_cltableSizeInBlocks = 8;
86extern void GCM_ReverseHashBufferIfNeeded_CLMUL(byte *hashBuffer);
87#endif // CRYPTOPP_CLMUL_AVAILABLE
88
// ARM polynomial-multiply (PMULL) backend; same three entry points as the CLMUL group.
89#if CRYPTOPP_ARM_PMULL_AVAILABLE
90extern void GCM_SetKeyWithoutResync_PMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
91extern size_t GCM_AuthenticateBlocks_PMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
92const unsigned int s_cltableSizeInBlocks = 8;
93extern void GCM_ReverseHashBufferIfNeeded_PMULL(byte *hashBuffer);
94#endif // CRYPTOPP_ARM_PMULL_AVAILABLE
95
// POWER8 polynomial-multiply (VMULL) backend; same three entry points as the CLMUL group.
// Callers in this file gate it on HasPMULL().
96#if CRYPTOPP_POWER8_VMULL_AVAILABLE
97extern void GCM_SetKeyWithoutResync_VMULL(const byte *hashKey, byte *mulTable, unsigned int tableSize);
98extern size_t GCM_AuthenticateBlocks_VMULL(const byte *data, size_t len, const byte *mtable, byte *hbuffer);
99const unsigned int s_cltableSizeInBlocks = 8;
100extern void GCM_ReverseHashBufferIfNeeded_VMULL(byte *hashBuffer);
101#endif // CRYPTOPP_POWER8_VMULL_AVAILABLE
102
103void GCM_Base::SetKeyWithoutResync(const byte *userKey, size_t keylength, const NameValuePairs &params)
104{
105 BlockCipher &blockCipher = AccessBlockCipher();
106 blockCipher.SetKey(userKey, keylength, params);
107
108 // GCM is only defined for 16-byte block ciphers at the moment.
109 // However, variable blocksize support means we have to defer
110 // blocksize checks to runtime after the key is set. Also see
111 // https://github.com/weidai11/cryptopp/issues/408.
112 const unsigned int blockSize = blockCipher.BlockSize();
113 CRYPTOPP_ASSERT(blockSize == REQUIRED_BLOCKSIZE);
114 if (blockCipher.BlockSize() != REQUIRED_BLOCKSIZE)
115 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
116
117 int tableSize, i, j, k;
118
119#if CRYPTOPP_CLMUL_AVAILABLE
120 if (HasCLMUL())
121 {
122 // Avoid "parameter not used" error and suppress Coverity finding
123 (void)params.GetIntValue(Name::TableSize(), tableSize);
124 tableSize = s_cltableSizeInBlocks * blockSize;
125 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
126 }
127 else
128#elif CRYPTOPP_ARM_PMULL_AVAILABLE
129 if (HasPMULL())
130 {
131 // Avoid "parameter not used" error and suppress Coverity finding
132 (void)params.GetIntValue(Name::TableSize(), tableSize);
133 tableSize = s_cltableSizeInBlocks * blockSize;
134 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
135 }
136 else
137#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
138 if (HasPMULL())
139 {
140 // Avoid "parameter not used" error and suppress Coverity finding
141 (void)params.GetIntValue(Name::TableSize(), tableSize);
142 tableSize = s_cltableSizeInBlocks * blockSize;
143 CRYPTOPP_ASSERT(tableSize > static_cast<int>(blockSize));
144 }
145 else
146#endif
147 {
148 if (params.GetIntValue(Name::TableSize(), tableSize))
149 tableSize = (tableSize >= 64*1024) ? 64*1024 : 2*1024;
150 else
151 tableSize = (GetTablesOption() == GCM_64K_Tables) ? 64*1024 : 2*1024;
152
153 //#if defined(_MSC_VER) && (_MSC_VER < 1400)
154 // VC 2003 workaround: compiler generates bad code for 64K tables
155 //tableSize = 2*1024;
156 //#endif
157 }
158
159 m_buffer.resize(3*blockSize + tableSize);
160 byte *mulTable = MulTable();
161 byte *hashKey = HashKey();
162 memset(hashKey, 0, REQUIRED_BLOCKSIZE);
163 blockCipher.ProcessBlock(hashKey);
164
165#if CRYPTOPP_CLMUL_AVAILABLE
166 if (HasCLMUL())
167 {
168 GCM_SetKeyWithoutResync_CLMUL(hashKey, mulTable, tableSize);
169 return;
170 }
171#elif CRYPTOPP_ARM_PMULL_AVAILABLE
172 if (HasPMULL())
173 {
174 GCM_SetKeyWithoutResync_PMULL(hashKey, mulTable, tableSize);
175 return;
176 }
177#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
178 if (HasPMULL())
179 {
180 GCM_SetKeyWithoutResync_VMULL(hashKey, mulTable, tableSize);
181 return;
182 }
183#endif
184
185 word64 V0, V1;
187 Block::Get(hashKey)(V0)(V1);
188
189 if (tableSize == 64*1024)
190 {
191 for (i=0; i<128; i++)
192 {
193 k = i%8;
194 Block::Put(NULLPTR, mulTable+(i/8)*256*16+(size_t(1)<<(11-k)))(V0)(V1);
195
196 int x = (int)V1 & 1;
197 V1 = (V1>>1) | (V0<<63);
198 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
199 }
200
201 for (i=0; i<16; i++)
202 {
203 memset(mulTable+i*256*16, 0, 16);
204#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
205 if (HasSSE2())
206 for (j=2; j<=0x80; j*=2)
207 for (k=1; k<j; k++)
208 GCM_Xor16_SSE2(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
209 else
210#elif CRYPTOPP_ARM_NEON_AVAILABLE
211 if (HasNEON())
212 for (j=2; j<=0x80; j*=2)
213 for (k=1; k<j; k++)
214 GCM_Xor16_NEON(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
215 else
216#elif CRYPTOPP_POWER8_AVAILABLE
217 if (HasPower8())
218 for (j=2; j<=0x80; j*=2)
219 for (k=1; k<j; k++)
220 GCM_Xor16_POWER8(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
221 else
222#endif
223 for (j=2; j<=0x80; j*=2)
224 for (k=1; k<j; k++)
225 Xor16(mulTable+i*256*16+(j+k)*16, mulTable+i*256*16+j*16, mulTable+i*256*16+k*16);
226 }
227 }
228 else
229 {
230 if (!s_reductionTableInitialized)
231 {
232 s_reductionTable[0] = 0;
233 word16 x = 0x01c2;
234 s_reductionTable[1] = ByteReverse(x);
235 for (unsigned int ii=2; ii<=0x80; ii*=2)
236 {
237 x <<= 1;
238 s_reductionTable[ii] = ByteReverse(x);
239 for (unsigned int jj=1; jj<ii; jj++)
240 s_reductionTable[ii+jj] = s_reductionTable[ii] ^ s_reductionTable[jj];
241 }
242 s_reductionTableInitialized = true;
243 }
244
245 for (i=0; i<128-24; i++)
246 {
247 k = i%32;
248 if (k < 4)
249 Block::Put(NULLPTR, mulTable+1024+(i/32)*256+(size_t(1)<<(7-k)))(V0)(V1);
250 else if (k < 8)
251 Block::Put(NULLPTR, mulTable+(i/32)*256+(size_t(1)<<(11-k)))(V0)(V1);
252
253 int x = (int)V1 & 1;
254 V1 = (V1>>1) | (V0<<63);
255 V0 = (V0>>1) ^ (x ? W64LIT(0xe1) << 56 : 0);
256 }
257
258 for (i=0; i<4; i++)
259 {
260 memset(mulTable+i*256, 0, 16);
261 memset(mulTable+1024+i*256, 0, 16);
262#if CRYPTOPP_SSE2_INTRIN_AVAILABLE || CRYPTOPP_SSE2_ASM_AVAILABLE
263 if (HasSSE2())
264 for (j=2; j<=8; j*=2)
265 for (k=1; k<j; k++)
266 {
267 GCM_Xor16_SSE2(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
268 GCM_Xor16_SSE2(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
269 }
270 else
271#elif CRYPTOPP_ARM_NEON_AVAILABLE
272 if (HasNEON())
273 for (j=2; j<=8; j*=2)
274 for (k=1; k<j; k++)
275 {
276 GCM_Xor16_NEON(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
277 GCM_Xor16_NEON(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
278 }
279 else
280#elif CRYPTOPP_POWER8_AVAILABLE
281 if (HasPower8())
282 for (j=2; j<=8; j*=2)
283 for (k=1; k<j; k++)
284 {
285 GCM_Xor16_POWER8(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
286 GCM_Xor16_POWER8(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
287 }
288 else
289#endif
290 for (j=2; j<=8; j*=2)
291 for (k=1; k<j; k++)
292 {
293 Xor16(mulTable+i*256+(j+k)*16, mulTable+i*256+j*16, mulTable+i*256+k*16);
294 Xor16(mulTable+1024+i*256+(j+k)*16, mulTable+1024+i*256+j*16, mulTable+1024+i*256+k*16);
295 }
296 }
297 }
298}
299
300inline void GCM_Base::ReverseHashBufferIfNeeded()
301{
302#if CRYPTOPP_CLMUL_AVAILABLE
303 if (HasCLMUL())
304 {
305 GCM_ReverseHashBufferIfNeeded_CLMUL(HashBuffer());
306 }
307#elif CRYPTOPP_ARM_PMULL_AVAILABLE
308 if (HasPMULL())
309 {
310 GCM_ReverseHashBufferIfNeeded_PMULL(HashBuffer());
311 }
312#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
313 if (HasPMULL())
314 {
315 GCM_ReverseHashBufferIfNeeded_VMULL(HashBuffer());
316 }
317#endif
318}
319
320void GCM_Base::Resync(const byte *iv, size_t len)
321{
322 BlockCipher &cipher = AccessBlockCipher();
323 byte *hashBuffer = HashBuffer();
324
325 if (len == 12)
326 {
327 memcpy(hashBuffer, iv, len);
328 memset(hashBuffer+len, 0, 3);
329 hashBuffer[len+3] = 1;
330 }
331 else
332 {
333 size_t origLen = len;
334 memset(hashBuffer, 0, HASH_BLOCKSIZE);
335
336 if (len >= HASH_BLOCKSIZE)
337 {
338 len = GCM_Base::AuthenticateBlocks(iv, len);
339 iv += (origLen - len);
340 }
341
342 if (len > 0)
343 {
344 memcpy(m_buffer, iv, len);
345 memset(m_buffer+len, 0, HASH_BLOCKSIZE-len);
346 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
347 }
348
349 PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(0)(origLen*8);
350 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
351
352 ReverseHashBufferIfNeeded();
353 }
354
355 if (m_state >= State_IVSet)
356 m_ctr.Resynchronize(hashBuffer, REQUIRED_BLOCKSIZE);
357 else
358 m_ctr.SetCipherWithIV(cipher, hashBuffer);
359
360 m_ctr.Seek(HASH_BLOCKSIZE);
361
362 memset(hashBuffer, 0, HASH_BLOCKSIZE);
363}
364
365unsigned int GCM_Base::OptimalDataAlignment() const
366{
367 return
368#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
369 HasSSE2() ? 16 :
370#elif CRYPTOPP_ARM_NEON_AVAILABLE
371 HasNEON() ? 4 :
372#elif CRYPTOPP_POWER8_AVAILABLE
373 HasPower8() ? 16 :
374#endif
375 GetBlockCipher().OptimalDataAlignment();
376}
377
378#if CRYPTOPP_MSC_VERSION
379# pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code
380#endif
381
382#endif // Not CRYPTOPP_GENERATE_X64_MASM
383
// Prototypes of the x64 routines that are assembled separately with MASM. Their
// bodies are the SSE2 'case 1' / 'case 3' sections of GCM_Base::AuthenticateBlocks,
// emitted as PROC/ENDP blocks when CRYPTOPP_GENERATE_X64_MASM is defined.
// 'blocks' is a block count: the callers pass len/16.
384#ifdef CRYPTOPP_X64_MASM_AVAILABLE
385extern "C" {
386void GCM_AuthenticateBlocks_2K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer, const word16 *reductionTable);
387void GCM_AuthenticateBlocks_64K_SSE2(const byte *data, size_t blocks, word64 *hashBuffer);
388}
389#endif
390
391#ifndef CRYPTOPP_GENERATE_X64_MASM
392
// Folds whole hash blocks from 'data' into the running hash kept in HashBuffer() and
// returns the number of trailing bytes that were not consumed.
//
// When a carryless-multiply backend (CLMUL, PMULL or VMULL) is compiled in and the
// CPU reports it, the call is forwarded to that backend (presumably with the same
// return contract - confirm against its implementation). Otherwise the switch below
// selects one of up to four table-driven routines:
//   case 0: C++, 2K tables        case 1: SSE2 assembly, 2K tables
//   case 2: C++, 64K tables       case 3: SSE2 assembly, 64K tables
//
// The C++ paths are do/while loops: they consume one block before testing 'len', so
// callers are expected to pass at least HASH_BLOCKSIZE bytes (Resync checks this
// before calling).
//
// NOTE(review): the numbering embedded in this listing skips 412, 483, 588, 598, 720
// and 733, so the statements on those lines are not visible here. That includes the
// declaration of 'Block' and the directives that open the '#else'/'#endif' pairs in
// the SSE2 2K path. Recover them from the upstream gcm.cpp before building this text.
393size_t GCM_Base::AuthenticateBlocks(const byte *data, size_t len)
394{
395#if CRYPTOPP_CLMUL_AVAILABLE
396 if (HasCLMUL())
397 {
398 return GCM_AuthenticateBlocks_CLMUL(data, len, MulTable(), HashBuffer());
399 }
400#elif CRYPTOPP_ARM_PMULL_AVAILABLE
401 if (HasPMULL())
402 {
403 return GCM_AuthenticateBlocks_PMULL(data, len, MulTable(), HashBuffer());
404 }
405#elif CRYPTOPP_POWER8_VMULL_AVAILABLE
406 if (HasPMULL())
407 {
408 return GCM_AuthenticateBlocks_VMULL(data, len, MulTable(), HashBuffer());
409 }
410#endif
411
413 word64 *hashBuffer = (word64 *)(void *)HashBuffer();
414 CRYPTOPP_ASSERT(IsAlignedOn(hashBuffer,GetAlignmentOf<word64>()));
415
// Selector: +2 when m_buffer is at least 64*1024 bytes (64K tables were built),
// +1 when the SSE2 assembly is compiled in and HasSSE2() is true.
416 switch (2*(m_buffer.size()>=64*1024)
417#if CRYPTOPP_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
418 + HasSSE2()
419//#elif CRYPTOPP_ARM_NEON_AVAILABLE
420// + HasNEON()
421#endif
422 )
423 {
424 case 0: // non-SSE2 and 2K tables
425 {
426 byte *mulTable = MulTable();
427 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
428
429 do
430 {
431 word64 y0, y1, a0, a1, b0, b1, c0, c1, d0, d1;
432 Block::Get(data)(y0)(y1);
433 x0 ^= y0;
434 x1 ^= y1;
435
436 data += HASH_BLOCKSIZE;
437 len -= HASH_BLOCKSIZE;
438
439 #define READ_TABLE_WORD64_COMMON(a, b, c, d) *(word64 *)(void *)(mulTable+(a*1024)+(b*256)+c+d*8)
440
441 #if (CRYPTOPP_LITTLE_ENDIAN)
442 #if CRYPTOPP_BOOL_SLOW_WORD64
443 word32 z0 = (word32)x0;
444 word32 z1 = (word32)(x0>>32);
445 word32 z2 = (word32)x1;
446 word32 z3 = (word32)(x1>>32);
447 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, (d?(z##c>>((d?d-1:0)*4))&0xf0:(z##c&0xf)<<4), e)
448 #else
449 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((d%2), c, ((d+8*b)?(x##a>>(((d+8*b)?(d+8*b)-1:1)*4))&0xf0:(x##a&0xf)<<4), e)
450 #endif
451 #define GF_MOST_SIG_8BITS(a) (a##1 >> 7*8)
452 #define GF_SHIFT_8(a) a##1 = (a##1 << 8) ^ (a##0 >> 7*8); a##0 <<= 8;
453 #else
454 #define READ_TABLE_WORD64(a, b, c, d, e) READ_TABLE_WORD64_COMMON((1-d%2), c, ((15-d-8*b)?(x##a>>(((15-d-8*b)?(15-d-8*b)-1:0)*4))&0xf0:(x##a&0xf)<<4), e)
455 #define GF_MOST_SIG_8BITS(a) (a##1 & 0xff)
456 #define GF_SHIFT_8(a) a##1 = (a##1 >> 8) ^ (a##0 << 7*8); a##0 >>= 8;
457 #endif
458
459 #define GF_MUL_32BY128(op, a, b, c) \
460 a0 op READ_TABLE_WORD64(a, b, c, 0, 0) ^ READ_TABLE_WORD64(a, b, c, 1, 0); \
461 a1 op READ_TABLE_WORD64(a, b, c, 0, 1) ^ READ_TABLE_WORD64(a, b, c, 1, 1); \
462 b0 op READ_TABLE_WORD64(a, b, c, 2, 0) ^ READ_TABLE_WORD64(a, b, c, 3, 0); \
463 b1 op READ_TABLE_WORD64(a, b, c, 2, 1) ^ READ_TABLE_WORD64(a, b, c, 3, 1); \
464 c0 op READ_TABLE_WORD64(a, b, c, 4, 0) ^ READ_TABLE_WORD64(a, b, c, 5, 0); \
465 c1 op READ_TABLE_WORD64(a, b, c, 4, 1) ^ READ_TABLE_WORD64(a, b, c, 5, 1); \
466 d0 op READ_TABLE_WORD64(a, b, c, 6, 0) ^ READ_TABLE_WORD64(a, b, c, 7, 0); \
467 d1 op READ_TABLE_WORD64(a, b, c, 6, 1) ^ READ_TABLE_WORD64(a, b, c, 7, 1); \
468
469 GF_MUL_32BY128(=, 0, 0, 0)
470 GF_MUL_32BY128(^=, 0, 1, 1)
471 GF_MUL_32BY128(^=, 1, 0, 2)
472 GF_MUL_32BY128(^=, 1, 1, 3)
473
474 word32 r = (word32)s_reductionTable[GF_MOST_SIG_8BITS(d)] << 16;
475 GF_SHIFT_8(d)
476 c0 ^= d0; c1 ^= d1;
477 r ^= (word32)s_reductionTable[GF_MOST_SIG_8BITS(c)] << 8;
478 GF_SHIFT_8(c)
479 b0 ^= c0; b1 ^= c1;
480 r ^= s_reductionTable[GF_MOST_SIG_8BITS(b)];
481 GF_SHIFT_8(b)
482 a0 ^= b0; a1 ^= b1;
484 x0 = a0; x1 = a1;
485 }
486 while (len >= HASH_BLOCKSIZE);
487
488 hashBuffer[0] = x0; hashBuffer[1] = x1;
489 return len;
490 }
491
492 case 2: // non-SSE2 and 64K tables
493 {
494 byte *mulTable = MulTable();
495 word64 x0 = hashBuffer[0], x1 = hashBuffer[1];
496
497 do
498 {
499 word64 y0, y1, a0, a1;
500 Block::Get(data)(y0)(y1);
501 x0 ^= y0;
502 x1 ^= y1;
503
504 data += HASH_BLOCKSIZE;
505 len -= HASH_BLOCKSIZE;
506
507 #undef READ_TABLE_WORD64_COMMON
508 #undef READ_TABLE_WORD64
509
510 #define READ_TABLE_WORD64_COMMON(a, c, d) *(word64 *)(void *)(mulTable+(a)*256*16+(c)+(d)*8)
511
512 #if (CRYPTOPP_LITTLE_ENDIAN)
513 #if CRYPTOPP_BOOL_SLOW_WORD64
514 word32 z0 = (word32)x0;
515 word32 z1 = (word32)(x0>>32);
516 word32 z2 = (word32)x1;
517 word32 z3 = (word32)(x1>>32);
518 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, (d?(z##c>>((d?d:1)*8-4))&0xff0:(z##c&0xff)<<4), e)
519 #else
520 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((d+4*(c%2))?(x##b>>(((d+4*(c%2))?(d+4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
521 #endif
522 #else
523 #define READ_TABLE_WORD64(b, c, d, e) READ_TABLE_WORD64_COMMON(c*4+d, ((7-d-4*(c%2))?(x##b>>(((7-d-4*(c%2))?(7-d-4*(c%2)):1)*8-4))&0xff0:(x##b&0xff)<<4), e)
524 #endif
525
526 #define GF_MUL_8BY128(op, b, c, d) \
527 a0 op READ_TABLE_WORD64(b, c, d, 0);\
528 a1 op READ_TABLE_WORD64(b, c, d, 1);\
529
530 GF_MUL_8BY128(=, 0, 0, 0)
531 GF_MUL_8BY128(^=, 0, 0, 1)
532 GF_MUL_8BY128(^=, 0, 0, 2)
533 GF_MUL_8BY128(^=, 0, 0, 3)
534 GF_MUL_8BY128(^=, 0, 1, 0)
535 GF_MUL_8BY128(^=, 0, 1, 1)
536 GF_MUL_8BY128(^=, 0, 1, 2)
537 GF_MUL_8BY128(^=, 0, 1, 3)
538 GF_MUL_8BY128(^=, 1, 2, 0)
539 GF_MUL_8BY128(^=, 1, 2, 1)
540 GF_MUL_8BY128(^=, 1, 2, 2)
541 GF_MUL_8BY128(^=, 1, 2, 3)
542 GF_MUL_8BY128(^=, 1, 3, 0)
543 GF_MUL_8BY128(^=, 1, 3, 1)
544 GF_MUL_8BY128(^=, 1, 3, 2)
545 GF_MUL_8BY128(^=, 1, 3, 3)
546
547 x0 = a0; x1 = a1;
548 }
549 while (len >= HASH_BLOCKSIZE);
550
551 hashBuffer[0] = x0; hashBuffer[1] = x1;
552 return len;
553 }
554#endif // #ifndef CRYPTOPP_GENERATE_X64_MASM
555
// With the separately assembled x64 routines, the SSE2 cases are plain calls that
// pass the block count (len/16) and return the leftover byte count.
556#ifdef CRYPTOPP_X64_MASM_AVAILABLE
557 case 1: // SSE2 and 2K tables
558 GCM_AuthenticateBlocks_2K_SSE2(data, len/16, hashBuffer, s_reductionTable);
559 return len % 16;
560 case 3: // SSE2 and 64K tables
561 GCM_AuthenticateBlocks_64K_SSE2(data, len/16, hashBuffer);
562 return len % 16;
563#endif
564
// Inline-assembly versions of the SSE2 cases. The same text doubles as the MASM
// source: under CRYPTOPP_GENERATE_X64_MASM each case is wrapped in PROC/ENDP.
565#if CRYPTOPP_SSE2_ASM_AVAILABLE
566 case 1: // SSE2 and 2K tables
567 {
568 #ifdef __GNUC__
569 __asm__ __volatile__
570 (
571 INTEL_NOPREFIX
572 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
573 ALIGN 8
574 GCM_AuthenticateBlocks_2K_SSE2 PROC FRAME
575 rex_push_reg rsi
576 push_reg rdi
577 push_reg rbx
578 .endprolog
579 mov rsi, r8
580 mov r11, r9
581 #else
582 AS2( mov WORD_REG(cx), data )
583 AS2( mov WORD_REG(dx), len )
584 AS2( mov WORD_REG(si), hashBuffer )
585 AS2( shr WORD_REG(dx), 4 )
586 #endif
587
589 AS1(push rbx)
590 AS1(push rbp)
591 #else
592 AS_PUSH_IF86( bx)
593 AS_PUSH_IF86( bp)
594 #endif
595
596 #ifdef __GNUC__
597 AS2( mov AS_REG_7, WORD_REG(di))
599 AS2( lea AS_REG_7, s_reductionTable)
600 #endif
601
602 AS2( movdqa xmm0, [WORD_REG(si)] )
603
604 #define MUL_TABLE_0 WORD_REG(si) + 32
605 #define MUL_TABLE_1 WORD_REG(si) + 32 + 1024
606 #define RED_TABLE AS_REG_7
607
608 ASL(0)
609 AS2( movdqu xmm4, [WORD_REG(cx)] )
610 AS2( pxor xmm0, xmm4 )
611
612 AS2( movd ebx, xmm0 )
613 AS2( mov eax, AS_HEX(f0f0f0f0) )
614 AS2( and eax, ebx )
615 AS2( shl ebx, 4 )
616 AS2( and ebx, AS_HEX(f0f0f0f0) )
617 AS2( movzx edi, ah )
618 AS2( movdqa xmm5, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
619 AS2( movzx edi, al )
620 AS2( movdqa xmm4, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
621 AS2( shr eax, 16 )
622 AS2( movzx edi, ah )
623 AS2( movdqa xmm3, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
624 AS2( movzx edi, al )
625 AS2( movdqa xmm2, XMMWORD_PTR [MUL_TABLE_1 + WORD_REG(di)] )
626
627 #define SSE2_MUL_32BITS(i) \
628 AS2( psrldq xmm0, 4 )\
629 AS2( movd eax, xmm0 )\
630 AS2( and eax, AS_HEX(f0f0f0f0) )\
631 AS2( movzx edi, bh )\
632 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
633 AS2( movzx edi, bl )\
634 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
635 AS2( shr ebx, 16 )\
636 AS2( movzx edi, bh )\
637 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
638 AS2( movzx edi, bl )\
639 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + (i-1)*256 + WORD_REG(di)] )\
640 AS2( movd ebx, xmm0 )\
641 AS2( shl ebx, 4 )\
642 AS2( and ebx, AS_HEX(f0f0f0f0) )\
643 AS2( movzx edi, ah )\
644 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
645 AS2( movzx edi, al )\
646 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
647 AS2( shr eax, 16 )\
648 AS2( movzx edi, ah )\
649 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
650 AS2( movzx edi, al )\
651 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_1 + i*256 + WORD_REG(di)] )\
652
653 SSE2_MUL_32BITS(1)
654 SSE2_MUL_32BITS(2)
655 SSE2_MUL_32BITS(3)
656
657 AS2( movzx edi, bh )
658 AS2( pxor xmm5, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
659 AS2( movzx edi, bl )
660 AS2( pxor xmm4, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
661 AS2( shr ebx, 16 )
662 AS2( movzx edi, bh )
663 AS2( pxor xmm3, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
664 AS2( movzx edi, bl )
665 AS2( pxor xmm2, XMMWORD_PTR [MUL_TABLE_0 + 3*256 + WORD_REG(di)] )
666
667 AS2( movdqa xmm0, xmm3 )
668 AS2( pslldq xmm3, 1 )
669 AS2( pxor xmm2, xmm3 )
670 AS2( movdqa xmm1, xmm2 )
671 AS2( pslldq xmm2, 1 )
672 AS2( pxor xmm5, xmm2 )
673
674 AS2( psrldq xmm0, 15 )
675#if USE_MOVD_REG32
676 AS2( movd edi, xmm0 )
677#elif USE_MOV_REG32_OR_REG64
678 AS2( mov WORD_REG(di), xmm0 )
679#else // GNU Assembler
680 AS2( movd WORD_REG(di), xmm0 )
681#endif
682 AS2( movzx eax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
683 AS2( shl eax, 8 )
684
685 AS2( movdqa xmm0, xmm5 )
686 AS2( pslldq xmm5, 1 )
687 AS2( pxor xmm4, xmm5 )
688
689 AS2( psrldq xmm1, 15 )
690#if USE_MOVD_REG32
691 AS2( movd edi, xmm1 )
692#elif USE_MOV_REG32_OR_REG64
693 AS2( mov WORD_REG(di), xmm1 )
694#else
695 AS2( movd WORD_REG(di), xmm1 )
696#endif
697 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
698 AS2( shl eax, 8 )
699
700 AS2( psrldq xmm0, 15 )
701#if USE_MOVD_REG32
702 AS2( movd edi, xmm0 )
703#elif USE_MOV_REG32_OR_REG64
704 AS2( mov WORD_REG(di), xmm0 )
705#else
706 AS2( movd WORD_REG(di), xmm0 )
707#endif
708 AS2( xor ax, WORD PTR [RED_TABLE + WORD_REG(di)*2] )
709
710 AS2( movd xmm0, eax )
711 AS2( pxor xmm0, xmm4 )
712
713 AS2( add WORD_REG(cx), 16 )
714 AS2( sub WORD_REG(dx), 1 )
715 // ATT_NOPREFIX
716 ASJ( jnz, 0, b )
717 INTEL_NOPREFIX
718 AS2( movdqa [WORD_REG(si)], xmm0 )
719
721 AS1(pop rbp)
722 AS1(pop rbx)
723 #else
724 AS_POP_IF86( bp)
725 AS_POP_IF86( bx)
726 #endif
727
728 #ifdef __GNUC__
729 ATT_PREFIX
730 :
731 : "c" (data), "d" (len/16), "S" (hashBuffer), "D" (s_reductionTable)
732 : "memory", "cc", "%eax"
734 , "%ebx", "%r11"
735 #endif
736 );
737 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
738 pop rbx
739 pop rdi
740 pop rsi
741 ret
742 GCM_AuthenticateBlocks_2K_SSE2 ENDP
743 #endif
744
745 return len%16;
746 }
747 case 3: // SSE2 and 64K tables
748 {
749 #ifdef __GNUC__
750 __asm__ __volatile__
751 (
752 INTEL_NOPREFIX
753 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
754 ALIGN 8
755 GCM_AuthenticateBlocks_64K_SSE2 PROC FRAME
756 rex_push_reg rsi
757 push_reg rdi
758 .endprolog
759 mov rsi, r8
760 #else
761 AS2( mov WORD_REG(cx), data )
762 AS2( mov WORD_REG(dx), len )
763 AS2( mov WORD_REG(si), hashBuffer )
764 AS2( shr WORD_REG(dx), 4 )
765 #endif
766
767 AS2( movdqa xmm0, [WORD_REG(si)] )
768
769 #undef MUL_TABLE
770 #define MUL_TABLE(i,j) WORD_REG(si) + 32 + (i*4+j)*256*16
771
772 ASL(1)
773 AS2( movdqu xmm1, [WORD_REG(cx)] )
774 AS2( pxor xmm1, xmm0 )
775 AS2( pxor xmm0, xmm0 )
776
777 #undef SSE2_MUL_32BITS
778 #define SSE2_MUL_32BITS(i) \
779 AS2( movd eax, xmm1 )\
780 AS2( psrldq xmm1, 4 )\
781 AS2( movzx edi, al )\
782 AS2( add WORD_REG(di), WORD_REG(di) )\
783 AS2( pxor xmm0, [MUL_TABLE(i,0) + WORD_REG(di)*8] )\
784 AS2( movzx edi, ah )\
785 AS2( add WORD_REG(di), WORD_REG(di) )\
786 AS2( pxor xmm0, [MUL_TABLE(i,1) + WORD_REG(di)*8] )\
787 AS2( shr eax, 16 )\
788 AS2( movzx edi, al )\
789 AS2( add WORD_REG(di), WORD_REG(di) )\
790 AS2( pxor xmm0, [MUL_TABLE(i,2) + WORD_REG(di)*8] )\
791 AS2( movzx edi, ah )\
792 AS2( add WORD_REG(di), WORD_REG(di) )\
793 AS2( pxor xmm0, [MUL_TABLE(i,3) + WORD_REG(di)*8] )\
794
795 SSE2_MUL_32BITS(0)
796 SSE2_MUL_32BITS(1)
797 SSE2_MUL_32BITS(2)
798 SSE2_MUL_32BITS(3)
799
800 AS2( add WORD_REG(cx), 16 )
801 AS2( sub WORD_REG(dx), 1 )
802 // ATT_NOPREFIX
803 ASJ( jnz, 1, b )
804 INTEL_NOPREFIX
805 AS2( movdqa [WORD_REG(si)], xmm0 )
806
807 #ifdef __GNUC__
808 ATT_PREFIX
809 :
810 : "c" (data), "d" (len/16), "S" (hashBuffer)
811 : "memory", "cc", "%edi", "%eax"
812 );
813 #elif defined(CRYPTOPP_GENERATE_X64_MASM)
814 pop rdi
815 pop rsi
816 ret
817 GCM_AuthenticateBlocks_64K_SSE2 ENDP
818 #endif
819
820 return len%16;
821 }
822#endif
823#ifndef CRYPTOPP_GENERATE_X64_MASM
824 }
825
// Reached only when the selector matched no case label above.
826 return len%16;
827}
828
829void GCM_Base::AuthenticateLastHeaderBlock()
830{
831 if (m_bufferedDataLength > 0)
832 {
833 memset(m_buffer+m_bufferedDataLength, 0, HASH_BLOCKSIZE-m_bufferedDataLength);
834 m_bufferedDataLength = 0;
835 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
836 }
837}
838
839void GCM_Base::AuthenticateLastConfidentialBlock()
840{
841 GCM_Base::AuthenticateLastHeaderBlock();
842 PutBlock<word64, BigEndian, true>(NULLPTR, m_buffer)(m_totalHeaderLength*8)(m_totalMessageLength*8);
843 GCM_Base::AuthenticateBlocks(m_buffer, HASH_BLOCKSIZE);
844}
845
846void GCM_Base::AuthenticateLastFooterBlock(byte *mac, size_t macSize)
847{
848 m_ctr.Seek(0);
849 ReverseHashBufferIfNeeded();
850 m_ctr.ProcessData(mac, HashBuffer(), macSize);
851}
852
853NAMESPACE_END
854
855#endif // Not CRYPTOPP_GENERATE_X64_MASM
856#endif
Interface for one direction (encryption or decryption) of a block cipher.
Definition: cryptlib.h:1283
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:879
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
virtual unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
unsigned int OptimalDataAlignment() const
Provides input and output data alignment for optimal performance.
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: gcm.h:36
An invalid argument was detected.
Definition: cryptlib.h:203
Interface for retrieving values given their names.
Definition: cryptlib.h:322
CRYPTOPP_DLL bool GetIntValue(const char *name, int &value) const
Get a named value with type int.
Definition: cryptlib.h:415
Access a block of memory.
Definition: misc.h:2550
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or resets the key of this object.
Library configuration file.
#define CRYPTOPP_BOOL_X86
32-bit x86 platform
Definition: config_cpu.h:52
#define CRYPTOPP_BOOL_X32
32-bit x32 platform
Definition: config_cpu.h:44
#define CRYPTOPP_BOOL_X64
64-bit x86 platform
Definition: config_cpu.h:48
#define W64LIT(x)
Declare an unsigned word64.
Definition: config_int.h:119
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned short word16
16-bit unsigned datatype
Definition: config_int.h:59
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
Functions for CPU features and intrinsics.
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
GCM block cipher mode of operation.
@ GCM_64K_Tables
Use a 64 KB (64*1024-byte) multiplication table.
Definition: gcm.h:27
byte ByteReverse(byte value)
Reverses bytes in an 8-bit value.
Definition: misc.h:2001
void IncrementCounterByOne(byte *inout, unsigned int size)
Performs an addition with carry on a block of bytes.
Definition: misc.h:1278
bool IsAlignedOn(const void *ptr, unsigned int alignment)
Determines whether ptr is aligned to a minimum value.
Definition: misc.h:1206
T ConditionalByteReverse(ByteOrder order, T value)
Reverses bytes in a value depending upon endianness.
Definition: misc.h:2167
Crypto++ library namespace.
const char * TableSize()
int, in bytes
Definition: argnames.h:81
const char * BlockSize()
int, in bytes
Definition: argnames.h:27
Precompiled header file.
Access a block of memory.
Definition: misc.h:2587
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68