Crypto++ 8.5
Free C++ class library of cryptographic schemes
xts.cpp
1// xts.cpp - written and placed in the public domain by Jeffrey Walton
2
3// Aarch32, Aarch64, Altivec and X86_64 include SIMD as part of the
4// base architecture. We can use the SIMD code below without an
5// architecture option. No runtime tests are required. Unfortunately,
6// we can't use it on Altivec because an architecture switch is required.
7// The updated XorBuffer gains 0.3 to 1.5 cpb on the architectures for
8// 16-byte block sizes.
9
10#include "pch.h"
11
12#include "xts.h"
13#include "misc.h"
14#include "modes.h"
15#include "cpu.h"
16
17#if defined(CRYPTOPP_DEBUG)
18# include "aes.h"
19# include "threefish.h"
20#endif
21
22// 0.3 to 0.4 cpb profit
23#if defined(__SSE2__) || defined(_M_X64)
24# include <emmintrin.h>
25// Clang intrinsic casts
26# define M128_CAST(x) ((__m128i *)(void *)(x))
27# define CONST_M128_CAST(x) ((const __m128i *)(const void *)(x))
28#endif
29
30
31#if defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
32# if (CRYPTOPP_ARM_NEON_HEADER)
33# include <arm_neon.h>
34# endif
35#endif
36
37#if defined(__ALTIVEC__)
38# include "ppc_simd.h"
39#endif
40
41ANONYMOUS_NAMESPACE_BEGIN
42
43using namespace CryptoPP;
44
45#if defined(CRYPTOPP_DEBUG) && !defined(CRYPTOPP_DOXYGEN_PROCESSING)
46
47using CryptoPP::AES;
48using CryptoPP::XTS_Mode;
49using CryptoPP::Threefish512;
50
51void Modes_TestInstantiations()
52{
53 XTS_Mode<AES>::Encryption m0;
54 XTS_Mode<AES>::Decryption m1;
55 XTS_Mode<AES>::Encryption m2;
56 XTS_Mode<AES>::Decryption m3;
57
58#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
59 XTS_Mode<Threefish512>::Encryption m4;
60 XTS_Mode<Threefish512>::Decryption m5;
61#endif
62}
63#endif // CRYPTOPP_DEBUG
64
65inline void XorBuffer(byte *output, const byte *input, const byte *mask, size_t count)
66{
67 CRYPTOPP_ASSERT(count >= 16 && (count % 16 == 0));
68
69#if defined(CRYPTOPP_DISABLE_ASM)
70 xorbuf(output, input, mask, count);
71
72#elif defined(__SSE2__) || defined(_M_X64)
73 for (size_t i=0; i<count; i+=16)
74 _mm_storeu_si128(M128_CAST(output+i),
75 _mm_xor_si128(
76 _mm_loadu_si128(CONST_M128_CAST(input+i)),
77 _mm_loadu_si128(CONST_M128_CAST(mask+i))));
78
79#elif defined(__aarch32__) || defined(__aarch64__) || defined(_M_ARM64)
80 for (size_t i=0; i<count; i+=16)
81 vst1q_u8(output+i, veorq_u8(vld1q_u8(input+i), vld1q_u8(mask+i)));
82
83#elif defined(__ALTIVEC__)
84 for (size_t i=0; i<count; i+=16)
85 VecStore(VecXor(VecLoad(input+i), VecLoad(mask+i)), output+i);
86
87#else
88 xorbuf(output, input, mask, count);
89#endif
90}
91
92inline void XorBuffer(byte *buf, const byte *mask, size_t count)
93{
94 XorBuffer(buf, buf, mask, count);
95}
96
97// Borrowed from CMAC, but little-endian representation
98inline void GF_Double(byte *out, const byte* in, unsigned int len)
99{
100#if defined(_M_X64) || defined(_M_ARM64) || defined(_LP64) || defined(__LP64__)
101 word64 carry = 0, x;
102 for (size_t i=0, idx=0; i<len/8; ++i, idx+=8)
103 {
104 x = GetWord<word64>(false, LITTLE_ENDIAN_ORDER, in+idx);
105 word64 y = (x >> 63); x = (x << 1) + carry;
106 PutWord<word64>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
107 carry = y;
108 }
109#else
110 word32 carry = 0, x;
111 for (size_t i=0, idx=0; i<len/4; ++i, idx+=4)
112 {
113 x = GetWord<word32>(false, LITTLE_ENDIAN_ORDER, in+idx);
114 word32 y = (x >> 31); x = (x << 1) + carry;
115 PutWord<word32>(false, LITTLE_ENDIAN_ORDER, out+idx, x);
116 carry = y;
117 }
118#endif
119
120#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
121
123 CRYPTOPP_ASSERT(len >= 16);
124 CRYPTOPP_ASSERT(len <= 128);
125
126 byte* k = out;
127 if (carry)
128 {
129 switch (len)
130 {
131 case 16:
132 {
133 const size_t LEIDX = 16-1;
134 k[LEIDX-15] ^= 0x87;
135 break;
136 }
137 case 32:
138 {
139 // https://crypto.stackexchange.com/q/9815/10496
140 // Polynomial x^256 + x^10 + x^5 + x^2 + 1
141 const size_t LEIDX = 32-1;
142 k[LEIDX-30] ^= 4;
143 k[LEIDX-31] ^= 0x25;
144 break;
145 }
146 case 64:
147 {
148 // https://crypto.stackexchange.com/q/9815/10496
149 // Polynomial x^512 + x^8 + x^5 + x^2 + 1
150 const size_t LEIDX = 64-1;
151 k[LEIDX-62] ^= 1;
152 k[LEIDX-63] ^= 0x25;
153 break;
154 }
155 case 128:
156 {
157 // https://crypto.stackexchange.com/q/9815/10496
158 // Polynomial x^1024 + x^19 + x^6 + x + 1
159 const size_t LEIDX = 128-1;
160 k[LEIDX-125] ^= 8;
161 k[LEIDX-126] ^= 0x00;
162 k[LEIDX-127] ^= 0x43;
163 break;
164 }
165 default:
167 }
168 }
169#else
170 CRYPTOPP_ASSERT(len == 16);
171
172 byte* k = out;
173 if (carry)
174 {
175 k[0] ^= 0x87;
176 return;
177 }
178#endif // CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
179}
180
181inline void GF_Double(byte *inout, unsigned int len)
182{
183 GF_Double(inout, inout, len);
184}
185
186ANONYMOUS_NAMESPACE_END
187
188NAMESPACE_BEGIN(CryptoPP)
189
190void XTS_ModeBase::ThrowIfInvalidBlockSize(size_t length)
191{
192#if CRYPTOPP_XTS_WIDE_BLOCK_CIPHERS
193 CRYPTOPP_ASSERT(length >= 16 && length <= 128 && IsPowerOf2(length));
194 if (length < 16 || length > 128 || !IsPowerOf2(length))
195 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not valid");
196#else
197 CRYPTOPP_ASSERT(length == 16);
198 if (length != 16)
199 throw InvalidArgument(AlgorithmName() + ": block size of underlying block cipher is not 16");
200#endif
201}
202
204{
205 CRYPTOPP_ASSERT(length % 2 == 0);
206 if (!GetBlockCipher().IsValidKeyLength((length+1)/2))
207 throw InvalidKeyLength(AlgorithmName(), length);
208}
209
210void XTS_ModeBase::SetKey(const byte *key, size_t length, const NameValuePairs &params)
211{
214
215 const size_t klen = length/2;
216 AccessBlockCipher().SetKey(key+0, klen, params);
217 AccessTweakCipher().SetKey(key+klen, klen, params);
218
219 ResizeBuffers();
220
221 size_t ivLength;
222 const byte *iv = GetIVAndThrowIfInvalid(params, ivLength);
223 Resynchronize(iv, (int)ivLength);
224}
225
226void XTS_ModeBase::Resynchronize(const byte *iv, int ivLength)
227{
229 std::memcpy(m_xregister, m_register, ivLength);
230 GetTweakCipher().ProcessBlock(m_xregister);
231}
232
234{
235 SecByteBlock iv(GetTweakCipher().BlockSize());
236 PutWord<word64>(false, order, iv, sector);
237 std::memset(iv+8, 0x00, iv.size()-8);
238
240 std::memcpy(m_xregister, iv, iv.size());
241 GetTweakCipher().ProcessBlock(m_xregister);
242}
243
244void XTS_ModeBase::ResizeBuffers()
245{
246 BlockOrientedCipherModeBase::ResizeBuffers();
247 m_xworkspace.New(GetBlockCipher().BlockSize()*ParallelBlocks);
248 m_xregister.New(GetBlockCipher().BlockSize()*ParallelBlocks);
249}
250
251// ProcessData runs either 12-4-1 blocks, 8-2-1 or 4-1 blocks. Which is
252// selected depends on ParallelBlocks in the header file. 12-4-1 or 8-2-1
253// can be used on Aarch64 and PowerPC. Intel should use 4-1 due to lack
254// of registers. The unneeded code paths should be removed by optimizer.
255// The extra gyrations save us 1.8 cpb on Aarch64 and 2.1 cpb on PowerPC.
256void XTS_ModeBase::ProcessData(byte *outString, const byte *inString, size_t length)
257{
258 // data unit is multiple of 16 bytes
259 CRYPTOPP_ASSERT(length % BlockSize() == 0);
260
261 enum { lastParallelBlock = ParallelBlocks-1 };
262 const unsigned int blockSize = GetBlockCipher().BlockSize();
263 const size_t parallelSize = blockSize*ParallelBlocks;
264
265 // encrypt the data unit, optimal size at a time
266 while (length >= parallelSize)
267 {
268 // m_xregister[0] always points to the next tweak.
269 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
270 GF_Double(m_xregister+2*blockSize, m_xregister+1*blockSize, blockSize);
271 GF_Double(m_xregister+3*blockSize, m_xregister+2*blockSize, blockSize);
272
273 if (ParallelBlocks > 4)
274 {
275 GF_Double(m_xregister+4*blockSize, m_xregister+3*blockSize, blockSize);
276 GF_Double(m_xregister+5*blockSize, m_xregister+4*blockSize, blockSize);
277 GF_Double(m_xregister+6*blockSize, m_xregister+5*blockSize, blockSize);
278 GF_Double(m_xregister+7*blockSize, m_xregister+6*blockSize, blockSize);
279 }
280 if (ParallelBlocks > 8)
281 {
282 GF_Double(m_xregister+8*blockSize, m_xregister+7*blockSize, blockSize);
283 GF_Double(m_xregister+9*blockSize, m_xregister+8*blockSize, blockSize);
284 GF_Double(m_xregister+10*blockSize, m_xregister+9*blockSize, blockSize);
285 GF_Double(m_xregister+11*blockSize, m_xregister+10*blockSize, blockSize);
286 }
287
288 // merge the tweak into the input block
289 XorBuffer(m_xworkspace, inString, m_xregister, parallelSize);
290
291 // encrypt one block, merge the tweak into the output block
292 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
293 outString, parallelSize, BlockTransformation::BT_AllowParallel);
294
295 // m_xregister[0] always points to the next tweak.
296 GF_Double(m_xregister+0, m_xregister+lastParallelBlock*blockSize, blockSize);
297
298 inString += parallelSize;
299 outString += parallelSize;
300 length -= parallelSize;
301 }
302
303 // encrypt the data unit, 4 blocks at a time
304 while (ParallelBlocks == 12 && length >= blockSize*4)
305 {
306 // m_xregister[0] always points to the next tweak.
307 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
308 GF_Double(m_xregister+2*blockSize, m_xregister+1*blockSize, blockSize);
309 GF_Double(m_xregister+3*blockSize, m_xregister+2*blockSize, blockSize);
310
311 // merge the tweak into the input block
312 XorBuffer(m_xworkspace, inString, m_xregister, blockSize*4);
313
314 // encrypt one block, merge the tweak into the output block
315 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
316 outString, blockSize*4, BlockTransformation::BT_AllowParallel);
317
318 // m_xregister[0] always points to the next tweak.
319 GF_Double(m_xregister+0, m_xregister+3*blockSize, blockSize);
320
321 inString += blockSize*4;
322 outString += blockSize*4;
323 length -= blockSize*4;
324 }
325
326 // encrypt the data unit, 2 blocks at a time
327 while (ParallelBlocks == 8 && length >= blockSize*2)
328 {
329 // m_xregister[0] always points to the next tweak.
330 GF_Double(m_xregister+1*blockSize, m_xregister+0*blockSize, blockSize);
331
332 // merge the tweak into the input block
333 XorBuffer(m_xworkspace, inString, m_xregister, blockSize*2);
334
335 // encrypt one block, merge the tweak into the output block
336 GetBlockCipher().AdvancedProcessBlocks(m_xworkspace, m_xregister,
337 outString, blockSize*2, BlockTransformation::BT_AllowParallel);
338
339 // m_xregister[0] always points to the next tweak.
340 GF_Double(m_xregister+0, m_xregister+1*blockSize, blockSize);
341
342 inString += blockSize*2;
343 outString += blockSize*2;
344 length -= blockSize*2;
345 }
346
347 // encrypt the data unit, blocksize at a time
348 while (length)
349 {
350 // merge the tweak into the input block
351 XorBuffer(m_xworkspace, inString, m_xregister, blockSize);
352
353 // encrypt one block
354 GetBlockCipher().ProcessBlock(m_xworkspace);
355
356 // merge the tweak into the output block
357 XorBuffer(outString, m_xworkspace, m_xregister, blockSize);
358
359 // Multiply T by alpha
360 GF_Double(m_xregister, blockSize);
361
362 inString += blockSize;
363 outString += blockSize;
364 length -= blockSize;
365 }
366}
367
368size_t XTS_ModeBase::ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
369{
370 // need at least a full AES block
371 CRYPTOPP_ASSERT(inLength >= BlockSize());
372
373 if (inLength < BlockSize())
374 throw InvalidArgument("XTS: message is too short for ciphertext stealing");
375
377 return ProcessLastPlainBlock(outString, outLength, inString, inLength);
378 else
379 return ProcessLastCipherBlock(outString, outLength, inString, inLength);
380}
381
382size_t XTS_ModeBase::ProcessLastPlainBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
383{
384 // ensure output buffer is large enough
385 CRYPTOPP_ASSERT(outLength >= inLength);
386
387 const unsigned int blockSize = GetBlockCipher().BlockSize();
388 const size_t blocks = inLength / blockSize;
389 const size_t tail = inLength % blockSize;
390 outLength = inLength;
391
392 if (tail == 0)
393 {
394 // Allow ProcessData to handle all the full blocks
395 ProcessData(outString, inString, inLength);
396 return inLength;
397 }
398 else if (blocks > 1)
399 {
400 // Allow ProcessData to handle full blocks except one
401 const size_t head = (blocks-1)*blockSize;
402 ProcessData(outString, inString, inLength-head);
403
404 outString += head;
405 inString += head; inLength -= head;
406 }
407
408 ///// handle the full block /////
409
410 // merge the tweak into the input block
411 XorBuffer(m_xworkspace, inString, m_xregister, blockSize);
412
413 // encrypt one block
414 GetBlockCipher().ProcessBlock(m_xworkspace);
415
416 // merge the tweak into the output block
417 XorBuffer(outString, m_xworkspace, m_xregister, blockSize);
418
419 // Multiply T by alpha
420 GF_Double(m_xregister, blockSize);
421
422 ///// handle final partial block /////
423
424 inString += blockSize;
425 outString += blockSize;
426 const size_t len = inLength-blockSize;
427
428 // copy in the final plaintext bytes
429 std::memcpy(m_xworkspace, inString, len);
430 // and copy out the final ciphertext bytes
431 std::memcpy(outString, outString-blockSize, len);
432 // "steal" ciphertext to complete the block
433 std::memcpy(m_xworkspace+len, outString-blockSize+len, blockSize-len);
434
435 // merge the tweak into the input block
436 XorBuffer(m_xworkspace, m_xregister, blockSize);
437
438 // encrypt one block
439 GetBlockCipher().ProcessBlock(m_xworkspace);
440
441 // merge the tweak into the previous output block
442 XorBuffer(outString-blockSize, m_xworkspace, m_xregister, blockSize);
443
444 return outLength;
445}
446
447size_t XTS_ModeBase::ProcessLastCipherBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
448{
449 // ensure output buffer is large enough
450 CRYPTOPP_ASSERT(outLength >= inLength);
451
452 const unsigned int blockSize = GetBlockCipher().BlockSize();
453 const size_t blocks = inLength / blockSize;
454 const size_t tail = inLength % blockSize;
455 outLength = inLength;
456
457 if (tail == 0)
458 {
459 // Allow ProcessData to handle all the full blocks
460 ProcessData(outString, inString, inLength);
461 return inLength;
462 }
463 else if (blocks > 1)
464 {
465 // Allow ProcessData to handle full blocks except one
466 const size_t head = (blocks-1)*blockSize;
467 ProcessData(outString, inString, inLength-head);
468
469 outString += head;
470 inString += head; inLength -= head;
471 }
472
473 #define poly1 (m_xregister+0*blockSize)
474 #define poly2 (m_xregister+1*blockSize)
475 GF_Double(poly2, poly1, blockSize);
476
477 ///// handle final partial block /////
478
479 inString += blockSize;
480 outString += blockSize;
481 const size_t len = inLength-blockSize;
482
483 // merge the tweak into the input block
484 XorBuffer(m_xworkspace, inString-blockSize, poly2, blockSize);
485
486 // encrypt one block
487 GetBlockCipher().ProcessBlock(m_xworkspace);
488
489 // merge the tweak into the output block
490 XorBuffer(m_xworkspace, poly2, blockSize);
491
492 // copy in the final plaintext bytes
493 std::memcpy(outString-blockSize, inString, len);
494 // and copy out the final ciphertext bytes
495 std::memcpy(outString, m_xworkspace, len);
496 // "steal" ciphertext to complete the block
497 std::memcpy(outString-blockSize+len, m_xworkspace+len, blockSize-len);
498
499 ///// handle the full previous block /////
500
501 inString -= blockSize;
502 outString -= blockSize;
503
504 // merge the tweak into the input block
505 XorBuffer(m_xworkspace, outString, poly1, blockSize);
506
507 // encrypt one block
508 GetBlockCipher().ProcessBlock(m_xworkspace);
509
510 // merge the tweak into the output block
511 XorBuffer(outString, m_xworkspace, poly1, blockSize);
512
513 return outLength;
514}
515
516NAMESPACE_END
#define M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:609
#define CONST_M128_CAST(x)
Clang workaround.
Definition: adv_simd.h:614
Class file for the AES cipher (Rijndael)
bool IsForwardTransformation() const
Determines if the cipher is being operated in its forward direction.
Definition: modes.h:258
void Resynchronize(const byte *iv, int length=-1)
Resynchronize with an IV.
Definition: modes.h:260
void ProcessBlock(const byte *inBlock, byte *outBlock) const
Encrypt or decrypt a block.
Definition: cryptlib.h:879
virtual size_t AdvancedProcessBlocks(const byte *inBlocks, const byte *xorBlocks, byte *outBlocks, size_t length, word32 flags) const
Encrypt and xor multiple blocks using additional flags.
@ BT_AllowParallel
Allow parallel transformations.
Definition: cryptlib.h:925
virtual unsigned int BlockSize() const =0
Provides the block size of the cipher.
An invalid argument was detected.
Definition: cryptlib.h:203
Exception thrown when an invalid key length is encountered.
Definition: simple.h:56
Interface for retrieving values given their names.
Definition: cryptlib.h:322
void New(size_type newSize)
Change size without preserving contents.
Definition: secblock.h:1022
size_type size() const
Provides the count of elements in the SecBlock.
Definition: secblock.h:849
SecBlock<byte> typedef.
Definition: secblock.h:1115
virtual void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
XTS block cipher mode of operation default implementation.
Definition: xts.h:50
bool IsValidKeyLength(size_t keylength) const
Returns whether keylength is a valid key length.
Definition: xts.h:74
void SetKey(const byte *key, size_t length, const NameValuePairs &params=g_nullNameValuePairs)
Sets or reset the key of this object.
Definition: xts.cpp:210
void ProcessData(byte *outString, const byte *inString, size_t length)
Encrypt or decrypt an array of bytes.
Definition: xts.cpp:256
void Resynchronize(const byte *iv, int ivLength=-1)
Resynchronize with an IV.
Definition: xts.cpp:226
size_t ProcessLastBlock(byte *outString, size_t outLength, const byte *inString, size_t inLength)
Encrypt or decrypt the last block of data.
Definition: xts.cpp:368
unsigned int BlockSize() const
Provides the block size of the cipher.
Definition: xts.h:84
void ThrowIfInvalidKeyLength(size_t length)
Validates the key length.
Definition: xts.cpp:203
void ThrowIfInvalidBlockSize(size_t length)
Validates the block size.
Definition: xts.cpp:190
std::string AlgorithmName() const
Provides the name of this algorithm.
Definition: xts.h:61
unsigned int word32
32-bit unsigned datatype
Definition: config_int.h:62
unsigned long long word64
64-bit unsigned datatype
Definition: config_int.h:91
Functions for CPU features and intrinsics.
ByteOrder
Provides the byte ordering.
Definition: cryptlib.h:143
@ LITTLE_ENDIAN_ORDER
byte order is little-endian
Definition: cryptlib.h:145
Utility functions for the Crypto++ library.
bool IsPowerOf2(const T &value)
Tests whether a value is a power of 2.
Definition: misc.h:990
CRYPTOPP_DLL void xorbuf(byte *buf, const byte *mask, size_t count)
Performs an XOR of a buffer with a mask.
Classes for block cipher modes of operation.
Crypto++ library namespace.
Precompiled header file.
Support functions for PowerPC and vector operations.
T1 VecXor(const T1 vec1, const T2 vec2)
XOR two vectors.
Definition: ppc_simd.h:1414
void VecStore(const T data, byte dest[16])
Stores a vector to a byte array.
Definition: ppc_simd.h:895
uint32x4_p VecLoad(const byte src[16])
Loads a vector from a byte array.
Definition: ppc_simd.h:369
Classes for the Threefish block cipher.
#define CRYPTOPP_ASSERT(exp)
Debugging and diagnostic assertion.
Definition: trap.h:68
Classes for XTS block cipher mode of operation.