00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017
00018 #include <iostream>
00019
00020 #ifdef SSE2_INTRINSICS_AVAILABLE
00021 #include <emmintrin.h>
00022 #endif
00023
00024 NAMESPACE_BEGIN(CryptoPP)
00025
00026 #ifdef SSE2_INTRINSICS_AVAILABLE
00027 template <class T>
00028 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00029 {
00030 if (n < 4)
00031 return new T[n];
00032 else
00033 return (T *)_mm_malloc(sizeof(T)*n, 16);
00034
00035 }
00036
00037 template <class T>
00038 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00039 {
00040 memset(p, 0, n*sizeof(T));
00041 if (n < 4)
00042 delete [] p;
00043 else
00044 _mm_free(p);
00045 }
00046 #endif
00047
00048 #define MAKE_DWORD(lowWord, highWord) ((dword(highWord)<<WORD_BITS) | (lowWord))
00049
00050 static int Compare(const word *A, const word *B, unsigned int N)
00051 {
00052 while (N--)
00053 if (A[N] > B[N])
00054 return 1;
00055 else if (A[N] < B[N])
00056 return -1;
00057
00058 return 0;
00059 }
00060
00061 static word Increment(word *A, unsigned int N, word B=1)
00062 {
00063 assert(N);
00064 word t = A[0];
00065 A[0] = t+B;
00066 if (A[0] >= t)
00067 return 0;
00068 for (unsigned i=1; i<N; i++)
00069 if (++A[i])
00070 return 0;
00071 return 1;
00072 }
00073
00074 static word Decrement(word *A, unsigned int N, word B=1)
00075 {
00076 assert(N);
00077 word t = A[0];
00078 A[0] = t-B;
00079 if (A[0] <= t)
00080 return 0;
00081 for (unsigned i=1; i<N; i++)
00082 if (A[i]--)
00083 return 0;
00084 return 1;
00085 }
00086
00087 static void TwosComplement(word *A, unsigned int N)
00088 {
00089 Decrement(A, N);
00090 for (unsigned i=0; i<N; i++)
00091 A[i] = ~A[i];
00092 }
00093
00094 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
00095 {
00096 word carry=0;
00097 for(unsigned i=0; i<N; i++)
00098 {
00099 dword p = (dword)A[i] * B + carry;
00100 C[i] = LOW_WORD(p);
00101 carry = HIGH_WORD(p);
00102 }
00103 return carry;
00104 }
00105
00106 static void AtomicInverseModPower2(word *C, word A0, word A1)
00107 {
00108 assert(A0%2==1);
00109
00110 dword A=MAKE_DWORD(A0, A1), R=A0%8;
00111
00112 for (unsigned i=3; i<2*WORD_BITS; i*=2)
00113 R = R*(2-R*A);
00114
00115 assert(R*A==1);
00116
00117 C[0] = LOW_WORD(R);
00118 C[1] = HIGH_WORD(R);
00119 }
00120
00121
00122
00123 class Portable
00124 {
00125 public:
00126 static word Add(word *C, const word *A, const word *B, unsigned int N);
00127 static word Subtract(word *C, const word *A, const word *B, unsigned int N);
00128
00129 static inline void Multiply2(word *C, const word *A, const word *B);
00130 static inline word Multiply2Add(word *C, const word *A, const word *B);
00131 static void Multiply4(word *C, const word *A, const word *B);
00132 static void Multiply8(word *C, const word *A, const word *B);
00133 static inline unsigned int MultiplyRecursionLimit() {return 8;}
00134
00135 static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00136 static void Multiply4Bottom(word *C, const word *A, const word *B);
00137 static void Multiply8Bottom(word *C, const word *A, const word *B);
00138 static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00139
00140 static void Square2(word *R, const word *A);
00141 static void Square4(word *R, const word *A);
00142 static void Square8(word *R, const word *A) {assert(false);}
00143 static inline unsigned int SquareRecursionLimit() {return 4;}
00144 };
00145
00146 word Portable::Add(word *C, const word *A, const word *B, unsigned int N)
00147 {
00148 assert (N%2 == 0);
00149
00150 #ifdef IS_LITTLE_ENDIAN
00151 if (sizeof(dword) == sizeof(size_t))
00152 {
00153 dword carry = 0;
00154 N >>= 1;
00155 for (unsigned int i = 0; i < N; i++)
00156 {
00157 dword a = ((const dword *)A)[i] + carry;
00158 dword c = a + ((const dword *)B)[i];
00159 ((dword *)C)[i] = c;
00160 carry = (a < carry) | (c < a);
00161 }
00162 return (word)carry;
00163 }
00164 else
00165 #endif
00166 {
00167 word carry = 0;
00168 for (unsigned int i = 0; i < N; i+=2)
00169 {
00170 dword u = (dword) carry + A[i] + B[i];
00171 C[i] = LOW_WORD(u);
00172 u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];
00173 C[i+1] = LOW_WORD(u);
00174 carry = HIGH_WORD(u);
00175 }
00176 return carry;
00177 }
00178 }
00179
00180 word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N)
00181 {
00182 assert (N%2 == 0);
00183
00184 #ifdef IS_LITTLE_ENDIAN
00185 if (sizeof(dword) == sizeof(size_t))
00186 {
00187 dword borrow = 0;
00188 N >>= 1;
00189 for (unsigned int i = 0; i < N; i++)
00190 {
00191 dword a = ((const dword *)A)[i];
00192 dword b = a - borrow;
00193 dword c = b - ((const dword *)B)[i];
00194 ((dword *)C)[i] = c;
00195 borrow = (b > a) | (c > b);
00196 }
00197 return (word)borrow;
00198 }
00199 else
00200 #endif
00201 {
00202 word borrow=0;
00203 for (unsigned i = 0; i < N; i+=2)
00204 {
00205 dword u = (dword) A[i] - B[i] - borrow;
00206 C[i] = LOW_WORD(u);
00207 u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));
00208 C[i+1] = LOW_WORD(u);
00209 borrow = 0-HIGH_WORD(u);
00210 }
00211 return borrow;
00212 }
00213 }
00214
00215 void Portable::Multiply2(word *C, const word *A, const word *B)
00216 {
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00246 unsigned int ai = A[1] < A[0];
00247 unsigned int bi = B[0] < B[1];
00248 unsigned int di = ai & bi;
00249 dword d = (dword)D[di]*D[di+2];
00250 D[1] = D[3] = 0;
00251 unsigned int si = ai + !bi;
00252 word s = D[si];
00253
00254 dword A0B0 = (dword)A[0]*B[0];
00255 C[0] = LOW_WORD(A0B0);
00256
00257 dword A1B1 = (dword)A[1]*B[1];
00258 dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);
00259 C[1] = LOW_WORD(t);
00260
00261 t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;
00262 C[2] = LOW_WORD(t);
00263 C[3] = HIGH_WORD(t);
00264 }
00265
00266 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00267 {
00268 #ifdef IS_LITTLE_ENDIAN
00269 if (sizeof(dword) == sizeof(size_t))
00270 {
00271 dword a = *(const dword *)A, b = *(const dword *)B;
00272 ((dword *)C)[0] = a*b;
00273 }
00274 else
00275 #endif
00276 {
00277 dword t = (dword)A[0]*B[0];
00278 C[0] = LOW_WORD(t);
00279 C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0];
00280 }
00281 }
00282
00283 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00284 {
00285 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00286 unsigned int ai = A[1] < A[0];
00287 unsigned int bi = B[0] < B[1];
00288 unsigned int di = ai & bi;
00289 dword d = (dword)D[di]*D[di+2];
00290 D[1] = D[3] = 0;
00291 unsigned int si = ai + !bi;
00292 word s = D[si];
00293
00294 dword A0B0 = (dword)A[0]*B[0];
00295 dword t = A0B0 + C[0];
00296 C[0] = LOW_WORD(t);
00297
00298 dword A1B1 = (dword)A[1]*B[1];
00299 t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];
00300 C[1] = LOW_WORD(t);
00301
00302 t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];
00303 C[2] = LOW_WORD(t);
00304
00305 t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];
00306 C[3] = LOW_WORD(t);
00307 return HIGH_WORD(t);
00308 }
00309
00310 #define MulAcc(x, y) \
00311 p = (dword)A[x] * B[y] + c; \
00312 c = LOW_WORD(p); \
00313 p = (dword)d + HIGH_WORD(p); \
00314 d = LOW_WORD(p); \
00315 e += HIGH_WORD(p);
00316
00317 #define SaveMulAcc(s, x, y) \
00318 R[s] = c; \
00319 p = (dword)A[x] * B[y] + d; \
00320 c = LOW_WORD(p); \
00321 p = (dword)e + HIGH_WORD(p); \
00322 d = LOW_WORD(p); \
00323 e = HIGH_WORD(p);
00324
00325 #define SquAcc(x, y) \
00326 q = (dword)A[x] * A[y]; \
00327 p = q + c; \
00328 c = LOW_WORD(p); \
00329 p = (dword)d + HIGH_WORD(p); \
00330 d = LOW_WORD(p); \
00331 e += HIGH_WORD(p); \
00332 p = q + c; \
00333 c = LOW_WORD(p); \
00334 p = (dword)d + HIGH_WORD(p); \
00335 d = LOW_WORD(p); \
00336 e += HIGH_WORD(p);
00337
00338 #define SaveSquAcc(s, x, y) \
00339 R[s] = c; \
00340 q = (dword)A[x] * A[y]; \
00341 p = q + d; \
00342 c = LOW_WORD(p); \
00343 p = (dword)e + HIGH_WORD(p); \
00344 d = LOW_WORD(p); \
00345 e = HIGH_WORD(p); \
00346 p = q + c; \
00347 c = LOW_WORD(p); \
00348 p = (dword)d + HIGH_WORD(p); \
00349 d = LOW_WORD(p); \
00350 e += HIGH_WORD(p);
00351
00352 void Portable::Multiply4(word *R, const word *A, const word *B)
00353 {
00354 dword p;
00355 word c, d, e;
00356
00357 p = (dword)A[0] * B[0];
00358 R[0] = LOW_WORD(p);
00359 c = HIGH_WORD(p);
00360 d = e = 0;
00361
00362 MulAcc(0, 1);
00363 MulAcc(1, 0);
00364
00365 SaveMulAcc(1, 2, 0);
00366 MulAcc(1, 1);
00367 MulAcc(0, 2);
00368
00369 SaveMulAcc(2, 0, 3);
00370 MulAcc(1, 2);
00371 MulAcc(2, 1);
00372 MulAcc(3, 0);
00373
00374 SaveMulAcc(3, 3, 1);
00375 MulAcc(2, 2);
00376 MulAcc(1, 3);
00377
00378 SaveMulAcc(4, 2, 3);
00379 MulAcc(3, 2);
00380
00381 R[5] = c;
00382 p = (dword)A[3] * B[3] + d;
00383 R[6] = LOW_WORD(p);
00384 R[7] = e + HIGH_WORD(p);
00385 }
00386
00387 void Portable::Square2(word *R, const word *A)
00388 {
00389 dword p, q;
00390 word c, d, e;
00391
00392 p = (dword)A[0] * A[0];
00393 R[0] = LOW_WORD(p);
00394 c = HIGH_WORD(p);
00395 d = e = 0;
00396
00397 SquAcc(0, 1);
00398
00399 R[1] = c;
00400 p = (dword)A[1] * A[1] + d;
00401 R[2] = LOW_WORD(p);
00402 R[3] = e + HIGH_WORD(p);
00403 }
00404
00405 void Portable::Square4(word *R, const word *A)
00406 {
00407 const word *B = A;
00408 dword p, q;
00409 word c, d, e;
00410
00411 p = (dword)A[0] * A[0];
00412 R[0] = LOW_WORD(p);
00413 c = HIGH_WORD(p);
00414 d = e = 0;
00415
00416 SquAcc(0, 1);
00417
00418 SaveSquAcc(1, 2, 0);
00419 MulAcc(1, 1);
00420
00421 SaveSquAcc(2, 0, 3);
00422 SquAcc(1, 2);
00423
00424 SaveSquAcc(3, 3, 1);
00425 MulAcc(2, 2);
00426
00427 SaveSquAcc(4, 2, 3);
00428
00429 R[5] = c;
00430 p = (dword)A[3] * A[3] + d;
00431 R[6] = LOW_WORD(p);
00432 R[7] = e + HIGH_WORD(p);
00433 }
00434
00435 void Portable::Multiply8(word *R, const word *A, const word *B)
00436 {
00437 dword p;
00438 word c, d, e;
00439
00440 p = (dword)A[0] * B[0];
00441 R[0] = LOW_WORD(p);
00442 c = HIGH_WORD(p);
00443 d = e = 0;
00444
00445 MulAcc(0, 1);
00446 MulAcc(1, 0);
00447
00448 SaveMulAcc(1, 2, 0);
00449 MulAcc(1, 1);
00450 MulAcc(0, 2);
00451
00452 SaveMulAcc(2, 0, 3);
00453 MulAcc(1, 2);
00454 MulAcc(2, 1);
00455 MulAcc(3, 0);
00456
00457 SaveMulAcc(3, 0, 4);
00458 MulAcc(1, 3);
00459 MulAcc(2, 2);
00460 MulAcc(3, 1);
00461 MulAcc(4, 0);
00462
00463 SaveMulAcc(4, 0, 5);
00464 MulAcc(1, 4);
00465 MulAcc(2, 3);
00466 MulAcc(3, 2);
00467 MulAcc(4, 1);
00468 MulAcc(5, 0);
00469
00470 SaveMulAcc(5, 0, 6);
00471 MulAcc(1, 5);
00472 MulAcc(2, 4);
00473 MulAcc(3, 3);
00474 MulAcc(4, 2);
00475 MulAcc(5, 1);
00476 MulAcc(6, 0);
00477
00478 SaveMulAcc(6, 0, 7);
00479 MulAcc(1, 6);
00480 MulAcc(2, 5);
00481 MulAcc(3, 4);
00482 MulAcc(4, 3);
00483 MulAcc(5, 2);
00484 MulAcc(6, 1);
00485 MulAcc(7, 0);
00486
00487 SaveMulAcc(7, 1, 7);
00488 MulAcc(2, 6);
00489 MulAcc(3, 5);
00490 MulAcc(4, 4);
00491 MulAcc(5, 3);
00492 MulAcc(6, 2);
00493 MulAcc(7, 1);
00494
00495 SaveMulAcc(8, 2, 7);
00496 MulAcc(3, 6);
00497 MulAcc(4, 5);
00498 MulAcc(5, 4);
00499 MulAcc(6, 3);
00500 MulAcc(7, 2);
00501
00502 SaveMulAcc(9, 3, 7);
00503 MulAcc(4, 6);
00504 MulAcc(5, 5);
00505 MulAcc(6, 4);
00506 MulAcc(7, 3);
00507
00508 SaveMulAcc(10, 4, 7);
00509 MulAcc(5, 6);
00510 MulAcc(6, 5);
00511 MulAcc(7, 4);
00512
00513 SaveMulAcc(11, 5, 7);
00514 MulAcc(6, 6);
00515 MulAcc(7, 5);
00516
00517 SaveMulAcc(12, 6, 7);
00518 MulAcc(7, 6);
00519
00520 R[13] = c;
00521 p = (dword)A[7] * B[7] + d;
00522 R[14] = LOW_WORD(p);
00523 R[15] = e + HIGH_WORD(p);
00524 }
00525
00526 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00527 {
00528 dword p;
00529 word c, d, e;
00530
00531 p = (dword)A[0] * B[0];
00532 R[0] = LOW_WORD(p);
00533 c = HIGH_WORD(p);
00534 d = e = 0;
00535
00536 MulAcc(0, 1);
00537 MulAcc(1, 0);
00538
00539 SaveMulAcc(1, 2, 0);
00540 MulAcc(1, 1);
00541 MulAcc(0, 2);
00542
00543 R[2] = c;
00544 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00545 }
00546
00547 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00548 {
00549 dword p;
00550 word c, d, e;
00551
00552 p = (dword)A[0] * B[0];
00553 R[0] = LOW_WORD(p);
00554 c = HIGH_WORD(p);
00555 d = e = 0;
00556
00557 MulAcc(0, 1);
00558 MulAcc(1, 0);
00559
00560 SaveMulAcc(1, 2, 0);
00561 MulAcc(1, 1);
00562 MulAcc(0, 2);
00563
00564 SaveMulAcc(2, 0, 3);
00565 MulAcc(1, 2);
00566 MulAcc(2, 1);
00567 MulAcc(3, 0);
00568
00569 SaveMulAcc(3, 0, 4);
00570 MulAcc(1, 3);
00571 MulAcc(2, 2);
00572 MulAcc(3, 1);
00573 MulAcc(4, 0);
00574
00575 SaveMulAcc(4, 0, 5);
00576 MulAcc(1, 4);
00577 MulAcc(2, 3);
00578 MulAcc(3, 2);
00579 MulAcc(4, 1);
00580 MulAcc(5, 0);
00581
00582 SaveMulAcc(5, 0, 6);
00583 MulAcc(1, 5);
00584 MulAcc(2, 4);
00585 MulAcc(3, 3);
00586 MulAcc(4, 2);
00587 MulAcc(5, 1);
00588 MulAcc(6, 0);
00589
00590 R[6] = c;
00591 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00592 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00593 }
00594
00595 #undef MulAcc
00596 #undef SaveMulAcc
00597 #undef SquAcc
00598 #undef SaveSquAcc
00599
00600
00601 #if defined(_MSC_VER) && !defined(__MWERKS__) && defined(_M_IX86) && (_M_IX86<=700)
00602
00603 class PentiumOptimized : public Portable
00604 {
00605 public:
00606 static word __fastcall Add(word *C, const word *A, const word *B, unsigned int N);
00607 static word __fastcall Subtract(word *C, const word *A, const word *B, unsigned int N);
00608 static inline void Square4(word *R, const word *A)
00609 {
00610
00611
00612
00613
00614 Multiply4(R, A, A);
00615 }
00616 };
00617
00618 typedef PentiumOptimized LowLevel;
00619
00620 __declspec(naked) word __fastcall PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N)
00621 {
00622 __asm
00623 {
00624 push ebp
00625 push ebx
00626 push esi
00627 push edi
00628
00629 mov esi, [esp+24] ; N
00630 mov ebx, [esp+20] ; B
00631
00632
00633
00634 sub ecx, edx
00635 xor eax, eax
00636
00637 sub eax, esi
00638 lea ebx, [ebx+4*esi]
00639
00640 sar eax, 1
00641 jz loopend
00642
00643 loopstart:
00644 mov esi,[edx]
00645 mov ebp,[edx+4]
00646
00647 mov edi,[ebx+8*eax]
00648 lea edx,[edx+8]
00649
00650 adc esi,edi
00651 mov edi,[ebx+8*eax+4]
00652
00653 adc ebp,edi
00654 inc eax
00655
00656 mov [edx+ecx-8],esi
00657 mov [edx+ecx-4],ebp
00658
00659 jnz loopstart
00660
00661 loopend:
00662 adc eax, 0
00663 pop edi
00664 pop esi
00665 pop ebx
00666 pop ebp
00667 ret 8
00668 }
00669 }
00670
00671 __declspec(naked) word __fastcall PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
00672 {
00673 __asm
00674 {
00675 push ebp
00676 push ebx
00677 push esi
00678 push edi
00679
00680 mov esi, [esp+24] ; N
00681 mov ebx, [esp+20] ; B
00682
00683 sub ecx, edx
00684 xor eax, eax
00685
00686 sub eax, esi
00687 lea ebx, [ebx+4*esi]
00688
00689 sar eax, 1
00690 jz loopend
00691
00692 loopstart:
00693 mov esi,[edx]
00694 mov ebp,[edx+4]
00695
00696 mov edi,[ebx+8*eax]
00697 lea edx,[edx+8]
00698
00699 sbb esi,edi
00700 mov edi,[ebx+8*eax+4]
00701
00702 sbb ebp,edi
00703 inc eax
00704
00705 mov [edx+ecx-8],esi
00706 mov [edx+ecx-4],ebp
00707
00708 jnz loopstart
00709
00710 loopend:
00711 adc eax, 0
00712 pop edi
00713 pop esi
00714 pop ebx
00715 pop ebp
00716 ret 8
00717 }
00718 }
00719
00720 #ifdef SSE2_INTRINSICS_AVAILABLE
00721
00722 static bool GetSSE2Capability()
00723 {
00724 word32 b;
00725
00726 __asm
00727 {
00728 mov eax, 1
00729 cpuid
00730 mov b, edx
00731 }
00732
00733 return (b & (1 << 26)) != 0;
00734 }
00735
00736 bool g_sse2DetectionDone = false, g_sse2Detected, g_sse2Enabled = true;
00737
00738 void DisableSSE2()
00739 {
00740 g_sse2Enabled = false;
00741 }
00742
00743 static inline bool HasSSE2()
00744 {
00745 if (g_sse2Enabled && !g_sse2DetectionDone)
00746 {
00747 g_sse2Detected = GetSSE2Capability();
00748 g_sse2DetectionDone = true;
00749 }
00750 return g_sse2Enabled && g_sse2Detected;
00751 }
00752
00753 class P4Optimized : public PentiumOptimized
00754 {
00755 public:
00756 static word __fastcall Add(word *C, const word *A, const word *B, unsigned int N);
00757 static word __fastcall Subtract(word *C, const word *A, const word *B, unsigned int N);
00758 static void Multiply4(word *C, const word *A, const word *B);
00759 static void Multiply8(word *C, const word *A, const word *B);
00760 static inline void Square4(word *R, const word *A)
00761 {
00762 Multiply4(R, A, A);
00763 }
00764 static void Multiply8Bottom(word *C, const word *A, const word *B);
00765 };
00766
00767 static void __fastcall P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
00768 {
00769 __m128i a3210 = _mm_load_si128(A);
00770 __m128i b3210 = _mm_load_si128(B);
00771
00772 __m128i sum;
00773
00774 __m128i z = _mm_setzero_si128();
00775 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
00776 C[0] = a2b2_a0b0;
00777
00778 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
00779 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
00780 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
00781 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
00782 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
00783 C[1] = _mm_add_epi64(a1b0, a0b1);
00784
00785 __m128i a31 = _mm_srli_epi64(a3210, 32);
00786 __m128i b31 = _mm_srli_epi64(b3210, 32);
00787 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
00788 C[6] = a3b3_a1b1;
00789
00790 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
00791 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
00792 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
00793 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
00794 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
00795 sum = _mm_add_epi64(a1b1, a0b2);
00796 C[2] = _mm_add_epi64(sum, a2b0);
00797
00798 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
00799 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
00800 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
00801 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
00802 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
00803 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
00804 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
00805 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
00806 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
00807 sum = _mm_add_epi64(a2b1, a0b3);
00808 C[3] = _mm_add_epi64(sum, sum1);
00809
00810 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
00811 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
00812 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
00813 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
00814 sum = _mm_add_epi64(a2b2, a3b1);
00815 C[4] = _mm_add_epi64(sum, a1b3);
00816
00817 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
00818 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
00819 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
00820 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
00821 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
00822 C[5] = _mm_add_epi64(a3b2, a2b3);
00823 }
00824
00825 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
00826 {
00827 __m128i temp[7];
00828 const word *w = (word *)temp;
00829 const __m64 *mw = (__m64 *)w;
00830
00831 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
00832
00833 C[0] = w[0];
00834
00835 __m64 s1, s2;
00836
00837 __m64 w1 = _m_from_int(w[1]);
00838 __m64 w4 = mw[2];
00839 __m64 w6 = mw[3];
00840 __m64 w8 = mw[4];
00841 __m64 w10 = mw[5];
00842 __m64 w12 = mw[6];
00843 __m64 w14 = mw[7];
00844 __m64 w16 = mw[8];
00845 __m64 w18 = mw[9];
00846 __m64 w20 = mw[10];
00847 __m64 w22 = mw[11];
00848 __m64 w26 = _m_from_int(w[26]);
00849
00850 s1 = _mm_add_si64(w1, w4);
00851 C[1] = _m_to_int(s1);
00852 s1 = _m_psrlqi(s1, 32);
00853
00854 s2 = _mm_add_si64(w6, w8);
00855 s1 = _mm_add_si64(s1, s2);
00856 C[2] = _m_to_int(s1);
00857 s1 = _m_psrlqi(s1, 32);
00858
00859 s2 = _mm_add_si64(w10, w12);
00860 s1 = _mm_add_si64(s1, s2);
00861 C[3] = _m_to_int(s1);
00862 s1 = _m_psrlqi(s1, 32);
00863
00864 s2 = _mm_add_si64(w14, w16);
00865 s1 = _mm_add_si64(s1, s2);
00866 C[4] = _m_to_int(s1);
00867 s1 = _m_psrlqi(s1, 32);
00868
00869 s2 = _mm_add_si64(w18, w20);
00870 s1 = _mm_add_si64(s1, s2);
00871 C[5] = _m_to_int(s1);
00872 s1 = _m_psrlqi(s1, 32);
00873
00874 s2 = _mm_add_si64(w22, w26);
00875 s1 = _mm_add_si64(s1, s2);
00876 C[6] = _m_to_int(s1);
00877 s1 = _m_psrlqi(s1, 32);
00878
00879 C[7] = _m_to_int(s1) + w[27];
00880 _mm_empty();
00881 }
00882
00883 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
00884 {
00885 __m128i temp[28];
00886 const word *w = (word *)temp;
00887 const __m64 *mw = (__m64 *)w;
00888 const word *x = (word *)temp+7*4;
00889 const __m64 *mx = (__m64 *)x;
00890 const word *y = (word *)temp+7*4*2;
00891 const __m64 *my = (__m64 *)y;
00892 const word *z = (word *)temp+7*4*3;
00893 const __m64 *mz = (__m64 *)z;
00894
00895 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
00896
00897 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
00898
00899 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
00900
00901 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
00902
00903 C[0] = w[0];
00904
00905 __m64 s1, s2, s3, s4;
00906
00907 __m64 w1 = _m_from_int(w[1]);
00908 __m64 w4 = mw[2];
00909 __m64 w6 = mw[3];
00910 __m64 w8 = mw[4];
00911 __m64 w10 = mw[5];
00912 __m64 w12 = mw[6];
00913 __m64 w14 = mw[7];
00914 __m64 w16 = mw[8];
00915 __m64 w18 = mw[9];
00916 __m64 w20 = mw[10];
00917 __m64 w22 = mw[11];
00918 __m64 w26 = _m_from_int(w[26]);
00919 __m64 w27 = _m_from_int(w[27]);
00920
00921 __m64 x0 = _m_from_int(x[0]);
00922 __m64 x1 = _m_from_int(x[1]);
00923 __m64 x4 = mx[2];
00924 __m64 x6 = mx[3];
00925 __m64 x8 = mx[4];
00926 __m64 x10 = mx[5];
00927 __m64 x12 = mx[6];
00928 __m64 x14 = mx[7];
00929 __m64 x16 = mx[8];
00930 __m64 x18 = mx[9];
00931 __m64 x20 = mx[10];
00932 __m64 x22 = mx[11];
00933 __m64 x26 = _m_from_int(x[26]);
00934 __m64 x27 = _m_from_int(x[27]);
00935
00936 __m64 y0 = _m_from_int(y[0]);
00937 __m64 y1 = _m_from_int(y[1]);
00938 __m64 y4 = my[2];
00939 __m64 y6 = my[3];
00940 __m64 y8 = my[4];
00941 __m64 y10 = my[5];
00942 __m64 y12 = my[6];
00943 __m64 y14 = my[7];
00944 __m64 y16 = my[8];
00945 __m64 y18 = my[9];
00946 __m64 y20 = my[10];
00947 __m64 y22 = my[11];
00948 __m64 y26 = _m_from_int(y[26]);
00949 __m64 y27 = _m_from_int(y[27]);
00950
00951 __m64 z0 = _m_from_int(z[0]);
00952 __m64 z1 = _m_from_int(z[1]);
00953 __m64 z4 = mz[2];
00954 __m64 z6 = mz[3];
00955 __m64 z8 = mz[4];
00956 __m64 z10 = mz[5];
00957 __m64 z12 = mz[6];
00958 __m64 z14 = mz[7];
00959 __m64 z16 = mz[8];
00960 __m64 z18 = mz[9];
00961 __m64 z20 = mz[10];
00962 __m64 z22 = mz[11];
00963 __m64 z26 = _m_from_int(z[26]);
00964
00965 s1 = _mm_add_si64(w1, w4);
00966 C[1] = _m_to_int(s1);
00967 s1 = _m_psrlqi(s1, 32);
00968
00969 s2 = _mm_add_si64(w6, w8);
00970 s1 = _mm_add_si64(s1, s2);
00971 C[2] = _m_to_int(s1);
00972 s1 = _m_psrlqi(s1, 32);
00973
00974 s2 = _mm_add_si64(w10, w12);
00975 s1 = _mm_add_si64(s1, s2);
00976 C[3] = _m_to_int(s1);
00977 s1 = _m_psrlqi(s1, 32);
00978
00979 s3 = _mm_add_si64(x0, y0);
00980 s2 = _mm_add_si64(w14, w16);
00981 s1 = _mm_add_si64(s1, s3);
00982 s1 = _mm_add_si64(s1, s2);
00983 C[4] = _m_to_int(s1);
00984 s1 = _m_psrlqi(s1, 32);
00985
00986 s3 = _mm_add_si64(x1, y1);
00987 s4 = _mm_add_si64(x4, y4);
00988 s1 = _mm_add_si64(s1, w18);
00989 s3 = _mm_add_si64(s3, s4);
00990 s1 = _mm_add_si64(s1, w20);
00991 s1 = _mm_add_si64(s1, s3);
00992 C[5] = _m_to_int(s1);
00993 s1 = _m_psrlqi(s1, 32);
00994
00995 s3 = _mm_add_si64(x6, y6);
00996 s4 = _mm_add_si64(x8, y8);
00997 s1 = _mm_add_si64(s1, w22);
00998 s3 = _mm_add_si64(s3, s4);
00999 s1 = _mm_add_si64(s1, w26);
01000 s1 = _mm_add_si64(s1, s3);
01001 C[6] = _m_to_int(s1);
01002 s1 = _m_psrlqi(s1, 32);
01003
01004 s3 = _mm_add_si64(x10, y10);
01005 s4 = _mm_add_si64(x12, y12);
01006 s1 = _mm_add_si64(s1, w27);
01007 s3 = _mm_add_si64(s3, s4);
01008 s1 = _mm_add_si64(s1, s3);
01009 C[7] = _m_to_int(s1);
01010 s1 = _m_psrlqi(s1, 32);
01011
01012 s3 = _mm_add_si64(x14, y14);
01013 s4 = _mm_add_si64(x16, y16);
01014 s1 = _mm_add_si64(s1, z0);
01015 s3 = _mm_add_si64(s3, s4);
01016 s1 = _mm_add_si64(s1, s3);
01017 C[8] = _m_to_int(s1);
01018 s1 = _m_psrlqi(s1, 32);
01019
01020 s3 = _mm_add_si64(x18, y18);
01021 s4 = _mm_add_si64(x20, y20);
01022 s1 = _mm_add_si64(s1, z1);
01023 s3 = _mm_add_si64(s3, s4);
01024 s1 = _mm_add_si64(s1, z4);
01025 s1 = _mm_add_si64(s1, s3);
01026 C[9] = _m_to_int(s1);
01027 s1 = _m_psrlqi(s1, 32);
01028
01029 s3 = _mm_add_si64(x22, y22);
01030 s4 = _mm_add_si64(x26, y26);
01031 s1 = _mm_add_si64(s1, z6);
01032 s3 = _mm_add_si64(s3, s4);
01033 s1 = _mm_add_si64(s1, z8);
01034 s1 = _mm_add_si64(s1, s3);
01035 C[10] = _m_to_int(s1);
01036 s1 = _m_psrlqi(s1, 32);
01037
01038 s3 = _mm_add_si64(x27, y27);
01039 s1 = _mm_add_si64(s1, z10);
01040 s1 = _mm_add_si64(s1, z12);
01041 s1 = _mm_add_si64(s1, s3);
01042 C[11] = _m_to_int(s1);
01043 s1 = _m_psrlqi(s1, 32);
01044
01045 s3 = _mm_add_si64(z14, z16);
01046 s1 = _mm_add_si64(s1, s3);
01047 C[12] = _m_to_int(s1);
01048 s1 = _m_psrlqi(s1, 32);
01049
01050 s3 = _mm_add_si64(z18, z20);
01051 s1 = _mm_add_si64(s1, s3);
01052 C[13] = _m_to_int(s1);
01053 s1 = _m_psrlqi(s1, 32);
01054
01055 s3 = _mm_add_si64(z22, z26);
01056 s1 = _mm_add_si64(s1, s3);
01057 C[14] = _m_to_int(s1);
01058 s1 = _m_psrlqi(s1, 32);
01059
01060 C[15] = z[27] + _m_to_int(s1);
01061 _mm_empty();
01062 }
01063
01064 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01065 {
01066 __m128i temp[21];
01067 const word *w = (word *)temp;
01068 const __m64 *mw = (__m64 *)w;
01069 const word *x = (word *)temp+7*4;
01070 const __m64 *mx = (__m64 *)x;
01071 const word *y = (word *)temp+7*4*2;
01072 const __m64 *my = (__m64 *)y;
01073
01074 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01075
01076 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01077
01078 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01079
01080 C[0] = w[0];
01081
01082 __m64 s1, s2, s3, s4;
01083
01084 __m64 w1 = _m_from_int(w[1]);
01085 __m64 w4 = mw[2];
01086 __m64 w6 = mw[3];
01087 __m64 w8 = mw[4];
01088 __m64 w10 = mw[5];
01089 __m64 w12 = mw[6];
01090 __m64 w14 = mw[7];
01091 __m64 w16 = mw[8];
01092 __m64 w18 = mw[9];
01093 __m64 w20 = mw[10];
01094 __m64 w22 = mw[11];
01095 __m64 w26 = _m_from_int(w[26]);
01096
01097 __m64 x0 = _m_from_int(x[0]);
01098 __m64 x1 = _m_from_int(x[1]);
01099 __m64 x4 = mx[2];
01100 __m64 x6 = mx[3];
01101 __m64 x8 = mx[4];
01102
01103 __m64 y0 = _m_from_int(y[0]);
01104 __m64 y1 = _m_from_int(y[1]);
01105 __m64 y4 = my[2];
01106 __m64 y6 = my[3];
01107 __m64 y8 = my[4];
01108
01109 s1 = _mm_add_si64(w1, w4);
01110 C[1] = _m_to_int(s1);
01111 s1 = _m_psrlqi(s1, 32);
01112
01113 s2 = _mm_add_si64(w6, w8);
01114 s1 = _mm_add_si64(s1, s2);
01115 C[2] = _m_to_int(s1);
01116 s1 = _m_psrlqi(s1, 32);
01117
01118 s2 = _mm_add_si64(w10, w12);
01119 s1 = _mm_add_si64(s1, s2);
01120 C[3] = _m_to_int(s1);
01121 s1 = _m_psrlqi(s1, 32);
01122
01123 s3 = _mm_add_si64(x0, y0);
01124 s2 = _mm_add_si64(w14, w16);
01125 s1 = _mm_add_si64(s1, s3);
01126 s1 = _mm_add_si64(s1, s2);
01127 C[4] = _m_to_int(s1);
01128 s1 = _m_psrlqi(s1, 32);
01129
01130 s3 = _mm_add_si64(x1, y1);
01131 s4 = _mm_add_si64(x4, y4);
01132 s1 = _mm_add_si64(s1, w18);
01133 s3 = _mm_add_si64(s3, s4);
01134 s1 = _mm_add_si64(s1, w20);
01135 s1 = _mm_add_si64(s1, s3);
01136 C[5] = _m_to_int(s1);
01137 s1 = _m_psrlqi(s1, 32);
01138
01139 s3 = _mm_add_si64(x6, y6);
01140 s4 = _mm_add_si64(x8, y8);
01141 s1 = _mm_add_si64(s1, w22);
01142 s3 = _mm_add_si64(s3, s4);
01143 s1 = _mm_add_si64(s1, w26);
01144 s1 = _mm_add_si64(s1, s3);
01145 C[6] = _m_to_int(s1);
01146 s1 = _m_psrlqi(s1, 32);
01147
01148 C[7] = _m_to_int(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01149 _mm_empty();
01150 }
01151
01152 __declspec(naked) word __fastcall P4Optimized::Add(word *C, const word *A, const word *B, unsigned int N)
01153 {
01154 __asm
01155 {
01156 sub esp, 16
01157 xor eax, eax
01158 mov [esp], edi
01159 mov [esp+4], esi
01160 mov [esp+8], ebx
01161 mov [esp+12], ebp
01162
01163 mov ebx, [esp+20]
01164 mov esi, [esp+24]
01165
01166
01167
01168 neg esi
01169 jz loopend
01170
01171 mov edi, [edx]
01172 mov ebp, [ebx]
01173
01174 loopstart:
01175 add edi, eax
01176 jc carry1
01177
01178 xor eax, eax
01179
01180 carry1continue:
01181 add edi, ebp
01182 mov ebp, 1
01183 mov [ecx], edi
01184 mov edi, [edx+4]
01185 cmovc eax, ebp
01186 mov ebp, [ebx+4]
01187 lea ebx, [ebx+8]
01188 add edi, eax
01189 jc carry2
01190
01191 xor eax, eax
01192
01193 carry2continue:
01194 add edi, ebp
01195 mov ebp, 1
01196 cmovc eax, ebp
01197 mov [ecx+4], edi
01198 add ecx, 8
01199 mov edi, [edx+8]
01200 add edx, 8
01201 add esi, 2
01202 mov ebp, [ebx]
01203 jnz loopstart
01204
01205 loopend:
01206 mov edi, [esp]
01207 mov esi, [esp+4]
01208 mov ebx, [esp+8]
01209 mov ebp, [esp+12]
01210 add esp, 16
01211 ret 8
01212
01213 carry1:
01214 mov eax, 1
01215 jmp carry1continue
01216
01217 carry2:
01218 mov eax, 1
01219 jmp carry2continue
01220 }
01221 }
01222
01223 __declspec(naked) word __fastcall P4Optimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01224 {
01225 __asm
01226 {
01227 sub esp, 16
01228 xor eax, eax
01229 mov [esp], edi
01230 mov [esp+4], esi
01231 mov [esp+8], ebx
01232 mov [esp+12], ebp
01233
01234 mov ebx, [esp+20]
01235 mov esi, [esp+24]
01236
01237
01238
01239 neg esi
01240 jz loopend
01241
01242 mov edi, [edx]
01243 mov ebp, [ebx]
01244
01245 loopstart:
01246 sub edi, eax
01247 jc carry1
01248
01249 xor eax, eax
01250
01251 carry1continue:
01252 sub edi, ebp
01253 mov ebp, 1
01254 mov [ecx], edi
01255 mov edi, [edx+4]
01256 cmovc eax, ebp
01257 mov ebp, [ebx+4]
01258 lea ebx, [ebx+8]
01259 sub edi, eax
01260 jc carry2
01261
01262 xor eax, eax
01263
01264 carry2continue:
01265 sub edi, ebp
01266 mov ebp, 1
01267 cmovc eax, ebp
01268 mov [ecx+4], edi
01269 add ecx, 8
01270 mov edi, [edx+8]
01271 add edx, 8
01272 add esi, 2
01273 mov ebp, [ebx]
01274 jnz loopstart
01275
01276 loopend:
01277 mov edi, [esp]
01278 mov esi, [esp+4]
01279 mov ebx, [esp+8]
01280 mov ebp, [esp+12]
01281 add esp, 16
01282 ret 8
01283
01284 carry1:
01285 mov eax, 1
01286 jmp carry1continue
01287
01288 carry2:
01289 mov eax, 1
01290 jmp carry2continue
01291 }
01292 }
01293
01294 #endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01295
01296 #elif defined(__GNUC__) && defined(__i386__)
01297
01298 class PentiumOptimized : public Portable
01299 {
01300 public:
01301 static word Add(word *C, const word *A, const word *B, unsigned int N);
01302 static word Subtract(word *C, const word *A, const word *B, unsigned int N);
01303 static void Square4(word *R, const word *A);
01304 static void Multiply4(word *C, const word *A, const word *B);
01305 static void Multiply8(word *C, const word *A, const word *B);
01306 };
01307
01308 typedef PentiumOptimized LowLevel;
01309
01310
01311
01312 __attribute__((regparm(3))) word PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N)
01313 {
01314 assert (N%2 == 0);
01315
01316 register word carry, temp;
01317
01318 __asm__ __volatile__(
01319 "push %%ebp;"
01320 "sub %3, %2;"
01321 "xor %0, %0;"
01322 "sub %4, %0;"
01323 "lea (%1,%4,4), %1;"
01324 "sar $1, %0;"
01325 "jz 1f;"
01326
01327 "0:;"
01328 "mov 0(%3), %4;"
01329 "mov 4(%3), %%ebp;"
01330 "mov (%1,%0,8), %5;"
01331 "lea 8(%3), %3;"
01332 "adc %5, %4;"
01333 "mov 4(%1,%0,8), %5;"
01334 "adc %5, %%ebp;"
01335 "inc %0;"
01336 "mov %4, -8(%3, %2);"
01337 "mov %%ebp, -4(%3, %2);"
01338 "jnz 0b;"
01339
01340 "1:;"
01341 "adc $0, %0;"
01342 "pop %%ebp;"
01343
01344 : "=aSD" (carry), "+r" (B), "+r" (C), "+r" (A), "+r" (N), "=r" (temp)
01345 : : "cc", "memory");
01346
01347 return carry;
01348 }
01349
01350 __attribute__((regparm(3))) word PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01351 {
01352 assert (N%2 == 0);
01353
01354 register word carry, temp;
01355
01356 __asm__ __volatile__(
01357 "push %%ebp;"
01358 "sub %3, %2;"
01359 "xor %0, %0;"
01360 "sub %4, %0;"
01361 "lea (%1,%4,4), %1;"
01362 "sar $1, %0;"
01363 "jz 1f;"
01364
01365 "0:;"
01366 "mov 0(%3), %4;"
01367 "mov 4(%3), %%ebp;"
01368 "mov (%1,%0,8), %5;"
01369 "lea 8(%3), %3;"
01370 "sbb %5, %4;"
01371 "mov 4(%1,%0,8), %5;"
01372 "sbb %5, %%ebp;"
01373 "inc %0;"
01374 "mov %4, -8(%3, %2);"
01375 "mov %%ebp, -4(%3, %2);"
01376 "jnz 0b;"
01377
01378 "1:;"
01379 "adc $0, %0;"
01380 "pop %%ebp;"
01381
01382 : "=aSD" (carry), "+r" (B), "+r" (C), "+r" (A), "+r" (N), "=r" (temp)
01383 : : "cc", "memory");
01384
01385 return carry;
01386 }
01387
01388
01389
01390 #define SqrStartup \
01391 "push %%ebp\n\t" \
01392 "push %%esi\n\t" \
01393 "push %%ebx\n\t" \
01394 "xor %%ebp, %%ebp\n\t" \
01395 "xor %%ebx, %%ebx\n\t" \
01396 "xor %%ecx, %%ecx\n\t"
01397
01398 #define SqrShiftCarry \
01399 "mov %%ebx, %%ebp\n\t" \
01400 "mov %%ecx, %%ebx\n\t" \
01401 "xor %%ecx, %%ecx\n\t"
01402
01403 #define SqrAccumulate(i,j) \
01404 "mov 4*"#j"(%%esi), %%eax\n\t" \
01405 "mull 4*"#i"(%%esi)\n\t" \
01406 "add %%eax, %%ebp\n\t" \
01407 "adc %%edx, %%ebx\n\t" \
01408 "adc %%ch, %%cl\n\t" \
01409 "add %%eax, %%ebp\n\t" \
01410 "adc %%edx, %%ebx\n\t" \
01411 "adc %%ch, %%cl\n\t"
01412
01413 #define SqrAccumulateCentre(i) \
01414 "mov 4*"#i"(%%esi), %%eax\n\t" \
01415 "mull 4*"#i"(%%esi)\n\t" \
01416 "add %%eax, %%ebp\n\t" \
01417 "adc %%edx, %%ebx\n\t" \
01418 "adc %%ch, %%cl\n\t"
01419
01420 #define SqrStoreDigit(X) \
01421 "mov %%ebp, 4*"#X"(%%edi)\n\t" \
01422
01423 #define SqrLastDiagonal(digits) \
01424 "mov 4*("#digits"-1)(%%esi), %%eax\n\t" \
01425 "mull 4*("#digits"-1)(%%esi)\n\t" \
01426 "add %%eax, %%ebp\n\t" \
01427 "adc %%edx, %%ebx\n\t" \
01428 "mov %%ebp, 4*(2*"#digits"-2)(%%edi)\n\t" \
01429 "mov %%ebx, 4*(2*"#digits"-1)(%%edi)\n\t"
01430
01431 #define SqrCleanup \
01432 "pop %%ebx\n\t" \
01433 "pop %%esi\n\t" \
01434 "pop %%ebp\n\t"
01435
01436 void PentiumOptimized::Square4(word* Y, const word* X)
01437 {
01438 __asm__ __volatile__(
01439 SqrStartup
01440
01441 SqrAccumulateCentre(0)
01442 SqrStoreDigit(0)
01443 SqrShiftCarry
01444
01445 SqrAccumulate(1,0)
01446 SqrStoreDigit(1)
01447 SqrShiftCarry
01448
01449 SqrAccumulate(2,0)
01450 SqrAccumulateCentre(1)
01451 SqrStoreDigit(2)
01452 SqrShiftCarry
01453
01454 SqrAccumulate(3,0)
01455 SqrAccumulate(2,1)
01456 SqrStoreDigit(3)
01457 SqrShiftCarry
01458
01459 SqrAccumulate(3,1)
01460 SqrAccumulateCentre(2)
01461 SqrStoreDigit(4)
01462 SqrShiftCarry
01463
01464 SqrAccumulate(3,2)
01465 SqrStoreDigit(5)
01466 SqrShiftCarry
01467
01468 SqrLastDiagonal(4)
01469
01470 SqrCleanup
01471
01472 :
01473 : "D" (Y), "S" (X)
01474 : "eax", "ecx", "edx", "ebp", "memory"
01475 );
01476 }
01477
01478 #define MulStartup \
01479 "push %%ebp\n\t" \
01480 "push %%esi\n\t" \
01481 "push %%ebx\n\t" \
01482 "push %%edi\n\t" \
01483 "mov %%eax, %%ebx \n\t" \
01484 "xor %%ebp, %%ebp\n\t" \
01485 "xor %%edi, %%edi\n\t" \
01486 "xor %%ecx, %%ecx\n\t"
01487
01488 #define MulShiftCarry \
01489 "mov %%edx, %%ebp\n\t" \
01490 "mov %%ecx, %%edi\n\t" \
01491 "xor %%ecx, %%ecx\n\t"
01492
01493 #define MulAccumulate(i,j) \
01494 "mov 4*"#j"(%%ebx), %%eax\n\t" \
01495 "mull 4*"#i"(%%esi)\n\t" \
01496 "add %%eax, %%ebp\n\t" \
01497 "adc %%edx, %%edi\n\t" \
01498 "adc %%ch, %%cl\n\t"
01499
01500 #define MulStoreDigit(X) \
01501 "mov %%edi, %%edx \n\t" \
01502 "mov (%%esp), %%edi \n\t" \
01503 "mov %%ebp, 4*"#X"(%%edi)\n\t" \
01504 "mov %%edi, (%%esp)\n\t"
01505
01506 #define MulLastDiagonal(digits) \
01507 "mov 4*("#digits"-1)(%%ebx), %%eax\n\t" \
01508 "mull 4*("#digits"-1)(%%esi)\n\t" \
01509 "add %%eax, %%ebp\n\t" \
01510 "adc %%edi, %%edx\n\t" \
01511 "mov (%%esp), %%edi\n\t" \
01512 "mov %%ebp, 4*(2*"#digits"-2)(%%edi)\n\t" \
01513 "mov %%edx, 4*(2*"#digits"-1)(%%edi)\n\t"
01514
01515 #define MulCleanup \
01516 "pop %%edi\n\t" \
01517 "pop %%ebx\n\t" \
01518 "pop %%esi\n\t" \
01519 "pop %%ebp\n\t"
01520
01521 void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01522 {
01523 __asm__ __volatile__(
01524 MulStartup
01525 MulAccumulate(0,0)
01526 MulStoreDigit(0)
01527 MulShiftCarry
01528
01529 MulAccumulate(1,0)
01530 MulAccumulate(0,1)
01531 MulStoreDigit(1)
01532 MulShiftCarry
01533
01534 MulAccumulate(2,0)
01535 MulAccumulate(1,1)
01536 MulAccumulate(0,2)
01537 MulStoreDigit(2)
01538 MulShiftCarry
01539
01540 MulAccumulate(3,0)
01541 MulAccumulate(2,1)
01542 MulAccumulate(1,2)
01543 MulAccumulate(0,3)
01544 MulStoreDigit(3)
01545 MulShiftCarry
01546
01547 MulAccumulate(3,1)
01548 MulAccumulate(2,2)
01549 MulAccumulate(1,3)
01550 MulStoreDigit(4)
01551 MulShiftCarry
01552
01553 MulAccumulate(3,2)
01554 MulAccumulate(2,3)
01555 MulStoreDigit(5)
01556 MulShiftCarry
01557
01558 MulLastDiagonal(4)
01559
01560 MulCleanup
01561
01562 :
01563 : "D" (Z), "S" (X), "a" (Y)
01564 : "%ecx", "%edx", "memory"
01565 );
01566 }
01567
01568 void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01569 {
01570 __asm__ __volatile__(
01571 MulStartup
01572 MulAccumulate(0,0)
01573 MulStoreDigit(0)
01574 MulShiftCarry
01575
01576 MulAccumulate(1,0)
01577 MulAccumulate(0,1)
01578 MulStoreDigit(1)
01579 MulShiftCarry
01580
01581 MulAccumulate(2,0)
01582 MulAccumulate(1,1)
01583 MulAccumulate(0,2)
01584 MulStoreDigit(2)
01585 MulShiftCarry
01586
01587 MulAccumulate(3,0)
01588 MulAccumulate(2,1)
01589 MulAccumulate(1,2)
01590 MulAccumulate(0,3)
01591 MulStoreDigit(3)
01592 MulShiftCarry
01593
01594 MulAccumulate(4,0)
01595 MulAccumulate(3,1)
01596 MulAccumulate(2,2)
01597 MulAccumulate(1,3)
01598 MulAccumulate(0,4)
01599 MulStoreDigit(4)
01600 MulShiftCarry
01601
01602 MulAccumulate(5,0)
01603 MulAccumulate(4,1)
01604 MulAccumulate(3,2)
01605 MulAccumulate(2,3)
01606 MulAccumulate(1,4)
01607 MulAccumulate(0,5)
01608 MulStoreDigit(5)
01609 MulShiftCarry
01610
01611 MulAccumulate(6,0)
01612 MulAccumulate(5,1)
01613 MulAccumulate(4,2)
01614 MulAccumulate(3,3)
01615 MulAccumulate(2,4)
01616 MulAccumulate(1,5)
01617 MulAccumulate(0,6)
01618 MulStoreDigit(6)
01619 MulShiftCarry
01620
01621 MulAccumulate(7,0)
01622 MulAccumulate(6,1)
01623 MulAccumulate(5,2)
01624 MulAccumulate(4,3)
01625 MulAccumulate(3,4)
01626 MulAccumulate(2,5)
01627 MulAccumulate(1,6)
01628 MulAccumulate(0,7)
01629 MulStoreDigit(7)
01630 MulShiftCarry
01631
01632 MulAccumulate(7,1)
01633 MulAccumulate(6,2)
01634 MulAccumulate(5,3)
01635 MulAccumulate(4,4)
01636 MulAccumulate(3,5)
01637 MulAccumulate(2,6)
01638 MulAccumulate(1,7)
01639 MulStoreDigit(8)
01640 MulShiftCarry
01641
01642 MulAccumulate(7,2)
01643 MulAccumulate(6,3)
01644 MulAccumulate(5,4)
01645 MulAccumulate(4,5)
01646 MulAccumulate(3,6)
01647 MulAccumulate(2,7)
01648 MulStoreDigit(9)
01649 MulShiftCarry
01650
01651 MulAccumulate(7,3)
01652 MulAccumulate(6,4)
01653 MulAccumulate(5,5)
01654 MulAccumulate(4,6)
01655 MulAccumulate(3,7)
01656 MulStoreDigit(10)
01657 MulShiftCarry
01658
01659 MulAccumulate(7,4)
01660 MulAccumulate(6,5)
01661 MulAccumulate(5,6)
01662 MulAccumulate(4,7)
01663 MulStoreDigit(11)
01664 MulShiftCarry
01665
01666 MulAccumulate(7,5)
01667 MulAccumulate(6,6)
01668 MulAccumulate(5,7)
01669 MulStoreDigit(12)
01670 MulShiftCarry
01671
01672 MulAccumulate(7,6)
01673 MulAccumulate(6,7)
01674 MulStoreDigit(13)
01675 MulShiftCarry
01676
01677 MulLastDiagonal(8)
01678
01679 MulCleanup
01680
01681 :
01682 : "D" (Z), "S" (X), "a" (Y)
01683 : "%ecx", "%edx", "memory"
01684 );
01685 }
01686
01687 #elif defined(__GNUC__) && defined(__alpha__)
01688
01689 class AlphaOptimized : public Portable
01690 {
01691 public:
01692 static inline void Multiply2(word *C, const word *A, const word *B);
01693 static inline word Multiply2Add(word *C, const word *A, const word *B);
01694 static inline void Multiply4(word *C, const word *A, const word *B);
01695 static inline unsigned int MultiplyRecursionLimit() {return 4;}
01696
01697 static inline void Multiply4Bottom(word *C, const word *A, const word *B);
01698 static inline unsigned int MultiplyBottomRecursionLimit() {return 4;}
01699
01700 static inline void Square4(word *R, const word *A)
01701 {
01702 Multiply4(R, A, A);
01703 }
01704 };
01705
01706 typedef AlphaOptimized LowLevel;
01707
01708 inline void AlphaOptimized::Multiply2(word *C, const word *A, const word *B)
01709 {
01710 register dword c, a = *(const dword *)A, b = *(const dword *)B;
01711 ((dword *)C)[0] = a*b;
01712 __asm__("umulh %1,%2,%0" : "=r" (c) : "r" (a), "r" (b));
01713 ((dword *)C)[1] = c;
01714 }
01715
01716 inline word AlphaOptimized::Multiply2Add(word *C, const word *A, const word *B)
01717 {
01718 register dword c, d, e, a = *(const dword *)A, b = *(const dword *)B;
01719 c = ((dword *)C)[0];
01720 d = a*b + c;
01721 __asm__("umulh %1,%2,%0" : "=r" (e) : "r" (a), "r" (b));
01722 ((dword *)C)[0] = d;
01723 d = (d < c);
01724 c = ((dword *)C)[1] + d;
01725 d = (c < d);
01726 c += e;
01727 ((dword *)C)[1] = c;
01728 d |= (c < e);
01729 return d;
01730 }
01731
01732 inline void AlphaOptimized::Multiply4(word *R, const word *A, const word *B)
01733 {
01734 Multiply2(R, A, B);
01735 Multiply2(R+4, A+2, B+2);
01736 word carry = Multiply2Add(R+2, A+0, B+2);
01737 carry += Multiply2Add(R+2, A+2, B+0);
01738 Increment(R+6, 2, carry);
01739 }
01740
01741 static inline void Multiply2BottomAdd(word *C, const word *A, const word *B)
01742 {
01743 register dword a = *(const dword *)A, b = *(const dword *)B;
01744 ((dword *)C)[0] = a*b + ((dword *)C)[0];
01745 }
01746
01747 inline void AlphaOptimized::Multiply4Bottom(word *R, const word *A, const word *B)
01748 {
01749 Multiply2(R, A, B);
01750 Multiply2BottomAdd(R+2, A+0, B+2);
01751 Multiply2BottomAdd(R+2, A+2, B+0);
01752 }
01753
01754 #else // no processor specific code available
01755
01756 typedef Portable LowLevel;
01757
01758 #endif
01759
01760
01761
01762 #define A0 A
01763 #define A1 (A+N2)
01764 #define B0 B
01765 #define B1 (B+N2)
01766
01767 #define T0 T
01768 #define T1 (T+N2)
01769 #define T2 (T+N)
01770 #define T3 (T+N+N2)
01771
01772 #define R0 R
01773 #define R1 (R+N2)
01774 #define R2 (R+N)
01775 #define R3 (R+N+N2)
01776
01777
01778
01779
01780
01781
01782
01783
01784 template <class P>
01785 void DoRecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N, const P *dummy=NULL);
01786
01787 template <class P>
01788 inline void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N, const P *dummy=NULL)
01789 {
01790 assert(N>=2 && N%2==0);
01791
01792 if (P::MultiplyRecursionLimit() >= 8 && N==8)
01793 P::Multiply8(R, A, B);
01794 else if (P::MultiplyRecursionLimit() >= 4 && N==4)
01795 P::Multiply4(R, A, B);
01796 else if (N==2)
01797 P::Multiply2(R, A, B);
01798 else
01799 DoRecursiveMultiply<P>(R, T, A, B, N, NULL);
01800 }
01801
01802 template <class P>
01803 void DoRecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N, const P *dummy)
01804 {
01805 const unsigned int N2 = N/2;
01806 int carry;
01807
01808 int aComp = Compare(A0, A1, N2);
01809 int bComp = Compare(B0, B1, N2);
01810
01811 switch (2*aComp + aComp + bComp)
01812 {
01813 case -4:
01814 P::Subtract(R0, A1, A0, N2);
01815 P::Subtract(R1, B0, B1, N2);
01816 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01817 P::Subtract(T1, T1, R0, N2);
01818 carry = -1;
01819 break;
01820 case -2:
01821 P::Subtract(R0, A1, A0, N2);
01822 P::Subtract(R1, B0, B1, N2);
01823 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01824 carry = 0;
01825 break;
01826 case 2:
01827 P::Subtract(R0, A0, A1, N2);
01828 P::Subtract(R1, B1, B0, N2);
01829 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01830 carry = 0;
01831 break;
01832 case 4:
01833 P::Subtract(R0, A1, A0, N2);
01834 P::Subtract(R1, B0, B1, N2);
01835 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01836 P::Subtract(T1, T1, R1, N2);
01837 carry = -1;
01838 break;
01839 default:
01840 SetWords(T0, 0, N);
01841 carry = 0;
01842 }
01843
01844 RecursiveMultiply<P>(R0, T2, A0, B0, N2);
01845 RecursiveMultiply<P>(R2, T2, A1, B1, N2);
01846
01847
01848
01849 carry += P::Add(T0, T0, R0, N);
01850 carry += P::Add(T0, T0, R2, N);
01851 carry += P::Add(R1, R1, T0, N);
01852
01853 assert (carry >= 0 && carry <= 2);
01854 Increment(R3, N2, carry);
01855 }
01856
01857
01858
01859
01860
01861 template <class P>
01862 void DoRecursiveSquare(word *R, word *T, const word *A, unsigned int N, const P *dummy=NULL);
01863
01864 template <class P>
01865 inline void RecursiveSquare(word *R, word *T, const word *A, unsigned int N, const P *dummy=NULL)
01866 {
01867 assert(N && N%2==0);
01868 if (P::SquareRecursionLimit() >= 8 && N==8)
01869 P::Square8(R, A);
01870 if (P::SquareRecursionLimit() >= 4 && N==4)
01871 P::Square4(R, A);
01872 else if (N==2)
01873 P::Square2(R, A);
01874 else
01875 DoRecursiveSquare<P>(R, T, A, N, NULL);
01876 }
01877
01878 template <class P>
01879 void DoRecursiveSquare(word *R, word *T, const word *A, unsigned int N, const P *dummy)
01880 {
01881 const unsigned int N2 = N/2;
01882
01883 RecursiveSquare<P>(R0, T2, A0, N2);
01884 RecursiveSquare<P>(R2, T2, A1, N2);
01885 RecursiveMultiply<P>(T0, T2, A0, A1, N2);
01886
01887 word carry = P::Add(R1, R1, T0, N);
01888 carry += P::Add(R1, R1, T0, N);
01889 Increment(R3, N2, carry);
01890 }
01891
01892
01893
01894
01895
01896
01897 template <class P>
01898 void DoRecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N, const P *dummy=NULL);
01899
01900 template <class P>
01901 inline void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N, const P *dummy=NULL)
01902 {
01903 assert(N>=2 && N%2==0);
01904 if (P::MultiplyBottomRecursionLimit() >= 8 && N==8)
01905 P::Multiply8Bottom(R, A, B);
01906 else if (P::MultiplyBottomRecursionLimit() >= 4 && N==4)
01907 P::Multiply4Bottom(R, A, B);
01908 else if (N==2)
01909 P::Multiply2Bottom(R, A, B);
01910 else
01911 DoRecursiveMultiplyBottom<P>(R, T, A, B, N, NULL);
01912 }
01913
01914 template <class P>
01915 void DoRecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N, const P *dummy)
01916 {
01917 const unsigned int N2 = N/2;
01918
01919 RecursiveMultiply<P>(R, T, A0, B0, N2);
01920 RecursiveMultiplyBottom<P>(T0, T1, A1, B0, N2);
01921 P::Add(R1, R1, T0, N2);
01922 RecursiveMultiplyBottom<P>(T0, T1, A0, B1, N2);
01923 P::Add(R1, R1, T0, N2);
01924 }
01925
01926
01927
01928
01929
01930
01931
01932 template <class P>
01933 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N, const P *dummy=NULL)
01934 {
01935 assert(N>=2 && N%2==0);
01936
01937 if (N==4)
01938 {
01939 P::Multiply4(T, A, B);
01940 ((dword *)R)[0] = ((dword *)T)[2];
01941 ((dword *)R)[1] = ((dword *)T)[3];
01942 }
01943 else if (N==2)
01944 {
01945 P::Multiply2(T, A, B);
01946 ((dword *)R)[0] = ((dword *)T)[1];
01947 }
01948 else
01949 {
01950 const unsigned int N2 = N/2;
01951 int carry;
01952
01953 int aComp = Compare(A0, A1, N2);
01954 int bComp = Compare(B0, B1, N2);
01955
01956 switch (2*aComp + aComp + bComp)
01957 {
01958 case -4:
01959 P::Subtract(R0, A1, A0, N2);
01960 P::Subtract(R1, B0, B1, N2);
01961 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01962 P::Subtract(T1, T1, R0, N2);
01963 carry = -1;
01964 break;
01965 case -2:
01966 P::Subtract(R0, A1, A0, N2);
01967 P::Subtract(R1, B0, B1, N2);
01968 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01969 carry = 0;
01970 break;
01971 case 2:
01972 P::Subtract(R0, A0, A1, N2);
01973 P::Subtract(R1, B1, B0, N2);
01974 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01975 carry = 0;
01976 break;
01977 case 4:
01978 P::Subtract(R0, A1, A0, N2);
01979 P::Subtract(R1, B0, B1, N2);
01980 RecursiveMultiply<P>(T0, T2, R0, R1, N2);
01981 P::Subtract(T1, T1, R1, N2);
01982 carry = -1;
01983 break;
01984 default:
01985 SetWords(T0, 0, N);
01986 carry = 0;
01987 }
01988
01989 RecursiveMultiply<P>(T2, R0, A1, B1, N2);
01990
01991
01992
01993 word c2 = P::Subtract(R0, L+N2, L, N2);
01994 c2 += P::Subtract(R0, R0, T0, N2);
01995 word t = (Compare(R0, T2, N2) == -1);
01996
01997 carry += t;
01998 carry += Increment(R0, N2, c2+t);
01999 carry += P::Add(R0, R0, T1, N2);
02000 carry += P::Add(R0, R0, T3, N2);
02001 assert (carry >= 0 && carry <= 2);
02002
02003 CopyWords(R1, T3, N2);
02004 Increment(R1, N2, carry);
02005 }
02006 }
02007
02008 inline word Add(word *C, const word *A, const word *B, unsigned int N)
02009 {
02010 return LowLevel::Add(C, A, B, N);
02011 }
02012
02013 inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
02014 {
02015 return LowLevel::Subtract(C, A, B, N);
02016 }
02017
02018 inline void Multiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02019 {
02020 #ifdef SSE2_INTRINSICS_AVAILABLE
02021 if (HasSSE2())
02022 RecursiveMultiply<P4Optimized>(R, T, A, B, N);
02023 else
02024 #endif
02025 RecursiveMultiply<LowLevel>(R, T, A, B, N);
02026 }
02027
02028 inline void Square(word *R, word *T, const word *A, unsigned int N)
02029 {
02030 #ifdef SSE2_INTRINSICS_AVAILABLE
02031 if (HasSSE2())
02032 RecursiveSquare<P4Optimized>(R, T, A, N);
02033 else
02034 #endif
02035 RecursiveSquare<LowLevel>(R, T, A, N);
02036 }
02037
02038 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02039 {
02040 #ifdef SSE2_INTRINSICS_AVAILABLE
02041 if (HasSSE2())
02042 RecursiveMultiplyBottom<P4Optimized>(R, T, A, B, N);
02043 else
02044 #endif
02045 RecursiveMultiplyBottom<LowLevel>(R, T, A, B, N);
02046 }
02047
02048 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02049 {
02050 #ifdef SSE2_INTRINSICS_AVAILABLE
02051 if (HasSSE2())
02052 RecursiveMultiplyTop<P4Optimized>(R, T, L, A, B, N);
02053 else
02054 #endif
02055 RecursiveMultiplyTop<LowLevel>(R, T, L, A, B, N);
02056 }
02057
02058
02059
02060
02061
02062
02063 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02064 {
02065 if (NA == NB)
02066 {
02067 if (A == B)
02068 Square(R, T, A, NA);
02069 else
02070 Multiply(R, T, A, B, NA);
02071
02072 return;
02073 }
02074
02075 if (NA > NB)
02076 {
02077 std::swap(A, B);
02078 std::swap(NA, NB);
02079 }
02080
02081 assert(NB % NA == 0);
02082 assert((NB/NA)%2 == 0);
02083
02084 if (NA==2 && !A[1])
02085 {
02086 switch (A[0])
02087 {
02088 case 0:
02089 SetWords(R, 0, NB+2);
02090 return;
02091 case 1:
02092 CopyWords(R, B, NB);
02093 R[NB] = R[NB+1] = 0;
02094 return;
02095 default:
02096 R[NB] = LinearMultiply(R, B, A[0], NB);
02097 R[NB+1] = 0;
02098 return;
02099 }
02100 }
02101
02102 Multiply(R, T, A, B, NA);
02103 CopyWords(T+2*NA, R+NA, NA);
02104
02105 unsigned i;
02106
02107 for (i=2*NA; i<NB; i+=2*NA)
02108 Multiply(T+NA+i, T, A, B+i, NA);
02109 for (i=NA; i<NB; i+=2*NA)
02110 Multiply(R+i, T, A, B+i, NA);
02111
02112 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02113 Increment(R+NB, NA);
02114 }
02115
02116
02117
02118
02119
02120 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
02121 {
02122 if (N==2)
02123 AtomicInverseModPower2(R, A[0], A[1]);
02124 else
02125 {
02126 const unsigned int N2 = N/2;
02127 RecursiveInverseModPower2(R0, T0, A0, N2);
02128 T0[0] = 1;
02129 SetWords(T0+1, 0, N2-1);
02130 MultiplyTop(R1, T1, T0, R0, A0, N2);
02131 MultiplyBottom(T0, T1, R0, A1, N2);
02132 Add(T0, R1, T0, N2);
02133 TwosComplement(T0, N2);
02134 MultiplyBottom(R1, T1, R0, T0, N2);
02135 }
02136 }
02137
02138
02139
02140
02141
02142
02143
02144 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)
02145 {
02146 MultiplyBottom(R, T, X, U, N);
02147 MultiplyTop(T, T+N, X, R, M, N);
02148 if (Subtract(R, X+N, T, N))
02149 {
02150 word carry = Add(R, R, M, N);
02151 assert(carry);
02152 }
02153 }
02154
02155
02156
02157
02158
02159
02160
02161
02162 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, unsigned int N)
02163 {
02164 assert(N%2==0 && N>=4);
02165
02166 #define M0 M
02167 #define M1 (M+N2)
02168 #define V0 V
02169 #define V1 (V+N2)
02170
02171 #define X0 X
02172 #define X1 (X+N2)
02173 #define X2 (X+N)
02174 #define X3 (X+N+N2)
02175
02176 const unsigned int N2 = N/2;
02177 Multiply(T0, T2, V0, X3, N2);
02178 int c2 = Add(T0, T0, X0, N);
02179 MultiplyBottom(T3, T2, T0, U, N2);
02180 MultiplyTop(T2, R, T0, T3, M0, N2);
02181 c2 -= Subtract(T2, T1, T2, N2);
02182 Multiply(T0, R, T3, M1, N2);
02183 c2 -= Subtract(T0, T2, T0, N2);
02184 int c3 = -(int)Subtract(T1, X2, T1, N2);
02185 Multiply(R0, T2, V1, X3, N2);
02186 c3 += Add(R, R, T, N);
02187
02188 if (c2>0)
02189 c3 += Increment(R1, N2);
02190 else if (c2<0)
02191 c3 -= Decrement(R1, N2, -c2);
02192
02193 assert(c3>=-1 && c3<=1);
02194 if (c3>0)
02195 Subtract(R, R, M, N);
02196 else if (c3<0)
02197 Add(R, R, M, N);
02198
02199 #undef M0
02200 #undef M1
02201 #undef V0
02202 #undef V1
02203
02204 #undef X0
02205 #undef X1
02206 #undef X2
02207 #undef X3
02208 }
02209
02210 #undef A0
02211 #undef A1
02212 #undef B0
02213 #undef B1
02214
02215 #undef T0
02216 #undef T1
02217 #undef T2
02218 #undef T3
02219
02220 #undef R0
02221 #undef R1
02222 #undef R2
02223 #undef R3
02224
02225
02226 static word SubatomicDivide(word *A, word B0, word B1)
02227 {
02228
02229 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02230
02231 dword p, u;
02232 word Q;
02233
02234
02235 if (B1+1 == 0)
02236 Q = A[2];
02237 else
02238 Q = word(MAKE_DWORD(A[1], A[2]) / (B1+1));
02239
02240
02241 p = (dword) B0*Q;
02242 u = (dword) A[0] - LOW_WORD(p);
02243 A[0] = LOW_WORD(u);
02244 u = (dword) A[1] - HIGH_WORD(p) - (word)(0-HIGH_WORD(u)) - (dword)B1*Q;
02245 A[1] = LOW_WORD(u);
02246 A[2] += HIGH_WORD(u);
02247
02248
02249 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02250 {
02251 u = (dword) A[0] - B0;
02252 A[0] = LOW_WORD(u);
02253 u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u));
02254 A[1] = LOW_WORD(u);
02255 A[2] += HIGH_WORD(u);
02256 Q++;
02257 assert(Q);
02258 }
02259
02260 return Q;
02261 }
02262
02263
02264 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02265 {
02266 if (!B[0] && !B[1])
02267 {
02268 Q[0] = A[2];
02269 Q[1] = A[3];
02270 }
02271 else
02272 {
02273 word T[4];
02274 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02275 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02276 Q[0] = SubatomicDivide(T, B[0], B[1]);
02277
02278 #ifndef NDEBUG
02279
02280 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02281 word P[4];
02282 LowLevel::Multiply2(P, Q, B);
02283 Add(P, P, T, 4);
02284 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02285 #endif
02286 }
02287 }
02288
02289
02290 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)
02291 {
02292 assert(N && N%2==0);
02293
02294 if (Q[1])
02295 {
02296 T[N] = T[N+1] = 0;
02297 unsigned i;
02298 for (i=0; i<N; i+=4)
02299 LowLevel::Multiply2(T+i, Q, B+i);
02300 for (i=2; i<N; i+=4)
02301 if (LowLevel::Multiply2Add(T+i, Q, B+i))
02302 T[i+5] += (++T[i+4]==0);
02303 }
02304 else
02305 {
02306 T[N] = LinearMultiply(T, B, Q[0], N);
02307 T[N+1] = 0;
02308 }
02309
02310 word borrow = Subtract(R, R, T, N+2);
02311 assert(!borrow && !R[N+1]);
02312
02313 while (R[N] || Compare(R, B, N) >= 0)
02314 {
02315 R[N] -= Subtract(R, R, B, N);
02316 Q[1] += (++Q[0]==0);
02317 assert(Q[0] || Q[1]);
02318 }
02319 }
02320
02321
02322
02323
02324
02325
02326
02327 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02328 {
02329 assert(NA && NB && NA%2==0 && NB%2==0);
02330 assert(B[NB-1] || B[NB-2]);
02331 assert(NB <= NA);
02332
02333
02334 word *const TA=T;
02335 word *const TB=T+NA+2;
02336 word *const TP=T+NA+2+NB;
02337
02338
02339 unsigned shiftWords = (B[NB-1]==0);
02340 TB[0] = TB[NB-1] = 0;
02341 CopyWords(TB+shiftWords, B, NB-shiftWords);
02342 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02343 assert(shiftBits < WORD_BITS);
02344 ShiftWordsLeftByBits(TB, NB, shiftBits);
02345
02346
02347 TA[0] = TA[NA] = TA[NA+1] = 0;
02348 CopyWords(TA+shiftWords, A, NA);
02349 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02350
02351 if (TA[NA+1]==0 && TA[NA] <= 1)
02352 {
02353 Q[NA-NB+1] = Q[NA-NB] = 0;
02354 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02355 {
02356 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02357 ++Q[NA-NB];
02358 }
02359 }
02360 else
02361 {
02362 NA+=2;
02363 assert(Compare(TA+NA-NB, TB, NB) < 0);
02364 }
02365
02366 word BT[2];
02367 BT[0] = TB[NB-2] + 1;
02368 BT[1] = TB[NB-1] + (BT[0]==0);
02369
02370
02371 for (unsigned i=NA-2; i>=NB; i-=2)
02372 {
02373 AtomicDivide(Q+i-NB, TA+i-2, BT);
02374 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02375 }
02376
02377
02378 CopyWords(R, TA+shiftWords, NB);
02379 ShiftWordsRightByBits(R, NB, shiftBits);
02380 }
02381
02382 static inline unsigned int EvenWordCount(const word *X, unsigned int N)
02383 {
02384 while (N && X[N-2]==0 && X[N-1]==0)
02385 N-=2;
02386 return N;
02387 }
02388
02389
02390
02391
02392
02393
02394
02395 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)
02396 {
02397 assert(NA<=N && N && N%2==0);
02398
02399 word *b = T;
02400 word *c = T+N;
02401 word *f = T+2*N;
02402 word *g = T+3*N;
02403 unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
02404 unsigned int k=0, s=0;
02405
02406 SetWords(T, 0, 3*N);
02407 b[0]=1;
02408 CopyWords(f, A, NA);
02409 CopyWords(g, M, N);
02410
02411 while (1)
02412 {
02413 word t=f[0];
02414 while (!t)
02415 {
02416 if (EvenWordCount(f, fgLen)==0)
02417 {
02418 SetWords(R, 0, N);
02419 return 0;
02420 }
02421
02422 ShiftWordsRightByWords(f, fgLen, 1);
02423 if (c[bcLen-1]) bcLen+=2;
02424 assert(bcLen <= N);
02425 ShiftWordsLeftByWords(c, bcLen, 1);
02426 k+=WORD_BITS;
02427 t=f[0];
02428 }
02429
02430 unsigned int i=0;
02431 while (t%2 == 0)
02432 {
02433 t>>=1;
02434 i++;
02435 }
02436 k+=i;
02437
02438 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02439 {
02440 if (s%2==0)
02441 CopyWords(R, b, N);
02442 else
02443 Subtract(R, M, b, N);
02444 return k;
02445 }
02446
02447 ShiftWordsRightByBits(f, fgLen, i);
02448 t=ShiftWordsLeftByBits(c, bcLen, i);
02449 if (t)
02450 {
02451 c[bcLen] = t;
02452 bcLen+=2;
02453 assert(bcLen <= N);
02454 }
02455
02456 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02457 fgLen-=2;
02458
02459 if (Compare(f, g, fgLen)==-1)
02460 {
02461 std::swap(f, g);
02462 std::swap(b, c);
02463 s++;
02464 }
02465
02466 Subtract(f, f, g, fgLen);
02467
02468 if (Add(b, b, c, bcLen))
02469 {
02470 b[bcLen] = 1;
02471 bcLen+=2;
02472 assert(bcLen <= N);
02473 }
02474 }
02475 }
02476
02477
02478
02479
02480
02481 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02482 {
02483 CopyWords(R, A, N);
02484
02485 while (k--)
02486 {
02487 if (R[0]%2==0)
02488 ShiftWordsRightByBits(R, N, 1);
02489 else
02490 {
02491 word carry = Add(R, R, M, N);
02492 ShiftWordsRightByBits(R, N, 1);
02493 R[N-1] += carry<<(WORD_BITS-1);
02494 }
02495 }
02496 }
02497
02498
02499
02500
02501
02502 void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02503 {
02504 CopyWords(R, A, N);
02505
02506 while (k--)
02507 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02508 Subtract(R, R, M, N);
02509 }
02510
02511
02512
02513 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02514
02515 static inline unsigned int RoundupSize(unsigned int n)
02516 {
02517 if (n<=8)
02518 return RoundupSizeTable[n];
02519 else if (n<=16)
02520 return 16;
02521 else if (n<=32)
02522 return 32;
02523 else if (n<=64)
02524 return 64;
02525 else return 1U << BitPrecision(n-1);
02526 }
02527
02528 Integer::Integer()
02529 : reg(2), sign(POSITIVE)
02530 {
02531 reg[0] = reg[1] = 0;
02532 }
02533
02534 Integer::Integer(const Integer& t)
02535 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02536 {
02537 CopyWords(reg, t.reg, reg.size());
02538 }
02539
02540 Integer::Integer(signed long value)
02541 : reg(2)
02542 {
02543 if (value >= 0)
02544 sign = POSITIVE;
02545 else
02546 {
02547 sign = NEGATIVE;
02548 value = -value;
02549 }
02550 reg[0] = word(value);
02551 reg[1] = word(SafeRightShift<WORD_BITS, unsigned long>(value));
02552 }
02553
02554 bool Integer::IsConvertableToLong() const
02555 {
02556 if (ByteCount() > sizeof(long))
02557 return false;
02558
02559 unsigned long value = reg[0];
02560 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02561
02562 if (sign==POSITIVE)
02563 return (signed long)value >= 0;
02564 else
02565 return -(signed long)value < 0;
02566 }
02567
02568 signed long Integer::ConvertToLong() const
02569 {
02570 assert(IsConvertableToLong());
02571
02572 unsigned long value = reg[0];
02573 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02574 return sign==POSITIVE ? value : -(signed long)value;
02575 }
02576
02577 Integer::Integer(BufferedTransformation &encodedInteger, unsigned int byteCount, Signedness s)
02578 {
02579 Decode(encodedInteger, byteCount, s);
02580 }
02581
02582 Integer::Integer(const byte *encodedInteger, unsigned int byteCount, Signedness s)
02583 {
02584 Decode(encodedInteger, byteCount, s);
02585 }
02586
02587 Integer::Integer(BufferedTransformation &bt)
02588 {
02589 BERDecode(bt);
02590 }
02591
02592 Integer::Integer(RandomNumberGenerator &rng, unsigned int bitcount)
02593 {
02594 Randomize(rng, bitcount);
02595 }
02596
02597 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02598 {
02599 if (!Randomize(rng, min, max, rnType, equiv, mod))
02600 throw Integer::RandomNumberNotFound();
02601 }
02602
02603 Integer Integer::Power2(unsigned int e)
02604 {
02605 Integer r((word)0, BitsToWords(e+1));
02606 r.SetBit(e);
02607 return r;
02608 }
02609
02610 const Integer &Integer::Zero()
02611 {
02612 static const Integer zero;
02613 return zero;
02614 }
02615
02616 const Integer &Integer::One()
02617 {
02618 static const Integer one(1,2);
02619 return one;
02620 }
02621
02622 const Integer &Integer::Two()
02623 {
02624 static const Integer two(2,2);
02625 return two;
02626 }
02627
02628 bool Integer::operator!() const
02629 {
02630 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02631 }
02632
02633 Integer& Integer::operator=(const Integer& t)
02634 {
02635 if (this != &t)
02636 {
02637 reg.New(RoundupSize(t.WordCount()));
02638 CopyWords(reg, t.reg, reg.size());
02639 sign = t.sign;
02640 }
02641 return *this;
02642 }
02643
02644 bool Integer::GetBit(unsigned int n) const
02645 {
02646 if (n/WORD_BITS >= reg.size())
02647 return 0;
02648 else
02649 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02650 }
02651
02652 void Integer::SetBit(unsigned int n, bool value)
02653 {
02654 if (value)
02655 {
02656 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02657 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02658 }
02659 else
02660 {
02661 if (n/WORD_BITS < reg.size())
02662 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02663 }
02664 }
02665
02666 byte Integer::GetByte(unsigned int n) const
02667 {
02668 if (n/WORD_SIZE >= reg.size())
02669 return 0;
02670 else
02671 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02672 }
02673
02674 void Integer::SetByte(unsigned int n, byte value)
02675 {
02676 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02677 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02678 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02679 }
02680
02681 unsigned long Integer::GetBits(unsigned int i, unsigned int n) const
02682 {
02683 assert(n <= sizeof(unsigned long)*8);
02684 unsigned long v = 0;
02685 for (unsigned int j=0; j<n; j++)
02686 v |= GetBit(i+j) << j;
02687 return v;
02688 }
02689
02690 Integer Integer::operator-() const
02691 {
02692 Integer result(*this);
02693 result.Negate();
02694 return result;
02695 }
02696
02697 Integer Integer::AbsoluteValue() const
02698 {
02699 Integer result(*this);
02700 result.sign = POSITIVE;
02701 return result;
02702 }
02703
02704 void Integer::swap(Integer &a)
02705 {
02706 reg.swap(a.reg);
02707 std::swap(sign, a.sign);
02708 }
02709
02710 Integer::Integer(word value, unsigned int length)
02711 : reg(RoundupSize(length)), sign(POSITIVE)
02712 {
02713 reg[0] = value;
02714 SetWords(reg+1, 0, reg.size()-1);
02715 }
02716
02717 template <class T>
02718 static Integer StringToInteger(const T *str)
02719 {
02720 word radix;
02721 #if (defined(__GNUC__) && __GNUC__ <= 3) // GCC workaround
02722
02723
02724 unsigned int length;
02725 for (length = 0; str[length] != 0; length++) {}
02726 #else
02727 unsigned int length = std::char_traits<T>::length(str);
02728 #endif
02729
02730 Integer v;
02731
02732 if (length == 0)
02733 return v;
02734
02735 switch (str[length-1])
02736 {
02737 case 'h':
02738 case 'H':
02739 radix=16;
02740 break;
02741 case 'o':
02742 case 'O':
02743 radix=8;
02744 break;
02745 case 'b':
02746 case 'B':
02747 radix=2;
02748 break;
02749 default:
02750 radix=10;
02751 }
02752
02753 if (length > 2 && str[0] == '0' && str[1] == 'x')
02754 radix = 16;
02755
02756 for (unsigned i=0; i<length; i++)
02757 {
02758 word digit;
02759
02760 if (str[i] >= '0' && str[i] <= '9')
02761 digit = str[i] - '0';
02762 else if (str[i] >= 'A' && str[i] <= 'F')
02763 digit = str[i] - 'A' + 10;
02764 else if (str[i] >= 'a' && str[i] <= 'f')
02765 digit = str[i] - 'a' + 10;
02766 else
02767 digit = radix;
02768
02769 if (digit < radix)
02770 {
02771 v *= radix;
02772 v += digit;
02773 }
02774 }
02775
02776 if (str[0] == '-')
02777 v.Negate();
02778
02779 return v;
02780 }
02781
02782 Integer::Integer(const char *str)
02783 : reg(2), sign(POSITIVE)
02784 {
02785 *this = StringToInteger(str);
02786 }
02787
02788 Integer::Integer(const wchar_t *str)
02789 : reg(2), sign(POSITIVE)
02790 {
02791 *this = StringToInteger(str);
02792 }
02793
02794 unsigned int Integer::WordCount() const
02795 {
02796 return CountWords(reg, reg.size());
02797 }
02798
02799 unsigned int Integer::ByteCount() const
02800 {
02801 unsigned wordCount = WordCount();
02802 if (wordCount)
02803 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
02804 else
02805 return 0;
02806 }
02807
02808 unsigned int Integer::BitCount() const
02809 {
02810 unsigned wordCount = WordCount();
02811 if (wordCount)
02812 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
02813 else
02814 return 0;
02815 }
02816
02817 void Integer::Decode(const byte *input, unsigned int inputLen, Signedness s)
02818 {
02819 StringStore store(input, inputLen);
02820 Decode(store, inputLen, s);
02821 }
02822
02823 void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedness s)
02824 {
02825 assert(bt.MaxRetrievable() >= inputLen);
02826
02827 byte b;
02828 bt.Peek(b);
02829 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
02830
02831 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
02832 {
02833 bt.Skip(1);
02834 inputLen--;
02835 bt.Peek(b);
02836 }
02837
02838 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
02839
02840 for (unsigned int i=inputLen; i > 0; i--)
02841 {
02842 bt.Get(b);
02843 reg[(i-1)/WORD_SIZE] |= b << ((i-1)%WORD_SIZE)*8;
02844 }
02845
02846 if (sign == NEGATIVE)
02847 {
02848 for (unsigned i=inputLen; i<reg.size()*WORD_SIZE; i++)
02849 reg[i/WORD_SIZE] |= 0xff << (i%WORD_SIZE)*8;
02850 TwosComplement(reg, reg.size());
02851 }
02852 }
02853
02854 unsigned int Integer::MinEncodedSize(Signedness signedness) const
02855 {
02856 unsigned int outputLen = STDMAX(1U, ByteCount());
02857 if (signedness == UNSIGNED)
02858 return outputLen;
02859 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
02860 outputLen++;
02861 if (IsNegative() && *this < -Power2(outputLen*8-1))
02862 outputLen++;
02863 return outputLen;
02864 }
02865
02866 unsigned int Integer::Encode(byte *output, unsigned int outputLen, Signedness signedness) const
02867 {
02868 ArraySink sink(output, outputLen);
02869 return Encode(sink, outputLen, signedness);
02870 }
02871
02872 unsigned int Integer::Encode(BufferedTransformation &bt, unsigned int outputLen, Signedness signedness) const
02873 {
02874 if (signedness == UNSIGNED || NotNegative())
02875 {
02876 for (unsigned int i=outputLen; i > 0; i--)
02877 bt.Put(GetByte(i-1));
02878 }
02879 else
02880 {
02881
02882 Integer temp = Integer::Power2(8*STDMAX(ByteCount(), outputLen)) + *this;
02883 for (unsigned i=0; i<outputLen; i++)
02884 bt.Put(temp.GetByte(outputLen-i-1));
02885 }
02886 return outputLen;
02887 }
02888
02889 void Integer::DEREncode(BufferedTransformation &bt) const
02890 {
02891 DERGeneralEncoder enc(bt, INTEGER);
02892 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
02893 enc.MessageEnd();
02894 }
02895
02896 void Integer::BERDecode(const byte *input, unsigned int len)
02897 {
02898 StringStore store(input, len);
02899 BERDecode(store);
02900 }
02901
02902 void Integer::BERDecode(BufferedTransformation &bt)
02903 {
02904 BERGeneralDecoder dec(bt, INTEGER);
02905 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
02906 BERDecodeError();
02907 Decode(dec, dec.RemainingLength(), SIGNED);
02908 dec.MessageEnd();
02909 }
02910
02911 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, unsigned int length) const
02912 {
02913 DERGeneralEncoder enc(bt, OCTET_STRING);
02914 Encode(enc, length);
02915 enc.MessageEnd();
02916 }
02917
02918 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, unsigned int length)
02919 {
02920 BERGeneralDecoder dec(bt, OCTET_STRING);
02921 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
02922 BERDecodeError();
02923 Decode(dec, length);
02924 dec.MessageEnd();
02925 }
02926
02927 unsigned int Integer::OpenPGPEncode(byte *output, unsigned int len) const
02928 {
02929 ArraySink sink(output, len);
02930 return OpenPGPEncode(sink);
02931 }
02932
02933 unsigned int Integer::OpenPGPEncode(BufferedTransformation &bt) const
02934 {
02935 word16 bitCount = BitCount();
02936 bt.PutWord16(bitCount);
02937 return 2 + Encode(bt, BitsToBytes(bitCount));
02938 }
02939
02940 void Integer::OpenPGPDecode(const byte *input, unsigned int len)
02941 {
02942 StringStore store(input, len);
02943 OpenPGPDecode(store);
02944 }
02945
02946 void Integer::OpenPGPDecode(BufferedTransformation &bt)
02947 {
02948 word16 bitCount;
02949 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
02950 throw OpenPGPDecodeErr();
02951 Decode(bt, BitsToBytes(bitCount));
02952 }
02953
02954 void Integer::Randomize(RandomNumberGenerator &rng, unsigned int nbits)
02955 {
02956 const unsigned int nbytes = nbits/8 + 1;
02957 SecByteBlock buf(nbytes);
02958 rng.GenerateBlock(buf, nbytes);
02959 if (nbytes)
02960 buf[0] = (byte)Crop(buf[0], nbits % 8);
02961 Decode(buf, nbytes, UNSIGNED);
02962 }
02963
02964 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
02965 {
02966 if (min > max)
02967 throw InvalidArgument("Integer: Min must be no greater than Max");
02968
02969 Integer range = max - min;
02970 const unsigned int nbits = range.BitCount();
02971
02972 do
02973 {
02974 Randomize(rng, nbits);
02975 }
02976 while (*this > range);
02977
02978 *this += min;
02979 }
02980
02981 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02982 {
02983 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
02984 }
02985
02986 class KDF2_RNG : public RandomNumberGenerator
02987 {
02988 public:
02989 KDF2_RNG(const byte *seed, unsigned int seedSize)
02990 : m_counter(0), m_counterAndSeed(seedSize + 4)
02991 {
02992 memcpy(m_counterAndSeed + 4, seed, seedSize);
02993 }
02994
02995 byte GenerateByte()
02996 {
02997 byte b;
02998 GenerateBlock(&b, 1);
02999 return b;
03000 }
03001
03002 void GenerateBlock(byte *output, unsigned int size)
03003 {
03004 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03005 ++m_counter;
03006 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size());
03007 }
03008
03009 private:
03010 word32 m_counter;
03011 SecByteBlock m_counterAndSeed;
03012 };
03013
03014 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03015 {
03016 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03017 Integer max;
03018 if (!params.GetValue("Max", max))
03019 {
03020 int bitLength;
03021 if (params.GetIntValue("BitLength", bitLength))
03022 max = Integer::Power2(bitLength);
03023 else
03024 throw InvalidArgument("Integer: missing Max argument");
03025 }
03026 if (min > max)
03027 throw InvalidArgument("Integer: Min must be no greater than Max");
03028
03029 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03030 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03031
03032 if (equiv.IsNegative() || equiv >= mod)
03033 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03034
03035 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03036
03037 member_ptr<KDF2_RNG> kdf2Rng;
03038 ConstByteArrayParameter seed;
03039 if (params.GetValue("Seed", seed))
03040 {
03041 ByteQueue bq;
03042 DERSequenceEncoder seq(bq);
03043 min.DEREncode(seq);
03044 max.DEREncode(seq);
03045 equiv.DEREncode(seq);
03046 mod.DEREncode(seq);
03047 DEREncodeUnsigned(seq, rnType);
03048 DEREncodeOctetString(seq, seed.begin(), seed.size());
03049 seq.MessageEnd();
03050
03051 SecByteBlock finalSeed(bq.MaxRetrievable());
03052 bq.Get(finalSeed, finalSeed.size());
03053 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03054 }
03055 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03056
03057 switch (rnType)
03058 {
03059 case ANY:
03060 if (mod == One())
03061 Randomize(rng, min, max);
03062 else
03063 {
03064 Integer min1 = min + (equiv-min)%mod;
03065 if (max < min1)
03066 return false;
03067 Randomize(rng, Zero(), (max - min1) / mod);
03068 *this *= mod;
03069 *this += min1;
03070 }
03071 return true;
03072
03073 case PRIME:
03074 {
03075 const PrimeSelector *pSelector = params.GetValueWithDefault("PointerToPrimeSelector", (const PrimeSelector *)NULL);
03076
03077 int i;
03078 i = 0;
03079 while (1)
03080 {
03081 if (++i==16)
03082 {
03083
03084 Integer first = min;
03085 if (FirstPrime(first, max, equiv, mod, pSelector))
03086 {
03087
03088 *this = first;
03089 if (!FirstPrime(first, max, equiv, mod, pSelector))
03090 return true;
03091 }
03092 else
03093 return false;
03094 }
03095
03096 Randomize(rng, min, max);
03097 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03098 return true;
03099 }
03100 }
03101
03102 default:
03103 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03104 }
03105 }
03106
03107 std::istream& operator>>(std::istream& in, Integer &a)
03108 {
03109 char c;
03110 unsigned int length = 0;
03111 SecBlock<char> str(length + 16);
03112
03113 std::ws(in);
03114
03115 do
03116 {
03117 in.read(&c, 1);
03118 str[length++] = c;
03119 if (length >= str.size())
03120 str.Grow(length + 16);
03121 }
03122 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03123
03124 if (in.gcount())
03125 in.putback(c);
03126 str[length-1] = '\0';
03127 a = Integer(str);
03128
03129 return in;
03130 }
03131
03132 std::ostream& operator<<(std::ostream& out, const Integer &a)
03133 {
03134
03135 long f = out.flags() & std::ios::basefield;
03136 int base, block;
03137 char suffix;
03138 switch(f)
03139 {
03140 case std::ios::oct :
03141 base = 8;
03142 block = 8;
03143 suffix = 'o';
03144 break;
03145 case std::ios::hex :
03146 base = 16;
03147 block = 4;
03148 suffix = 'h';
03149 break;
03150 default :
03151 base = 10;
03152 block = 3;
03153 suffix = '.';
03154 }
03155
03156 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03157 Integer temp1=a, temp2;
03158 unsigned i=0;
03159 const char vec[]="0123456789ABCDEF";
03160
03161 if (a.IsNegative())
03162 {
03163 out << '-';
03164 temp1.Negate();
03165 }
03166
03167 if (!a)
03168 out << '0';
03169
03170 while (!!temp1)
03171 {
03172 word digit;
03173 Integer::Divide(digit, temp2, temp1, base);
03174 s[i++]=vec[digit];
03175 temp1=temp2;
03176 }
03177
03178 while (i--)
03179 {
03180 out << s[i];
03181
03182
03183 }
03184 return out << suffix;
03185 }
03186
03187 Integer& Integer::operator++()
03188 {
03189 if (NotNegative())
03190 {
03191 if (Increment(reg, reg.size()))
03192 {
03193 reg.CleanGrow(2*reg.size());
03194 reg[reg.size()/2]=1;
03195 }
03196 }
03197 else
03198 {
03199 word borrow = Decrement(reg, reg.size());
03200 assert(!borrow);
03201 if (WordCount()==0)
03202 *this = Zero();
03203 }
03204 return *this;
03205 }
03206
03207 Integer& Integer::operator--()
03208 {
03209 if (IsNegative())
03210 {
03211 if (Increment(reg, reg.size()))
03212 {
03213 reg.CleanGrow(2*reg.size());
03214 reg[reg.size()/2]=1;
03215 }
03216 }
03217 else
03218 {
03219 if (Decrement(reg, reg.size()))
03220 *this = -One();
03221 }
03222 return *this;
03223 }
03224
03225 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03226 {
03227 word carry;
03228 if (a.reg.size() == b.reg.size())
03229 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03230 else if (a.reg.size() > b.reg.size())
03231 {
03232 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03233 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03234 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03235 }
03236 else
03237 {
03238 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03239 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03240 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03241 }
03242
03243 if (carry)
03244 {
03245 sum.reg.CleanGrow(2*sum.reg.size());
03246 sum.reg[sum.reg.size()/2] = 1;
03247 }
03248 sum.sign = Integer::POSITIVE;
03249 }
03250
03251 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03252 {
03253 unsigned aSize = a.WordCount();
03254 aSize += aSize%2;
03255 unsigned bSize = b.WordCount();
03256 bSize += bSize%2;
03257
03258 if (aSize == bSize)
03259 {
03260 if (Compare(a.reg, b.reg, aSize) >= 0)
03261 {
03262 Subtract(diff.reg, a.reg, b.reg, aSize);
03263 diff.sign = Integer::POSITIVE;
03264 }
03265 else
03266 {
03267 Subtract(diff.reg, b.reg, a.reg, aSize);
03268 diff.sign = Integer::NEGATIVE;
03269 }
03270 }
03271 else if (aSize > bSize)
03272 {
03273 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03274 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03275 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03276 assert(!borrow);
03277 diff.sign = Integer::POSITIVE;
03278 }
03279 else
03280 {
03281 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03282 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03283 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03284 assert(!borrow);
03285 diff.sign = Integer::NEGATIVE;
03286 }
03287 }
03288
03289 Integer Integer::Plus(const Integer& b) const
03290 {
03291 Integer sum((word)0, STDMAX(reg.size(), b.reg.size()));
03292 if (NotNegative())
03293 {
03294 if (b.NotNegative())
03295 PositiveAdd(sum, *this, b);
03296 else
03297 PositiveSubtract(sum, *this, b);
03298 }
03299 else
03300 {
03301 if (b.NotNegative())
03302 PositiveSubtract(sum, b, *this);
03303 else
03304 {
03305 PositiveAdd(sum, *this, b);
03306 sum.sign = Integer::NEGATIVE;
03307 }
03308 }
03309 return sum;
03310 }
03311
03312 Integer& Integer::operator+=(const Integer& t)
03313 {
03314 reg.CleanGrow(t.reg.size());
03315 if (NotNegative())
03316 {
03317 if (t.NotNegative())
03318 PositiveAdd(*this, *this, t);
03319 else
03320 PositiveSubtract(*this, *this, t);
03321 }
03322 else
03323 {
03324 if (t.NotNegative())
03325 PositiveSubtract(*this, t, *this);
03326 else
03327 {
03328 PositiveAdd(*this, *this, t);
03329 sign = Integer::NEGATIVE;
03330 }
03331 }
03332 return *this;
03333 }
03334
03335 Integer Integer::Minus(const Integer& b) const
03336 {
03337 Integer diff((word)0, STDMAX(reg.size(), b.reg.size()));
03338 if (NotNegative())
03339 {
03340 if (b.NotNegative())
03341 PositiveSubtract(diff, *this, b);
03342 else
03343 PositiveAdd(diff, *this, b);
03344 }
03345 else
03346 {
03347 if (b.NotNegative())
03348 {
03349 PositiveAdd(diff, *this, b);
03350 diff.sign = Integer::NEGATIVE;
03351 }
03352 else
03353 PositiveSubtract(diff, b, *this);
03354 }
03355 return diff;
03356 }
03357
03358 Integer& Integer::operator-=(const Integer& t)
03359 {
03360 reg.CleanGrow(t.reg.size());
03361 if (NotNegative())
03362 {
03363 if (t.NotNegative())
03364 PositiveSubtract(*this, *this, t);
03365 else
03366 PositiveAdd(*this, *this, t);
03367 }
03368 else
03369 {
03370 if (t.NotNegative())
03371 {
03372 PositiveAdd(*this, *this, t);
03373 sign = Integer::NEGATIVE;
03374 }
03375 else
03376 PositiveSubtract(*this, t, *this);
03377 }
03378 return *this;
03379 }
03380
03381 Integer& Integer::operator<<=(unsigned int n)
03382 {
03383 const unsigned int wordCount = WordCount();
03384 const unsigned int shiftWords = n / WORD_BITS;
03385 const unsigned int shiftBits = n % WORD_BITS;
03386
03387 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03388 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03389 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03390 return *this;
03391 }
03392
03393 Integer& Integer::operator>>=(unsigned int n)
03394 {
03395 const unsigned int wordCount = WordCount();
03396 const unsigned int shiftWords = n / WORD_BITS;
03397 const unsigned int shiftBits = n % WORD_BITS;
03398
03399 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03400 if (wordCount > shiftWords)
03401 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03402 if (IsNegative() && WordCount()==0)
03403 *this = Zero();
03404 return *this;
03405 }
03406
03407 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03408 {
03409 unsigned aSize = RoundupSize(a.WordCount());
03410 unsigned bSize = RoundupSize(b.WordCount());
03411
03412 product.reg.CleanNew(RoundupSize(aSize+bSize));
03413 product.sign = Integer::POSITIVE;
03414
03415 SecAlignedWordBlock workspace(aSize + bSize);
03416 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03417 }
03418
03419 void Multiply(Integer &product, const Integer &a, const Integer &b)
03420 {
03421 PositiveMultiply(product, a, b);
03422
03423 if (a.NotNegative() != b.NotNegative())
03424 product.Negate();
03425 }
03426
03427 Integer Integer::Times(const Integer &b) const
03428 {
03429 Integer product;
03430 Multiply(product, *this, b);
03431 return product;
03432 }
03433
03434
03435
03436
03437
03438
03439
03440
03441
03442
03443
03444
03445
03446
03447
03448
03449
03450
03451
03452
03453
03454
03455
03456 void PositiveDivide(Integer &remainder, Integer "ient,
03457 const Integer &a, const Integer &b)
03458 {
03459 unsigned aSize = a.WordCount();
03460 unsigned bSize = b.WordCount();
03461
03462 if (!bSize)
03463 throw Integer::DivideByZero();
03464
03465 if (a.PositiveCompare(b) == -1)
03466 {
03467 remainder = a;
03468 remainder.sign = Integer::POSITIVE;
03469 quotient = Integer::Zero();
03470 return;
03471 }
03472
03473 aSize += aSize%2;
03474 bSize += bSize%2;
03475
03476 remainder.reg.CleanNew(RoundupSize(bSize));
03477 remainder.sign = Integer::POSITIVE;
03478 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03479 quotient.sign = Integer::POSITIVE;
03480
03481 SecAlignedWordBlock T(aSize+2*bSize+4);
03482 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03483 }
03484
03485 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03486 {
03487 PositiveDivide(remainder, quotient, dividend, divisor);
03488
03489 if (dividend.IsNegative())
03490 {
03491 quotient.Negate();
03492 if (remainder.NotZero())
03493 {
03494 --quotient;
03495 remainder = divisor.AbsoluteValue() - remainder;
03496 }
03497 }
03498
03499 if (divisor.IsNegative())
03500 quotient.Negate();
03501 }
03502
03503 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03504 {
03505 q = a;
03506 q >>= n;
03507
03508 const unsigned int wordCount = BitsToWords(n);
03509 if (wordCount <= a.WordCount())
03510 {
03511 r.reg.resize(RoundupSize(wordCount));
03512 CopyWords(r.reg, a.reg, wordCount);
03513 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03514 if (n % WORD_BITS != 0)
03515 r.reg[wordCount-1] %= (1 << (n % WORD_BITS));
03516 }
03517 else
03518 {
03519 r.reg.resize(RoundupSize(a.WordCount()));
03520 CopyWords(r.reg, a.reg, r.reg.size());
03521 }
03522 r.sign = POSITIVE;
03523
03524 if (a.IsNegative() && r.NotZero())
03525 {
03526 --q;
03527 r = Power2(n) - r;
03528 }
03529 }
03530
03531 Integer Integer::DividedBy(const Integer &b) const
03532 {
03533 Integer remainder, quotient;
03534 Integer::Divide(remainder, quotient, *this, b);
03535 return quotient;
03536 }
03537
03538 Integer Integer::Modulo(const Integer &b) const
03539 {
03540 Integer remainder, quotient;
03541 Integer::Divide(remainder, quotient, *this, b);
03542 return remainder;
03543 }
03544
03545 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03546 {
03547 if (!divisor)
03548 throw Integer::DivideByZero();
03549
03550 assert(divisor);
03551
03552 if ((divisor & (divisor-1)) == 0)
03553 {
03554 quotient = dividend >> (BitPrecision(divisor)-1);
03555 remainder = dividend.reg[0] & (divisor-1);
03556 return;
03557 }
03558
03559 unsigned int i = dividend.WordCount();
03560 quotient.reg.CleanNew(RoundupSize(i));
03561 remainder = 0;
03562 while (i--)
03563 {
03564 quotient.reg[i] = word(MAKE_DWORD(dividend.reg[i], remainder) / divisor);
03565 remainder = word(MAKE_DWORD(dividend.reg[i], remainder) % divisor);
03566 }
03567
03568 if (dividend.NotNegative())
03569 quotient.sign = POSITIVE;
03570 else
03571 {
03572 quotient.sign = NEGATIVE;
03573 if (remainder)
03574 {
03575 --quotient;
03576 remainder = divisor - remainder;
03577 }
03578 }
03579 }
03580
03581 Integer Integer::DividedBy(word b) const
03582 {
03583 word remainder;
03584 Integer quotient;
03585 Integer::Divide(remainder, quotient, *this, b);
03586 return quotient;
03587 }
03588
03589 word Integer::Modulo(word divisor) const
03590 {
03591 if (!divisor)
03592 throw Integer::DivideByZero();
03593
03594 assert(divisor);
03595
03596 word remainder;
03597
03598 if ((divisor & (divisor-1)) == 0)
03599 remainder = reg[0] & (divisor-1);
03600 else
03601 {
03602 unsigned int i = WordCount();
03603
03604 if (divisor <= 5)
03605 {
03606 dword sum=0;
03607 while (i--)
03608 sum += reg[i];
03609 remainder = word(sum%divisor);
03610 }
03611 else
03612 {
03613 remainder = 0;
03614 while (i--)
03615 remainder = word(MAKE_DWORD(reg[i], remainder) % divisor);
03616 }
03617 }
03618
03619 if (IsNegative() && remainder)
03620 remainder = divisor - remainder;
03621
03622 return remainder;
03623 }
03624
03625 void Integer::Negate()
03626 {
03627 if (!!(*this))
03628 sign = Sign(1-sign);
03629 }
03630
03631 int Integer::PositiveCompare(const Integer& t) const
03632 {
03633 unsigned size = WordCount(), tSize = t.WordCount();
03634
03635 if (size == tSize)
03636 return CryptoPP::Compare(reg, t.reg, size);
03637 else
03638 return size > tSize ? 1 : -1;
03639 }
03640
03641 int Integer::Compare(const Integer& t) const
03642 {
03643 if (NotNegative())
03644 {
03645 if (t.NotNegative())
03646 return PositiveCompare(t);
03647 else
03648 return 1;
03649 }
03650 else
03651 {
03652 if (t.NotNegative())
03653 return -1;
03654 else
03655 return -PositiveCompare(t);
03656 }
03657 }
03658
03659 Integer Integer::SquareRoot() const
03660 {
03661 if (!IsPositive())
03662 return Zero();
03663
03664
03665 Integer x, y = Power2((BitCount()+1)/2);
03666 assert(y*y >= *this);
03667
03668 do
03669 {
03670 x = y;
03671 y = (x + *this/x) >> 1;
03672 } while (y<x);
03673
03674 return x;
03675 }
03676
03677 bool Integer::IsSquare() const
03678 {
03679 Integer r = SquareRoot();
03680 return *this == r.Squared();
03681 }
03682
03683 bool Integer::IsUnit() const
03684 {
03685 return (WordCount() == 1) && (reg[0] == 1);
03686 }
03687
03688 Integer Integer::MultiplicativeInverse() const
03689 {
03690 return IsUnit() ? *this : Zero();
03691 }
03692
03693 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03694 {
03695 return x*y%m;
03696 }
03697
03698 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03699 {
03700 ModularArithmetic mr(m);
03701 return mr.Exponentiate(x, e);
03702 }
03703
03704 Integer Integer::Gcd(const Integer &a, const Integer &b)
03705 {
03706 return EuclideanDomainOf<Integer>().Gcd(a, b);
03707 }
03708
03709 Integer Integer::InverseMod(const Integer &m) const
03710 {
03711 assert(m.NotNegative());
03712
03713 if (IsNegative() || *this>=m)
03714 return (*this%m).InverseMod(m);
03715
03716 if (m.IsEven())
03717 {
03718 if (!m || IsEven())
03719 return Zero();
03720 if (*this == One())
03721 return One();
03722
03723 Integer u = m.InverseMod(*this);
03724 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03725 }
03726
03727 SecBlock<word> T(m.reg.size() * 4);
03728 Integer r((word)0, m.reg.size());
03729 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03730 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03731 return r;
03732 }
03733
03734 word Integer::InverseMod(const word mod) const
03735 {
03736 word g0 = mod, g1 = *this % mod;
03737 word v0 = 0, v1 = 1;
03738 word y;
03739
03740 while (g1)
03741 {
03742 if (g1 == 1)
03743 return v1;
03744 y = g0 / g1;
03745 g0 = g0 % g1;
03746 v0 += y * v1;
03747
03748 if (!g0)
03749 break;
03750 if (g0 == 1)
03751 return mod-v0;
03752 y = g1 / g0;
03753 g1 = g1 % g0;
03754 v1 += y * v0;
03755 }
03756 return 0;
03757 }
03758
03759
03760
03761 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
03762 {
03763 BERSequenceDecoder seq(bt);
03764 OID oid(seq);
03765 if (oid != ASN1::prime_field())
03766 BERDecodeError();
03767 modulus.BERDecode(seq);
03768 seq.MessageEnd();
03769 result.reg.resize(modulus.reg.size());
03770 }
03771
03772 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
03773 {
03774 DERSequenceEncoder seq(bt);
03775 ASN1::prime_field().DEREncode(seq);
03776 modulus.DEREncode(seq);
03777 seq.MessageEnd();
03778 }
03779
03780 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
03781 {
03782 a.DEREncodeAsOctetString(out, MaxElementByteLength());
03783 }
03784
03785 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
03786 {
03787 a.BERDecodeAsOctetString(in, MaxElementByteLength());
03788 }
03789
03790 const Integer& ModularArithmetic::Half(const Integer &a) const
03791 {
03792 if (a.reg.size()==modulus.reg.size())
03793 {
03794 CryptoPP::DivideByPower2Mod(result.reg.begin(), a.reg, 1, modulus.reg, a.reg.size());
03795 return result;
03796 }
03797 else
03798 return result1 = (a.IsEven() ? (a >> 1) : ((a+modulus) >> 1));
03799 }
03800
03801 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
03802 {
03803 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
03804 {
03805 if (CryptoPP::Add(result.reg.begin(), a.reg, b.reg, a.reg.size())
03806 || Compare(result.reg, modulus.reg, a.reg.size()) >= 0)
03807 {
03808 CryptoPP::Subtract(result.reg.begin(), result.reg, modulus.reg, a.reg.size());
03809 }
03810 return result;
03811 }
03812 else
03813 {
03814 result1 = a+b;
03815 if (result1 >= modulus)
03816 result1 -= modulus;
03817 return result1;
03818 }
03819 }
03820
03821 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
03822 {
03823 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
03824 {
03825 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
03826 || Compare(a.reg, modulus.reg, a.reg.size()) >= 0)
03827 {
03828 CryptoPP::Subtract(a.reg, a.reg, modulus.reg, a.reg.size());
03829 }
03830 }
03831 else
03832 {
03833 a+=b;
03834 if (a>=modulus)
03835 a-=modulus;
03836 }
03837
03838 return a;
03839 }
03840
03841 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
03842 {
03843 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
03844 {
03845 if (CryptoPP::Subtract(result.reg.begin(), a.reg, b.reg, a.reg.size()))
03846 CryptoPP::Add(result.reg.begin(), result.reg, modulus.reg, a.reg.size());
03847 return result;
03848 }
03849 else
03850 {
03851 result1 = a-b;
03852 if (result1.IsNegative())
03853 result1 += modulus;
03854 return result1;
03855 }
03856 }
03857
03858 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
03859 {
03860 if (a.reg.size()==modulus.reg.size() && b.reg.size()==modulus.reg.size())
03861 {
03862 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
03863 CryptoPP::Add(a.reg, a.reg, modulus.reg, a.reg.size());
03864 }
03865 else
03866 {
03867 a-=b;
03868 if (a.IsNegative())
03869 a+=modulus;
03870 }
03871
03872 return a;
03873 }
03874
03875 const Integer& ModularArithmetic::Inverse(const Integer &a) const
03876 {
03877 if (!a)
03878 return a;
03879
03880 CopyWords(result.reg.begin(), modulus.reg, modulus.reg.size());
03881 if (CryptoPP::Subtract(result.reg.begin(), result.reg, a.reg, a.reg.size()))
03882 Decrement(result.reg.begin()+a.reg.size(), 1, modulus.reg.size()-a.reg.size());
03883
03884 return result;
03885 }
03886
03887 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
03888 {
03889 if (modulus.IsOdd())
03890 {
03891 MontgomeryRepresentation dr(modulus);
03892 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
03893 }
03894 else
03895 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
03896 }
03897
03898 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
03899 {
03900 if (modulus.IsOdd())
03901 {
03902 MontgomeryRepresentation dr(modulus);
03903 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
03904 for (unsigned int i=0; i<exponentsCount; i++)
03905 results[i] = dr.ConvertOut(results[i]);
03906 }
03907 else
03908 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
03909 }
03910
03911 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
03912 : ModularArithmetic(m),
03913 u((word)0, modulus.reg.size()),
03914 workspace(5*modulus.reg.size())
03915 {
03916 if (!modulus.IsOdd())
03917 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
03918
03919 RecursiveInverseModPower2(u.reg, workspace, modulus.reg, modulus.reg.size());
03920 }
03921
03922 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
03923 {
03924 word *const T = workspace.begin();
03925 word *const R = result.reg.begin();
03926 const unsigned int N = modulus.reg.size();
03927 assert(a.reg.size()<=N && b.reg.size()<=N);
03928
03929 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
03930 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
03931 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
03932 return result;
03933 }
03934
03935 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
03936 {
03937 word *const T = workspace.begin();
03938 word *const R = result.reg.begin();
03939 const unsigned int N = modulus.reg.size();
03940 assert(a.reg.size()<=N);
03941
03942 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
03943 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
03944 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
03945 return result;
03946 }
03947
03948 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
03949 {
03950 word *const T = workspace.begin();
03951 word *const R = result.reg.begin();
03952 const unsigned int N = modulus.reg.size();
03953 assert(a.reg.size()<=N);
03954
03955 CopyWords(T, a.reg, a.reg.size());
03956 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
03957 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
03958 return result;
03959 }
03960
03961 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
03962 {
03963
03964 word *const T = workspace.begin();
03965 word *const R = result.reg.begin();
03966 const unsigned int N = modulus.reg.size();
03967 assert(a.reg.size()<=N);
03968
03969 CopyWords(T, a.reg, a.reg.size());
03970 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
03971 MontgomeryReduce(R, T+2*N, T, modulus.reg, u.reg, N);
03972 unsigned k = AlmostInverse(R, T, R, N, modulus.reg, N);
03973
03974
03975
03976 if (k>N*WORD_BITS)
03977 DivideByPower2Mod(R, R, k-N*WORD_BITS, modulus.reg, N);
03978 else
03979 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, modulus.reg, N);
03980
03981 return result;
03982 }
03983
03984 NAMESPACE_END
03985
03986 #endif