00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017
00018 #include <iostream>
00019
00020 #ifdef SSE2_INTRINSICS_AVAILABLE
00021 #ifdef __GNUC__
00022 #include <xmmintrin.h>
00023 #include <signal.h>
00024 #include <setjmp.h>
00025 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00026 #include <malloc.h>
00027 #else
00028 #include <stdlib.h>
00029 #endif
00030 #else
00031 #include <emmintrin.h>
00032 #endif
00033 #elif defined(_MSC_VER) && defined(_M_IX86)
00034 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00035 #elif defined(__GNUC__) && defined(__i386__)
00036 #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00037 #endif
00038
00039 NAMESPACE_BEGIN(CryptoPP)
00040
00041 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00042 {
00043 if (valueType != typeid(Integer))
00044 return false;
00045 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00046 return true;
00047 }
00048
00049 static const char s_RunAtStartup = (g_pAssignIntToInteger = AssignIntToInteger, 0);
00050
00051 #ifdef SSE2_INTRINSICS_AVAILABLE
00052 template <class T>
00053 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00054 {
00055 CheckSize(n);
00056 if (n == 0)
00057 return NULL;
00058 if (n >= 4)
00059 {
00060 void *p;
00061 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00062 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00063 #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00064 while (!(p = memalign(16, sizeof(T)*n)))
00065 #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00066 while (!(p = malloc(sizeof(T)*n)))
00067 #else
00068 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))
00069 #endif
00070 CallNewHandler();
00071
00072 #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00073 assert(m_pBlock == NULL);
00074 m_pBlock = p;
00075 if (!IsAlignedOn(p, 16))
00076 {
00077 assert(IsAlignedOn(p, 8));
00078 p = (byte *)p + 8;
00079 }
00080 #endif
00081
00082 assert(IsAlignedOn(p, 16));
00083 return (T*)p;
00084 }
00085 return new T[n];
00086 }
00087
00088 template <class T>
00089 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00090 {
00091 memset(p, 0, n*sizeof(T));
00092 if (n >= 4)
00093 {
00094 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00095 _mm_free(p);
00096 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00097 assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00098 free(m_pBlock);
00099 m_pBlock = NULL;
00100 #else
00101 free(p);
00102 #endif
00103 }
00104 else
00105 delete [] (T *)p;
00106 }
00107 #endif
00108
00109 static int Compare(const word *A, const word *B, unsigned int N)
00110 {
00111 while (N--)
00112 if (A[N] > B[N])
00113 return 1;
00114 else if (A[N] < B[N])
00115 return -1;
00116
00117 return 0;
00118 }
00119
00120 static word Increment(word *A, unsigned int N, word B=1)
00121 {
00122 assert(N);
00123 word t = A[0];
00124 A[0] = t+B;
00125 if (A[0] >= t)
00126 return 0;
00127 for (unsigned i=1; i<N; i++)
00128 if (++A[i])
00129 return 0;
00130 return 1;
00131 }
00132
00133 static word Decrement(word *A, unsigned int N, word B=1)
00134 {
00135 assert(N);
00136 word t = A[0];
00137 A[0] = t-B;
00138 if (A[0] <= t)
00139 return 0;
00140 for (unsigned i=1; i<N; i++)
00141 if (A[i]--)
00142 return 0;
00143 return 1;
00144 }
00145
00146 static void TwosComplement(word *A, unsigned int N)
00147 {
00148 Decrement(A, N);
00149 for (unsigned i=0; i<N; i++)
00150 A[i] = ~A[i];
00151 }
00152
00153 static word AtomicInverseModPower2(word A)
00154 {
00155 assert(A%2==1);
00156
00157 word R=A%8;
00158
00159 for (unsigned i=3; i<WORD_BITS; i*=2)
00160 R = R*(2-R*A);
00161
00162 assert(R*A==1);
00163 return R;
00164 }
00165
00166
00167
00168 class DWord
00169 {
00170 public:
00171 DWord() {}
00172
00173 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00174 explicit DWord(word low)
00175 {
00176 m_whole = low;
00177 }
00178 #else
00179 explicit DWord(word low)
00180 {
00181 m_halfs.low = low;
00182 m_halfs.high = 0;
00183 }
00184 #endif
00185
00186 DWord(word low, word high)
00187 {
00188 m_halfs.low = low;
00189 m_halfs.high = high;
00190 }
00191
00192 static DWord Multiply(word a, word b)
00193 {
00194 DWord r;
00195 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00196 r.m_whole = (dword)a * b;
00197 #elif defined(__alpha__)
00198 r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00199 #elif defined(__ia64__)
00200 r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00201 #elif defined(_ARCH_PPC64)
00202 r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00203 #elif defined(__x86_64__)
00204 __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00205 #elif defined(__mips64)
00206 __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00207 #elif defined(_M_IX86)
00208
00209 word64 t = (word64)a * b;
00210 r.m_halfs.high = ((word32 *)(&t))[1];
00211 r.m_halfs.low = (word32)t;
00212 #else
00213 #error can not implement DWord
00214 #endif
00215 return r;
00216 }
00217
00218 static DWord MultiplyAndAdd(word a, word b, word c)
00219 {
00220 DWord r = Multiply(a, b);
00221 return r += c;
00222 }
00223
00224 DWord & operator+=(word a)
00225 {
00226 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00227 m_whole = m_whole + a;
00228 #else
00229 m_halfs.low += a;
00230 m_halfs.high += (m_halfs.low < a);
00231 #endif
00232 return *this;
00233 }
00234
00235 DWord operator+(word a)
00236 {
00237 DWord r;
00238 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00239 r.m_whole = m_whole + a;
00240 #else
00241 r.m_halfs.low = m_halfs.low + a;
00242 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00243 #endif
00244 return r;
00245 }
00246
00247 DWord operator-(DWord a)
00248 {
00249 DWord r;
00250 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00251 r.m_whole = m_whole - a.m_whole;
00252 #else
00253 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00254 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00255 #endif
00256 return r;
00257 }
00258
00259 DWord operator-(word a)
00260 {
00261 DWord r;
00262 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00263 r.m_whole = m_whole - a;
00264 #else
00265 r.m_halfs.low = m_halfs.low - a;
00266 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00267 #endif
00268 return r;
00269 }
00270
00271
00272 word operator/(word divisor);
00273
00274 word operator%(word a);
00275
00276 bool operator!() const
00277 {
00278 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00279 return !m_whole;
00280 #else
00281 return !m_halfs.high && !m_halfs.low;
00282 #endif
00283 }
00284
00285 word GetLowHalf() const {return m_halfs.low;}
00286 word GetHighHalf() const {return m_halfs.high;}
00287 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00288
00289 private:
00290 union
00291 {
00292 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00293 dword m_whole;
00294 #endif
00295 struct
00296 {
00297 #ifdef IS_LITTLE_ENDIAN
00298 word low;
00299 word high;
00300 #else
00301 word high;
00302 word low;
00303 #endif
00304 } m_halfs;
00305 };
00306 };
00307
00308 class Word
00309 {
00310 public:
00311 Word() {}
00312
00313 Word(word value)
00314 {
00315 m_whole = value;
00316 }
00317
00318 Word(hword low, hword high)
00319 {
00320 m_whole = low | (word(high) << (WORD_BITS/2));
00321 }
00322
00323 static Word Multiply(hword a, hword b)
00324 {
00325 Word r;
00326 r.m_whole = (word)a * b;
00327 return r;
00328 }
00329
00330 Word operator-(Word a)
00331 {
00332 Word r;
00333 r.m_whole = m_whole - a.m_whole;
00334 return r;
00335 }
00336
00337 Word operator-(hword a)
00338 {
00339 Word r;
00340 r.m_whole = m_whole - a;
00341 return r;
00342 }
00343
00344
00345 hword operator/(hword divisor)
00346 {
00347 return hword(m_whole / divisor);
00348 }
00349
00350 bool operator!() const
00351 {
00352 return !m_whole;
00353 }
00354
00355 word GetWhole() const {return m_whole;}
00356 hword GetLowHalf() const {return hword(m_whole);}
00357 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00358 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00359
00360 private:
00361 word m_whole;
00362 };
00363
00364
00365 template <class S, class D>
00366 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00367 {
00368
00369 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00370
00371
00372 S Q;
00373 if (S(B1+1) == 0)
00374 Q = A[2];
00375 else
00376 Q = D(A[1], A[2]) / S(B1+1);
00377
00378
00379 D p = D::Multiply(B0, Q);
00380 D u = (D) A[0] - p.GetLowHalf();
00381 A[0] = u.GetLowHalf();
00382 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00383 A[1] = u.GetLowHalf();
00384 A[2] += u.GetHighHalf();
00385
00386
00387 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00388 {
00389 u = (D) A[0] - B0;
00390 A[0] = u.GetLowHalf();
00391 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00392 A[1] = u.GetLowHalf();
00393 A[2] += u.GetHighHalf();
00394 Q++;
00395 assert(Q);
00396 }
00397
00398 return Q;
00399 }
00400
00401
00402 template <class S, class D>
00403 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00404 {
00405 if (!B)
00406 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00407 else
00408 {
00409 S Q[2];
00410 T[0] = Al.GetLowHalf();
00411 T[1] = Al.GetHighHalf();
00412 T[2] = Ah.GetLowHalf();
00413 T[3] = Ah.GetHighHalf();
00414 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00415 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00416 return D(Q[0], Q[1]);
00417 }
00418 }
00419
00420
00421 inline word DWord::operator/(word a)
00422 {
00423 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00424 return word(m_whole / a);
00425 #else
00426 hword r[4];
00427 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00428 #endif
00429 }
00430
00431 inline word DWord::operator%(word a)
00432 {
00433 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00434 return word(m_whole % a);
00435 #else
00436 if (a < (word(1) << (WORD_BITS/2)))
00437 {
00438 hword h = hword(a);
00439 word r = m_halfs.high % h;
00440 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00441 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00442 }
00443 else
00444 {
00445 hword r[4];
00446 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00447 return Word(r[0], r[1]).GetWhole();
00448 }
00449 #endif
00450 }
00451
00452
00453
00454 class Portable
00455 {
00456 public:
00457 static word Add(word *C, const word *A, const word *B, unsigned int N);
00458 static word Subtract(word *C, const word *A, const word *B, unsigned int N);
00459
00460 static inline void Multiply2(word *C, const word *A, const word *B);
00461 static inline word Multiply2Add(word *C, const word *A, const word *B);
00462 static void Multiply4(word *C, const word *A, const word *B);
00463 static void Multiply8(word *C, const word *A, const word *B);
00464 static inline unsigned int MultiplyRecursionLimit() {return 8;}
00465
00466 static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00467 static void Multiply4Bottom(word *C, const word *A, const word *B);
00468 static void Multiply8Bottom(word *C, const word *A, const word *B);
00469 static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00470
00471 static void Square2(word *R, const word *A);
00472 static void Square4(word *R, const word *A);
00473 static void Square8(word *R, const word *A) {assert(false);}
00474 static inline unsigned int SquareRecursionLimit() {return 4;}
00475 };
00476
00477 word Portable::Add(word *C, const word *A, const word *B, unsigned int N)
00478 {
00479 assert (N%2 == 0);
00480
00481 DWord u(0, 0);
00482 for (unsigned int i = 0; i < N; i+=2)
00483 {
00484 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00485 C[i] = u.GetLowHalf();
00486 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00487 C[i+1] = u.GetLowHalf();
00488 }
00489 return u.GetHighHalf();
00490 }
00491
00492 word Portable::Subtract(word *C, const word *A, const word *B, unsigned int N)
00493 {
00494 assert (N%2 == 0);
00495
00496 DWord u(0, 0);
00497 for (unsigned int i = 0; i < N; i+=2)
00498 {
00499 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00500 C[i] = u.GetLowHalf();
00501 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00502 C[i+1] = u.GetLowHalf();
00503 }
00504 return 0-u.GetHighHalf();
00505 }
00506
00507 void Portable::Multiply2(word *C, const word *A, const word *B)
00508 {
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00538 unsigned int ai = A[1] < A[0];
00539 unsigned int bi = B[0] < B[1];
00540 unsigned int di = ai & bi;
00541 DWord d = DWord::Multiply(D[di], D[di+2]);
00542 D[1] = D[3] = 0;
00543 unsigned int si = ai + !bi;
00544 word s = D[si];
00545
00546 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00547 C[0] = A0B0.GetLowHalf();
00548
00549 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00550 DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00551 C[1] = t.GetLowHalf();
00552
00553 t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00554 C[2] = t.GetLowHalf();
00555 C[3] = t.GetHighHalf();
00556 }
00557
00558 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00559 {
00560 DWord t = DWord::Multiply(A[0], B[0]);
00561 C[0] = t.GetLowHalf();
00562 C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00563 }
00564
00565 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00566 {
00567 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00568 unsigned int ai = A[1] < A[0];
00569 unsigned int bi = B[0] < B[1];
00570 unsigned int di = ai & bi;
00571 DWord d = DWord::Multiply(D[di], D[di+2]);
00572 D[1] = D[3] = 0;
00573 unsigned int si = ai + !bi;
00574 word s = D[si];
00575
00576 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00577 DWord t = A0B0 + C[0];
00578 C[0] = t.GetLowHalf();
00579
00580 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00581 t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00582 C[1] = t.GetLowHalf();
00583
00584 t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00585 C[2] = t.GetLowHalf();
00586
00587 t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00588 C[3] = t.GetLowHalf();
00589 return t.GetHighHalf();
00590 }
00591
00592 #define MulAcc(x, y) \
00593 p = DWord::MultiplyAndAdd(A[x], B[y], c); \
00594 c = p.GetLowHalf(); \
00595 p = (DWord) d + p.GetHighHalf(); \
00596 d = p.GetLowHalf(); \
00597 e += p.GetHighHalf();
00598
00599 #define SaveMulAcc(s, x, y) \
00600 R[s] = c; \
00601 p = DWord::MultiplyAndAdd(A[x], B[y], d); \
00602 c = p.GetLowHalf(); \
00603 p = (DWord) e + p.GetHighHalf(); \
00604 d = p.GetLowHalf(); \
00605 e = p.GetHighHalf();
00606
00607 #define SquAcc(x, y) \
00608 q = DWord::Multiply(A[x], A[y]); \
00609 p = q + c; \
00610 c = p.GetLowHalf(); \
00611 p = (DWord) d + p.GetHighHalf(); \
00612 d = p.GetLowHalf(); \
00613 e += p.GetHighHalf(); \
00614 p = q + c; \
00615 c = p.GetLowHalf(); \
00616 p = (DWord) d + p.GetHighHalf(); \
00617 d = p.GetLowHalf(); \
00618 e += p.GetHighHalf();
00619
00620 #define SaveSquAcc(s, x, y) \
00621 R[s] = c; \
00622 q = DWord::Multiply(A[x], A[y]); \
00623 p = q + d; \
00624 c = p.GetLowHalf(); \
00625 p = (DWord) e + p.GetHighHalf(); \
00626 d = p.GetLowHalf(); \
00627 e = p.GetHighHalf(); \
00628 p = q + c; \
00629 c = p.GetLowHalf(); \
00630 p = (DWord) d + p.GetHighHalf(); \
00631 d = p.GetLowHalf(); \
00632 e += p.GetHighHalf();
00633
00634 void Portable::Multiply4(word *R, const word *A, const word *B)
00635 {
00636 DWord p;
00637 word c, d, e;
00638
00639 p = DWord::Multiply(A[0], B[0]);
00640 R[0] = p.GetLowHalf();
00641 c = p.GetHighHalf();
00642 d = e = 0;
00643
00644 MulAcc(0, 1);
00645 MulAcc(1, 0);
00646
00647 SaveMulAcc(1, 2, 0);
00648 MulAcc(1, 1);
00649 MulAcc(0, 2);
00650
00651 SaveMulAcc(2, 0, 3);
00652 MulAcc(1, 2);
00653 MulAcc(2, 1);
00654 MulAcc(3, 0);
00655
00656 SaveMulAcc(3, 3, 1);
00657 MulAcc(2, 2);
00658 MulAcc(1, 3);
00659
00660 SaveMulAcc(4, 2, 3);
00661 MulAcc(3, 2);
00662
00663 R[5] = c;
00664 p = DWord::MultiplyAndAdd(A[3], B[3], d);
00665 R[6] = p.GetLowHalf();
00666 R[7] = e + p.GetHighHalf();
00667 }
00668
00669 void Portable::Square2(word *R, const word *A)
00670 {
00671 DWord p, q;
00672 word c, d, e;
00673
00674 p = DWord::Multiply(A[0], A[0]);
00675 R[0] = p.GetLowHalf();
00676 c = p.GetHighHalf();
00677 d = e = 0;
00678
00679 SquAcc(0, 1);
00680
00681 R[1] = c;
00682 p = DWord::MultiplyAndAdd(A[1], A[1], d);
00683 R[2] = p.GetLowHalf();
00684 R[3] = e + p.GetHighHalf();
00685 }
00686
00687 void Portable::Square4(word *R, const word *A)
00688 {
00689 #ifdef _MSC_VER
00690
00691
00692
00693
00694 Multiply4(R, A, A);
00695 #else
00696 const word *B = A;
00697 DWord p, q;
00698 word c, d, e;
00699
00700 p = DWord::Multiply(A[0], A[0]);
00701 R[0] = p.GetLowHalf();
00702 c = p.GetHighHalf();
00703 d = e = 0;
00704
00705 SquAcc(0, 1);
00706
00707 SaveSquAcc(1, 2, 0);
00708 MulAcc(1, 1);
00709
00710 SaveSquAcc(2, 0, 3);
00711 SquAcc(1, 2);
00712
00713 SaveSquAcc(3, 3, 1);
00714 MulAcc(2, 2);
00715
00716 SaveSquAcc(4, 2, 3);
00717
00718 R[5] = c;
00719 p = DWord::MultiplyAndAdd(A[3], A[3], d);
00720 R[6] = p.GetLowHalf();
00721 R[7] = e + p.GetHighHalf();
00722 #endif
00723 }
00724
00725 void Portable::Multiply8(word *R, const word *A, const word *B)
00726 {
00727 DWord p;
00728 word c, d, e;
00729
00730 p = DWord::Multiply(A[0], B[0]);
00731 R[0] = p.GetLowHalf();
00732 c = p.GetHighHalf();
00733 d = e = 0;
00734
00735 MulAcc(0, 1);
00736 MulAcc(1, 0);
00737
00738 SaveMulAcc(1, 2, 0);
00739 MulAcc(1, 1);
00740 MulAcc(0, 2);
00741
00742 SaveMulAcc(2, 0, 3);
00743 MulAcc(1, 2);
00744 MulAcc(2, 1);
00745 MulAcc(3, 0);
00746
00747 SaveMulAcc(3, 0, 4);
00748 MulAcc(1, 3);
00749 MulAcc(2, 2);
00750 MulAcc(3, 1);
00751 MulAcc(4, 0);
00752
00753 SaveMulAcc(4, 0, 5);
00754 MulAcc(1, 4);
00755 MulAcc(2, 3);
00756 MulAcc(3, 2);
00757 MulAcc(4, 1);
00758 MulAcc(5, 0);
00759
00760 SaveMulAcc(5, 0, 6);
00761 MulAcc(1, 5);
00762 MulAcc(2, 4);
00763 MulAcc(3, 3);
00764 MulAcc(4, 2);
00765 MulAcc(5, 1);
00766 MulAcc(6, 0);
00767
00768 SaveMulAcc(6, 0, 7);
00769 MulAcc(1, 6);
00770 MulAcc(2, 5);
00771 MulAcc(3, 4);
00772 MulAcc(4, 3);
00773 MulAcc(5, 2);
00774 MulAcc(6, 1);
00775 MulAcc(7, 0);
00776
00777 SaveMulAcc(7, 1, 7);
00778 MulAcc(2, 6);
00779 MulAcc(3, 5);
00780 MulAcc(4, 4);
00781 MulAcc(5, 3);
00782 MulAcc(6, 2);
00783 MulAcc(7, 1);
00784
00785 SaveMulAcc(8, 2, 7);
00786 MulAcc(3, 6);
00787 MulAcc(4, 5);
00788 MulAcc(5, 4);
00789 MulAcc(6, 3);
00790 MulAcc(7, 2);
00791
00792 SaveMulAcc(9, 3, 7);
00793 MulAcc(4, 6);
00794 MulAcc(5, 5);
00795 MulAcc(6, 4);
00796 MulAcc(7, 3);
00797
00798 SaveMulAcc(10, 4, 7);
00799 MulAcc(5, 6);
00800 MulAcc(6, 5);
00801 MulAcc(7, 4);
00802
00803 SaveMulAcc(11, 5, 7);
00804 MulAcc(6, 6);
00805 MulAcc(7, 5);
00806
00807 SaveMulAcc(12, 6, 7);
00808 MulAcc(7, 6);
00809
00810 R[13] = c;
00811 p = DWord::MultiplyAndAdd(A[7], B[7], d);
00812 R[14] = p.GetLowHalf();
00813 R[15] = e + p.GetHighHalf();
00814 }
00815
00816 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00817 {
00818 DWord p;
00819 word c, d, e;
00820
00821 p = DWord::Multiply(A[0], B[0]);
00822 R[0] = p.GetLowHalf();
00823 c = p.GetHighHalf();
00824 d = e = 0;
00825
00826 MulAcc(0, 1);
00827 MulAcc(1, 0);
00828
00829 SaveMulAcc(1, 2, 0);
00830 MulAcc(1, 1);
00831 MulAcc(0, 2);
00832
00833 R[2] = c;
00834 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00835 }
00836
00837 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00838 {
00839 DWord p;
00840 word c, d, e;
00841
00842 p = DWord::Multiply(A[0], B[0]);
00843 R[0] = p.GetLowHalf();
00844 c = p.GetHighHalf();
00845 d = e = 0;
00846
00847 MulAcc(0, 1);
00848 MulAcc(1, 0);
00849
00850 SaveMulAcc(1, 2, 0);
00851 MulAcc(1, 1);
00852 MulAcc(0, 2);
00853
00854 SaveMulAcc(2, 0, 3);
00855 MulAcc(1, 2);
00856 MulAcc(2, 1);
00857 MulAcc(3, 0);
00858
00859 SaveMulAcc(3, 0, 4);
00860 MulAcc(1, 3);
00861 MulAcc(2, 2);
00862 MulAcc(3, 1);
00863 MulAcc(4, 0);
00864
00865 SaveMulAcc(4, 0, 5);
00866 MulAcc(1, 4);
00867 MulAcc(2, 3);
00868 MulAcc(3, 2);
00869 MulAcc(4, 1);
00870 MulAcc(5, 0);
00871
00872 SaveMulAcc(5, 0, 6);
00873 MulAcc(1, 5);
00874 MulAcc(2, 4);
00875 MulAcc(3, 3);
00876 MulAcc(4, 2);
00877 MulAcc(5, 1);
00878 MulAcc(6, 0);
00879
00880 R[6] = c;
00881 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00882 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00883 }
00884
00885 #undef MulAcc
00886 #undef SaveMulAcc
00887 #undef SquAcc
00888 #undef SaveSquAcc
00889
00890 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00891
00892
00893
00894 static bool s_sse2Enabled = true;
00895
00896 static void CpuId(word32 input, word32 *output)
00897 {
00898 #ifdef __GNUC__
00899 __asm__
00900 (
00901
00902 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00903 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00904 : "a" (input)
00905 );
00906 #else
00907 __asm
00908 {
00909 mov eax, input
00910 cpuid
00911 mov edi, output
00912 mov [edi], eax
00913 mov [edi+4], ebx
00914 mov [edi+8], ecx
00915 mov [edi+12], edx
00916 }
00917 #endif
00918 }
00919
00920 #ifdef SSE2_INTRINSICS_AVAILABLE
00921 #ifndef _MSC_VER
00922 static jmp_buf s_env;
00923 static void SigIllHandler(int)
00924 {
00925 longjmp(s_env, 1);
00926 }
00927 #endif
00928
00929 static bool HasSSE2()
00930 {
00931 if (!s_sse2Enabled)
00932 return false;
00933
00934 word32 cpuid[4];
00935 CpuId(1, cpuid);
00936 if ((cpuid[3] & (1 << 26)) == 0)
00937 return false;
00938
00939 #ifdef _MSC_VER
00940 __try
00941 {
00942 __asm xorpd xmm0, xmm0
00943 }
00944 __except (1)
00945 {
00946 return false;
00947 }
00948 return true;
00949 #else
00950 typedef void (*SigHandler)(int);
00951
00952 SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00953 if (oldHandler == SIG_ERR)
00954 return false;
00955
00956 bool result = true;
00957 if (setjmp(s_env))
00958 result = false;
00959 else
00960 __asm __volatile ("xorps %xmm0, %xmm0");
00961
00962 signal(SIGILL, oldHandler);
00963 return result;
00964 #endif
00965 }
00966 #endif
00967
00968 static bool IsP4()
00969 {
00970 word32 cpuid[4];
00971
00972 CpuId(0, cpuid);
00973 std::swap(cpuid[2], cpuid[3]);
00974 if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00975 return false;
00976
00977 CpuId(1, cpuid);
00978 return ((cpuid[0] >> 8) & 0xf) == 0xf;
00979 }
00980
00981
00982
00983 class PentiumOptimized : public Portable
00984 {
00985 public:
00986 static word Add(word *C, const word *A, const word *B, unsigned int N);
00987 static word Subtract(word *C, const word *A, const word *B, unsigned int N);
00988 static void Multiply4(word *C, const word *A, const word *B);
00989 static void Multiply8(word *C, const word *A, const word *B);
00990 static void Multiply8Bottom(word *C, const word *A, const word *B);
00991 };
00992
00993 class P4Optimized
00994 {
00995 public:
00996 static word Add(word *C, const word *A, const word *B, unsigned int N);
00997 static word Subtract(word *C, const word *A, const word *B, unsigned int N);
00998 #ifdef SSE2_INTRINSICS_AVAILABLE
00999 static void Multiply4(word *C, const word *A, const word *B);
01000 static void Multiply8(word *C, const word *A, const word *B);
01001 static void Multiply8Bottom(word *C, const word *A, const word *B);
01002 #endif
01003 };
01004
01005 typedef word (* PAddSub)(word *C, const word *A, const word *B, unsigned int N);
01006 typedef void (* PMul)(word *C, const word *A, const word *B);
01007
01008 static PAddSub s_pAdd, s_pSub;
01009 #ifdef SSE2_INTRINSICS_AVAILABLE
01010 static PMul s_pMul4, s_pMul8, s_pMul8B;
01011 #endif
01012
01013 static void SetPentiumFunctionPointers()
01014 {
01015 if (IsP4())
01016 {
01017 s_pAdd = &P4Optimized::Add;
01018 s_pSub = &P4Optimized::Subtract;
01019 }
01020 else
01021 {
01022 s_pAdd = &PentiumOptimized::Add;
01023 s_pSub = &PentiumOptimized::Subtract;
01024 }
01025
01026 #ifdef SSE2_INTRINSICS_AVAILABLE
01027 if (HasSSE2())
01028 {
01029 s_pMul4 = &P4Optimized::Multiply4;
01030 s_pMul8 = &P4Optimized::Multiply8;
01031 s_pMul8B = &P4Optimized::Multiply8Bottom;
01032 }
01033 else
01034 {
01035 s_pMul4 = &PentiumOptimized::Multiply4;
01036 s_pMul8 = &PentiumOptimized::Multiply8;
01037 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01038 }
01039 #endif
01040 }
01041
01042 static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
01043
01044 void DisableSSE2()
01045 {
01046 s_sse2Enabled = false;
01047 SetPentiumFunctionPointers();
01048 }
01049
01050 class LowLevel : public PentiumOptimized
01051 {
01052 public:
01053 inline static word Add(word *C, const word *A, const word *B, unsigned int N)
01054 {return s_pAdd(C, A, B, N);}
01055 inline static word Subtract(word *C, const word *A, const word *B, unsigned int N)
01056 {return s_pSub(C, A, B, N);}
01057 inline static void Square4(word *R, const word *A)
01058 {Multiply4(R, A, A);}
01059 #ifdef SSE2_INTRINSICS_AVAILABLE
01060 inline static void Multiply4(word *C, const word *A, const word *B)
01061 {s_pMul4(C, A, B);}
01062 inline static void Multiply8(word *C, const word *A, const word *B)
01063 {s_pMul8(C, A, B);}
01064 inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01065 {s_pMul8B(C, A, B);}
01066 #endif
01067 };
01068
01069
01070 #ifdef _MSC_VER
01071 #define CRYPTOPP_NAKED __declspec(naked)
01072 #define AS1(x) __asm x
01073 #define AS2(x, y) __asm x, y
01074 #define AddPrologue \
01075 __asm push ebp \
01076 __asm push ebx \
01077 __asm push esi \
01078 __asm push edi \
01079 __asm mov ecx, [esp+20] \
01080 __asm mov edx, [esp+24] \
01081 __asm mov ebx, [esp+28] \
01082 __asm mov esi, [esp+32]
01083 #define AddEpilogue \
01084 __asm pop edi \
01085 __asm pop esi \
01086 __asm pop ebx \
01087 __asm pop ebp \
01088 __asm ret
01089 #define MulPrologue \
01090 __asm push ebp \
01091 __asm push ebx \
01092 __asm push esi \
01093 __asm push edi \
01094 __asm mov ecx, [esp+28] \
01095 __asm mov esi, [esp+24] \
01096 __asm push [esp+20]
01097 #define MulEpilogue \
01098 __asm add esp, 4 \
01099 __asm pop edi \
01100 __asm pop esi \
01101 __asm pop ebx \
01102 __asm pop ebp \
01103 __asm ret
01104 #else
01105 #define CRYPTOPP_NAKED
01106 #define AS1(x) #x ";"
01107 #define AS2(x, y) #x ", " #y ";"
01108 #define AddPrologue \
01109 __asm__ __volatile__ \
01110 ( \
01111 "push %%ebx;" \
01112 "mov %2, %%ebx;" \
01113 ".intel_syntax noprefix;" \
01114 "push ebp;"
01115 #define AddEpilogue \
01116 "pop ebp;" \
01117 ".att_syntax prefix;" \
01118 "pop %%ebx;" \
01119 : \
01120 : "c" (C), "d" (A), "m" (B), "S" (N) \
01121 : "%edi", "memory", "cc" \
01122 );
01123 #define MulPrologue \
01124 __asm__ __volatile__ \
01125 ( \
01126 "push %%ebx;" \
01127 "push %%ebp;" \
01128 "push %0;" \
01129 ".intel_syntax noprefix;"
01130 #define MulEpilogue \
01131 "add esp, 4;" \
01132 "pop ebp;" \
01133 "pop ebx;" \
01134 ".att_syntax prefix;" \
01135 : \
01136 : "rm" (Z), "S" (X), "c" (Y) \
01137 : "%eax", "%edx", "%edi", "memory", "cc" \
01138 );
01139 #endif
01140
01141 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, unsigned int N)
01142 {
01143 AddPrologue
01144
01145
01146 AS2( sub ecx, edx)
01147 AS2( xor eax, eax)
01148
01149 AS2( sub eax, esi)
01150 AS2( lea ebx, [ebx+4*esi])
01151
01152 AS2( sar eax, 1)
01153 AS1( jz loopendAdd)
01154
01155 AS1(loopstartAdd:)
01156 AS2( mov esi,[edx])
01157 AS2( mov ebp,[edx+4])
01158
01159 AS2( mov edi,[ebx+8*eax])
01160 AS2( lea edx,[edx+8])
01161
01162 AS2( adc esi,edi)
01163 AS2( mov edi,[ebx+8*eax+4])
01164
01165 AS2( adc ebp,edi)
01166 AS1( inc eax)
01167
01168 AS2( mov [edx+ecx-8],esi)
01169 AS2( mov [edx+ecx-4],ebp)
01170
01171 AS1( jnz loopstartAdd)
01172
01173 AS1(loopendAdd:)
01174 AS2( adc eax, 0)
01175
01176 AddEpilogue
01177 }
01178
01179 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01180 {
01181 AddPrologue
01182
01183
01184 AS2( sub ecx, edx)
01185 AS2( xor eax, eax)
01186
01187 AS2( sub eax, esi)
01188 AS2( lea ebx, [ebx+4*esi])
01189
01190 AS2( sar eax, 1)
01191 AS1( jz loopendSub)
01192
01193 AS1(loopstartSub:)
01194 AS2( mov esi,[edx])
01195 AS2( mov ebp,[edx+4])
01196
01197 AS2( mov edi,[ebx+8*eax])
01198 AS2( lea edx,[edx+8])
01199
01200 AS2( sbb esi,edi)
01201 AS2( mov edi,[ebx+8*eax+4])
01202
01203 AS2( sbb ebp,edi)
01204 AS1( inc eax)
01205
01206 AS2( mov [edx+ecx-8],esi)
01207 AS2( mov [edx+ecx-4],ebp)
01208
01209 AS1( jnz loopstartSub)
01210
01211 AS1(loopendSub:)
01212 AS2( adc eax, 0)
01213
01214 AddEpilogue
01215 }
01216
01217
01218
01219 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, unsigned int N)
01220 {
01221 AddPrologue
01222
01223
01224 AS2( xor eax, eax)
01225 AS1( neg esi)
01226 AS1( jz loopendAddP4)
01227
01228 AS2( mov edi, [edx])
01229 AS2( mov ebp, [ebx])
01230 AS1( jmp carry1AddP4)
01231
01232 AS1(loopstartAddP4:)
01233 AS2( mov edi, [edx+8])
01234 AS2( add ecx, 8)
01235 AS2( add edx, 8)
01236 AS2( mov ebp, [ebx])
01237 AS2( add edi, eax)
01238 AS1( jc carry1AddP4)
01239 AS2( xor eax, eax)
01240
01241 AS1(carry1AddP4:)
01242 AS2( add edi, ebp)
01243 AS2( mov ebp, 1)
01244 AS2( mov [ecx], edi)
01245 AS2( mov edi, [edx+4])
01246 AS2( cmovc eax, ebp)
01247 AS2( mov ebp, [ebx+4])
01248 AS2( add ebx, 8)
01249 AS2( add edi, eax)
01250 AS1( jc carry2AddP4)
01251 AS2( xor eax, eax)
01252
01253 AS1(carry2AddP4:)
01254 AS2( add edi, ebp)
01255 AS2( mov ebp, 1)
01256 AS2( cmovc eax, ebp)
01257 AS2( mov [ecx+4], edi)
01258 AS2( add esi, 2)
01259 AS1( jnz loopstartAddP4)
01260
01261 AS1(loopendAddP4:)
01262
01263 AddEpilogue
01264 }
01265
01266 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, unsigned int N)
01267 {
01268 AddPrologue
01269
01270
01271 AS2( xor eax, eax)
01272 AS1( neg esi)
01273 AS1( jz loopendSubP4)
01274
01275 AS2( mov edi, [edx])
01276 AS2( mov ebp, [ebx])
01277 AS1( jmp carry1SubP4)
01278
01279 AS1(loopstartSubP4:)
01280 AS2( mov edi, [edx+8])
01281 AS2( add edx, 8)
01282 AS2( add ecx, 8)
01283 AS2( mov ebp, [ebx])
01284 AS2( sub edi, eax)
01285 AS1( jc carry1SubP4)
01286 AS2( xor eax, eax)
01287
01288 AS1(carry1SubP4:)
01289 AS2( sub edi, ebp)
01290 AS2( mov ebp, 1)
01291 AS2( mov [ecx], edi)
01292 AS2( mov edi, [edx+4])
01293 AS2( cmovc eax, ebp)
01294 AS2( mov ebp, [ebx+4])
01295 AS2( add ebx, 8)
01296 AS2( sub edi, eax)
01297 AS1( jc carry2SubP4)
01298 AS2( xor eax, eax)
01299
01300 AS1(carry2SubP4:)
01301 AS2( sub edi, ebp)
01302 AS2( mov ebp, 1)
01303 AS2( cmovc eax, ebp)
01304 AS2( mov [ecx+4], edi)
01305 AS2( add esi, 2)
01306 AS1( jnz loopstartSubP4)
01307
01308 AS1(loopendSubP4:)
01309
01310 AddEpilogue
01311 }
01312
01313
01314
01315 #define MulStartup \
01316 AS2(xor ebp, ebp) \
01317 AS2(xor edi, edi) \
01318 AS2(xor ebx, ebx)
01319
01320 #define MulShiftCarry \
01321 AS2(mov ebp, edx) \
01322 AS2(mov edi, ebx) \
01323 AS2(xor ebx, ebx)
01324
01325 #define MulAccumulateBottom(i,j) \
01326 AS2(mov eax, [ecx+4*j]) \
01327 AS2(imul eax, dword ptr [esi+4*i]) \
01328 AS2(add ebp, eax)
01329
01330 #define MulAccumulate(i,j) \
01331 AS2(mov eax, [ecx+4*j]) \
01332 AS1(mul dword ptr [esi+4*i]) \
01333 AS2(add ebp, eax) \
01334 AS2(adc edi, edx) \
01335 AS2(adc bl, bh)
01336
01337 #define MulStoreDigit(i) \
01338 AS2(mov edx, edi) \
01339 AS2(mov edi, [esp]) \
01340 AS2(mov [edi+4*i], ebp)
01341
01342 #define MulLastDiagonal(digits) \
01343 AS2(mov eax, [ecx+4*(digits-1)]) \
01344 AS1(mul dword ptr [esi+4*(digits-1)]) \
01345 AS2(add ebp, eax) \
01346 AS2(adc edx, edi) \
01347 AS2(mov edi, [esp]) \
01348 AS2(mov [edi+4*(2*digits-2)], ebp) \
01349 AS2(mov [edi+4*(2*digits-1)], edx)
01350
01351 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01352 {
01353 MulPrologue
01354
01355 MulStartup
01356 MulAccumulate(0,0)
01357 MulStoreDigit(0)
01358 MulShiftCarry
01359
01360 MulAccumulate(1,0)
01361 MulAccumulate(0,1)
01362 MulStoreDigit(1)
01363 MulShiftCarry
01364
01365 MulAccumulate(2,0)
01366 MulAccumulate(1,1)
01367 MulAccumulate(0,2)
01368 MulStoreDigit(2)
01369 MulShiftCarry
01370
01371 MulAccumulate(3,0)
01372 MulAccumulate(2,1)
01373 MulAccumulate(1,2)
01374 MulAccumulate(0,3)
01375 MulStoreDigit(3)
01376 MulShiftCarry
01377
01378 MulAccumulate(3,1)
01379 MulAccumulate(2,2)
01380 MulAccumulate(1,3)
01381 MulStoreDigit(4)
01382 MulShiftCarry
01383
01384 MulAccumulate(3,2)
01385 MulAccumulate(2,3)
01386 MulStoreDigit(5)
01387 MulShiftCarry
01388
01389 MulLastDiagonal(4)
01390 MulEpilogue
01391 }
01392
01393 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01394 {
01395 MulPrologue
01396
01397 MulStartup
01398 MulAccumulate(0,0)
01399 MulStoreDigit(0)
01400 MulShiftCarry
01401
01402 MulAccumulate(1,0)
01403 MulAccumulate(0,1)
01404 MulStoreDigit(1)
01405 MulShiftCarry
01406
01407 MulAccumulate(2,0)
01408 MulAccumulate(1,1)
01409 MulAccumulate(0,2)
01410 MulStoreDigit(2)
01411 MulShiftCarry
01412
01413 MulAccumulate(3,0)
01414 MulAccumulate(2,1)
01415 MulAccumulate(1,2)
01416 MulAccumulate(0,3)
01417 MulStoreDigit(3)
01418 MulShiftCarry
01419
01420 MulAccumulate(4,0)
01421 MulAccumulate(3,1)
01422 MulAccumulate(2,2)
01423 MulAccumulate(1,3)
01424 MulAccumulate(0,4)
01425 MulStoreDigit(4)
01426 MulShiftCarry
01427
01428 MulAccumulate(5,0)
01429 MulAccumulate(4,1)
01430 MulAccumulate(3,2)
01431 MulAccumulate(2,3)
01432 MulAccumulate(1,4)
01433 MulAccumulate(0,5)
01434 MulStoreDigit(5)
01435 MulShiftCarry
01436
01437 MulAccumulate(6,0)
01438 MulAccumulate(5,1)
01439 MulAccumulate(4,2)
01440 MulAccumulate(3,3)
01441 MulAccumulate(2,4)
01442 MulAccumulate(1,5)
01443 MulAccumulate(0,6)
01444 MulStoreDigit(6)
01445 MulShiftCarry
01446
01447 MulAccumulate(7,0)
01448 MulAccumulate(6,1)
01449 MulAccumulate(5,2)
01450 MulAccumulate(4,3)
01451 MulAccumulate(3,4)
01452 MulAccumulate(2,5)
01453 MulAccumulate(1,6)
01454 MulAccumulate(0,7)
01455 MulStoreDigit(7)
01456 MulShiftCarry
01457
01458 MulAccumulate(7,1)
01459 MulAccumulate(6,2)
01460 MulAccumulate(5,3)
01461 MulAccumulate(4,4)
01462 MulAccumulate(3,5)
01463 MulAccumulate(2,6)
01464 MulAccumulate(1,7)
01465 MulStoreDigit(8)
01466 MulShiftCarry
01467
01468 MulAccumulate(7,2)
01469 MulAccumulate(6,3)
01470 MulAccumulate(5,4)
01471 MulAccumulate(4,5)
01472 MulAccumulate(3,6)
01473 MulAccumulate(2,7)
01474 MulStoreDigit(9)
01475 MulShiftCarry
01476
01477 MulAccumulate(7,3)
01478 MulAccumulate(6,4)
01479 MulAccumulate(5,5)
01480 MulAccumulate(4,6)
01481 MulAccumulate(3,7)
01482 MulStoreDigit(10)
01483 MulShiftCarry
01484
01485 MulAccumulate(7,4)
01486 MulAccumulate(6,5)
01487 MulAccumulate(5,6)
01488 MulAccumulate(4,7)
01489 MulStoreDigit(11)
01490 MulShiftCarry
01491
01492 MulAccumulate(7,5)
01493 MulAccumulate(6,6)
01494 MulAccumulate(5,7)
01495 MulStoreDigit(12)
01496 MulShiftCarry
01497
01498 MulAccumulate(7,6)
01499 MulAccumulate(6,7)
01500 MulStoreDigit(13)
01501 MulShiftCarry
01502
01503 MulLastDiagonal(8)
01504 MulEpilogue
01505 }
01506
01507 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01508 {
01509 MulPrologue
01510
01511 MulStartup
01512 MulAccumulate(0,0)
01513 MulStoreDigit(0)
01514 MulShiftCarry
01515
01516 MulAccumulate(1,0)
01517 MulAccumulate(0,1)
01518 MulStoreDigit(1)
01519 MulShiftCarry
01520
01521 MulAccumulate(2,0)
01522 MulAccumulate(1,1)
01523 MulAccumulate(0,2)
01524 MulStoreDigit(2)
01525 MulShiftCarry
01526
01527 MulAccumulate(3,0)
01528 MulAccumulate(2,1)
01529 MulAccumulate(1,2)
01530 MulAccumulate(0,3)
01531 MulStoreDigit(3)
01532 MulShiftCarry
01533
01534 MulAccumulate(4,0)
01535 MulAccumulate(3,1)
01536 MulAccumulate(2,2)
01537 MulAccumulate(1,3)
01538 MulAccumulate(0,4)
01539 MulStoreDigit(4)
01540 MulShiftCarry
01541
01542 MulAccumulate(5,0)
01543 MulAccumulate(4,1)
01544 MulAccumulate(3,2)
01545 MulAccumulate(2,3)
01546 MulAccumulate(1,4)
01547 MulAccumulate(0,5)
01548 MulStoreDigit(5)
01549 MulShiftCarry
01550
01551 MulAccumulate(6,0)
01552 MulAccumulate(5,1)
01553 MulAccumulate(4,2)
01554 MulAccumulate(3,3)
01555 MulAccumulate(2,4)
01556 MulAccumulate(1,5)
01557 MulAccumulate(0,6)
01558 MulStoreDigit(6)
01559 MulShiftCarry
01560
01561 MulAccumulateBottom(7,0)
01562 MulAccumulateBottom(6,1)
01563 MulAccumulateBottom(5,2)
01564 MulAccumulateBottom(4,3)
01565 MulAccumulateBottom(3,4)
01566 MulAccumulateBottom(2,5)
01567 MulAccumulateBottom(1,6)
01568 MulAccumulateBottom(0,7)
01569 MulStoreDigit(7)
01570 MulEpilogue
01571 }
01572
01573 #undef AS1
01574 #undef AS2
01575
01576 #else
01577
01578 typedef Portable LowLevel;
01579
01580 #endif
01581
01582 #ifdef SSE2_INTRINSICS_AVAILABLE
01583
01584 #ifdef __GNUC__
01585 #define CRYPTOPP_FASTCALL
01586 #else
01587 #define CRYPTOPP_FASTCALL __fastcall
01588 #endif
01589
01590 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01591 {
01592 __m128i a3210 = _mm_load_si128(A);
01593 __m128i b3210 = _mm_load_si128(B);
01594
01595 __m128i sum;
01596
01597 __m128i z = _mm_setzero_si128();
01598 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01599 C[0] = a2b2_a0b0;
01600
01601 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01602 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01603 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01604 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01605 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01606 C[1] = _mm_add_epi64(a1b0, a0b1);
01607
01608 __m128i a31 = _mm_srli_epi64(a3210, 32);
01609 __m128i b31 = _mm_srli_epi64(b3210, 32);
01610 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01611 C[6] = a3b3_a1b1;
01612
01613 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01614 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01615 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01616 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01617 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01618 sum = _mm_add_epi64(a1b1, a0b2);
01619 C[2] = _mm_add_epi64(sum, a2b0);
01620
01621 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01622 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01623 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01624 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01625 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01626 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01627 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01628 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01629 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01630 sum = _mm_add_epi64(a2b1, a0b3);
01631 C[3] = _mm_add_epi64(sum, sum1);
01632
01633 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01634 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01635 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01636 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01637 sum = _mm_add_epi64(a2b2, a3b1);
01638 C[4] = _mm_add_epi64(sum, a1b3);
01639
01640 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01641 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01642 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01643 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01644 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01645 C[5] = _mm_add_epi64(a3b2, a2b3);
01646 }
01647
01648 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01649 {
01650 __m128i temp[7];
01651 const word *w = (word *)temp;
01652 const __m64 *mw = (__m64 *)w;
01653
01654 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01655
01656 C[0] = w[0];
01657
01658 __m64 s1, s2;
01659
01660 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01661 __m64 w4 = mw[2];
01662 __m64 w6 = mw[3];
01663 __m64 w8 = mw[4];
01664 __m64 w10 = mw[5];
01665 __m64 w12 = mw[6];
01666 __m64 w14 = mw[7];
01667 __m64 w16 = mw[8];
01668 __m64 w18 = mw[9];
01669 __m64 w20 = mw[10];
01670 __m64 w22 = mw[11];
01671 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01672
01673 s1 = _mm_add_si64(w1, w4);
01674 C[1] = _mm_cvtsi64_si32(s1);
01675 s1 = _mm_srli_si64(s1, 32);
01676
01677 s2 = _mm_add_si64(w6, w8);
01678 s1 = _mm_add_si64(s1, s2);
01679 C[2] = _mm_cvtsi64_si32(s1);
01680 s1 = _mm_srli_si64(s1, 32);
01681
01682 s2 = _mm_add_si64(w10, w12);
01683 s1 = _mm_add_si64(s1, s2);
01684 C[3] = _mm_cvtsi64_si32(s1);
01685 s1 = _mm_srli_si64(s1, 32);
01686
01687 s2 = _mm_add_si64(w14, w16);
01688 s1 = _mm_add_si64(s1, s2);
01689 C[4] = _mm_cvtsi64_si32(s1);
01690 s1 = _mm_srli_si64(s1, 32);
01691
01692 s2 = _mm_add_si64(w18, w20);
01693 s1 = _mm_add_si64(s1, s2);
01694 C[5] = _mm_cvtsi64_si32(s1);
01695 s1 = _mm_srli_si64(s1, 32);
01696
01697 s2 = _mm_add_si64(w22, w26);
01698 s1 = _mm_add_si64(s1, s2);
01699 C[6] = _mm_cvtsi64_si32(s1);
01700 s1 = _mm_srli_si64(s1, 32);
01701
01702 C[7] = _mm_cvtsi64_si32(s1) + w[27];
01703 _mm_empty();
01704 }
01705
01706 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01707 {
01708 __m128i temp[28];
01709 const word *w = (word *)temp;
01710 const __m64 *mw = (__m64 *)w;
01711 const word *x = (word *)temp+7*4;
01712 const __m64 *mx = (__m64 *)x;
01713 const word *y = (word *)temp+7*4*2;
01714 const __m64 *my = (__m64 *)y;
01715 const word *z = (word *)temp+7*4*3;
01716 const __m64 *mz = (__m64 *)z;
01717
01718 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01719
01720 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01721
01722 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01723
01724 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01725
01726 C[0] = w[0];
01727
01728 __m64 s1, s2, s3, s4;
01729
01730 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01731 __m64 w4 = mw[2];
01732 __m64 w6 = mw[3];
01733 __m64 w8 = mw[4];
01734 __m64 w10 = mw[5];
01735 __m64 w12 = mw[6];
01736 __m64 w14 = mw[7];
01737 __m64 w16 = mw[8];
01738 __m64 w18 = mw[9];
01739 __m64 w20 = mw[10];
01740 __m64 w22 = mw[11];
01741 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01742 __m64 w27 = _mm_cvtsi32_si64(w[27]);
01743
01744 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01745 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01746 __m64 x4 = mx[2];
01747 __m64 x6 = mx[3];
01748 __m64 x8 = mx[4];
01749 __m64 x10 = mx[5];
01750 __m64 x12 = mx[6];
01751 __m64 x14 = mx[7];
01752 __m64 x16 = mx[8];
01753 __m64 x18 = mx[9];
01754 __m64 x20 = mx[10];
01755 __m64 x22 = mx[11];
01756 __m64 x26 = _mm_cvtsi32_si64(x[26]);
01757 __m64 x27 = _mm_cvtsi32_si64(x[27]);
01758
01759 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01760 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01761 __m64 y4 = my[2];
01762 __m64 y6 = my[3];
01763 __m64 y8 = my[4];
01764 __m64 y10 = my[5];
01765 __m64 y12 = my[6];
01766 __m64 y14 = my[7];
01767 __m64 y16 = my[8];
01768 __m64 y18 = my[9];
01769 __m64 y20 = my[10];
01770 __m64 y22 = my[11];
01771 __m64 y26 = _mm_cvtsi32_si64(y[26]);
01772 __m64 y27 = _mm_cvtsi32_si64(y[27]);
01773
01774 __m64 z0 = _mm_cvtsi32_si64(z[0]);
01775 __m64 z1 = _mm_cvtsi32_si64(z[1]);
01776 __m64 z4 = mz[2];
01777 __m64 z6 = mz[3];
01778 __m64 z8 = mz[4];
01779 __m64 z10 = mz[5];
01780 __m64 z12 = mz[6];
01781 __m64 z14 = mz[7];
01782 __m64 z16 = mz[8];
01783 __m64 z18 = mz[9];
01784 __m64 z20 = mz[10];
01785 __m64 z22 = mz[11];
01786 __m64 z26 = _mm_cvtsi32_si64(z[26]);
01787
01788 s1 = _mm_add_si64(w1, w4);
01789 C[1] = _mm_cvtsi64_si32(s1);
01790 s1 = _mm_srli_si64(s1, 32);
01791
01792 s2 = _mm_add_si64(w6, w8);
01793 s1 = _mm_add_si64(s1, s2);
01794 C[2] = _mm_cvtsi64_si32(s1);
01795 s1 = _mm_srli_si64(s1, 32);
01796
01797 s2 = _mm_add_si64(w10, w12);
01798 s1 = _mm_add_si64(s1, s2);
01799 C[3] = _mm_cvtsi64_si32(s1);
01800 s1 = _mm_srli_si64(s1, 32);
01801
01802 s3 = _mm_add_si64(x0, y0);
01803 s2 = _mm_add_si64(w14, w16);
01804 s1 = _mm_add_si64(s1, s3);
01805 s1 = _mm_add_si64(s1, s2);
01806 C[4] = _mm_cvtsi64_si32(s1);
01807 s1 = _mm_srli_si64(s1, 32);
01808
01809 s3 = _mm_add_si64(x1, y1);
01810 s4 = _mm_add_si64(x4, y4);
01811 s1 = _mm_add_si64(s1, w18);
01812 s3 = _mm_add_si64(s3, s4);
01813 s1 = _mm_add_si64(s1, w20);
01814 s1 = _mm_add_si64(s1, s3);
01815 C[5] = _mm_cvtsi64_si32(s1);
01816 s1 = _mm_srli_si64(s1, 32);
01817
01818 s3 = _mm_add_si64(x6, y6);
01819 s4 = _mm_add_si64(x8, y8);
01820 s1 = _mm_add_si64(s1, w22);
01821 s3 = _mm_add_si64(s3, s4);
01822 s1 = _mm_add_si64(s1, w26);
01823 s1 = _mm_add_si64(s1, s3);
01824 C[6] = _mm_cvtsi64_si32(s1);
01825 s1 = _mm_srli_si64(s1, 32);
01826
01827 s3 = _mm_add_si64(x10, y10);
01828 s4 = _mm_add_si64(x12, y12);
01829 s1 = _mm_add_si64(s1, w27);
01830 s3 = _mm_add_si64(s3, s4);
01831 s1 = _mm_add_si64(s1, s3);
01832 C[7] = _mm_cvtsi64_si32(s1);
01833 s1 = _mm_srli_si64(s1, 32);
01834
01835 s3 = _mm_add_si64(x14, y14);
01836 s4 = _mm_add_si64(x16, y16);
01837 s1 = _mm_add_si64(s1, z0);
01838 s3 = _mm_add_si64(s3, s4);
01839 s1 = _mm_add_si64(s1, s3);
01840 C[8] = _mm_cvtsi64_si32(s1);
01841 s1 = _mm_srli_si64(s1, 32);
01842
01843 s3 = _mm_add_si64(x18, y18);
01844 s4 = _mm_add_si64(x20, y20);
01845 s1 = _mm_add_si64(s1, z1);
01846 s3 = _mm_add_si64(s3, s4);
01847 s1 = _mm_add_si64(s1, z4);
01848 s1 = _mm_add_si64(s1, s3);
01849 C[9] = _mm_cvtsi64_si32(s1);
01850 s1 = _mm_srli_si64(s1, 32);
01851
01852 s3 = _mm_add_si64(x22, y22);
01853 s4 = _mm_add_si64(x26, y26);
01854 s1 = _mm_add_si64(s1, z6);
01855 s3 = _mm_add_si64(s3, s4);
01856 s1 = _mm_add_si64(s1, z8);
01857 s1 = _mm_add_si64(s1, s3);
01858 C[10] = _mm_cvtsi64_si32(s1);
01859 s1 = _mm_srli_si64(s1, 32);
01860
01861 s3 = _mm_add_si64(x27, y27);
01862 s1 = _mm_add_si64(s1, z10);
01863 s1 = _mm_add_si64(s1, z12);
01864 s1 = _mm_add_si64(s1, s3);
01865 C[11] = _mm_cvtsi64_si32(s1);
01866 s1 = _mm_srli_si64(s1, 32);
01867
01868 s3 = _mm_add_si64(z14, z16);
01869 s1 = _mm_add_si64(s1, s3);
01870 C[12] = _mm_cvtsi64_si32(s1);
01871 s1 = _mm_srli_si64(s1, 32);
01872
01873 s3 = _mm_add_si64(z18, z20);
01874 s1 = _mm_add_si64(s1, s3);
01875 C[13] = _mm_cvtsi64_si32(s1);
01876 s1 = _mm_srli_si64(s1, 32);
01877
01878 s3 = _mm_add_si64(z22, z26);
01879 s1 = _mm_add_si64(s1, s3);
01880 C[14] = _mm_cvtsi64_si32(s1);
01881 s1 = _mm_srli_si64(s1, 32);
01882
01883 C[15] = z[27] + _mm_cvtsi64_si32(s1);
01884 _mm_empty();
01885 }
01886
01887 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01888 {
01889 __m128i temp[21];
01890 const word *w = (word *)temp;
01891 const __m64 *mw = (__m64 *)w;
01892 const word *x = (word *)temp+7*4;
01893 const __m64 *mx = (__m64 *)x;
01894 const word *y = (word *)temp+7*4*2;
01895 const __m64 *my = (__m64 *)y;
01896
01897 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01898
01899 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01900
01901 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01902
01903 C[0] = w[0];
01904
01905 __m64 s1, s2, s3, s4;
01906
01907 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01908 __m64 w4 = mw[2];
01909 __m64 w6 = mw[3];
01910 __m64 w8 = mw[4];
01911 __m64 w10 = mw[5];
01912 __m64 w12 = mw[6];
01913 __m64 w14 = mw[7];
01914 __m64 w16 = mw[8];
01915 __m64 w18 = mw[9];
01916 __m64 w20 = mw[10];
01917 __m64 w22 = mw[11];
01918 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01919
01920 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01921 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01922 __m64 x4 = mx[2];
01923 __m64 x6 = mx[3];
01924 __m64 x8 = mx[4];
01925
01926 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01927 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01928 __m64 y4 = my[2];
01929 __m64 y6 = my[3];
01930 __m64 y8 = my[4];
01931
01932 s1 = _mm_add_si64(w1, w4);
01933 C[1] = _mm_cvtsi64_si32(s1);
01934 s1 = _mm_srli_si64(s1, 32);
01935
01936 s2 = _mm_add_si64(w6, w8);
01937 s1 = _mm_add_si64(s1, s2);
01938 C[2] = _mm_cvtsi64_si32(s1);
01939 s1 = _mm_srli_si64(s1, 32);
01940
01941 s2 = _mm_add_si64(w10, w12);
01942 s1 = _mm_add_si64(s1, s2);
01943 C[3] = _mm_cvtsi64_si32(s1);
01944 s1 = _mm_srli_si64(s1, 32);
01945
01946 s3 = _mm_add_si64(x0, y0);
01947 s2 = _mm_add_si64(w14, w16);
01948 s1 = _mm_add_si64(s1, s3);
01949 s1 = _mm_add_si64(s1, s2);
01950 C[4] = _mm_cvtsi64_si32(s1);
01951 s1 = _mm_srli_si64(s1, 32);
01952
01953 s3 = _mm_add_si64(x1, y1);
01954 s4 = _mm_add_si64(x4, y4);
01955 s1 = _mm_add_si64(s1, w18);
01956 s3 = _mm_add_si64(s3, s4);
01957 s1 = _mm_add_si64(s1, w20);
01958 s1 = _mm_add_si64(s1, s3);
01959 C[5] = _mm_cvtsi64_si32(s1);
01960 s1 = _mm_srli_si64(s1, 32);
01961
01962 s3 = _mm_add_si64(x6, y6);
01963 s4 = _mm_add_si64(x8, y8);
01964 s1 = _mm_add_si64(s1, w22);
01965 s3 = _mm_add_si64(s3, s4);
01966 s1 = _mm_add_si64(s1, w26);
01967 s1 = _mm_add_si64(s1, s3);
01968 C[6] = _mm_cvtsi64_si32(s1);
01969 s1 = _mm_srli_si64(s1, 32);
01970
01971 C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01972 _mm_empty();
01973 }
01974
01975 #endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01976
01977
01978
01979 #define A0 A
01980 #define A1 (A+N2)
01981 #define B0 B
01982 #define B1 (B+N2)
01983
01984 #define T0 T
01985 #define T1 (T+N2)
01986 #define T2 (T+N)
01987 #define T3 (T+N+N2)
01988
01989 #define R0 R
01990 #define R1 (R+N2)
01991 #define R2 (R+N)
01992 #define R3 (R+N+N2)
01993
01994
01995
01996
01997
01998
01999 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02000 {
02001 assert(N>=2 && N%2==0);
02002
02003 if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02004 LowLevel::Multiply8(R, A, B);
02005 else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02006 LowLevel::Multiply4(R, A, B);
02007 else if (N==2)
02008 LowLevel::Multiply2(R, A, B);
02009 else
02010 {
02011 const unsigned int N2 = N/2;
02012 int carry;
02013
02014 int aComp = Compare(A0, A1, N2);
02015 int bComp = Compare(B0, B1, N2);
02016
02017 switch (2*aComp + aComp + bComp)
02018 {
02019 case -4:
02020 LowLevel::Subtract(R0, A1, A0, N2);
02021 LowLevel::Subtract(R1, B0, B1, N2);
02022 RecursiveMultiply(T0, T2, R0, R1, N2);
02023 LowLevel::Subtract(T1, T1, R0, N2);
02024 carry = -1;
02025 break;
02026 case -2:
02027 LowLevel::Subtract(R0, A1, A0, N2);
02028 LowLevel::Subtract(R1, B0, B1, N2);
02029 RecursiveMultiply(T0, T2, R0, R1, N2);
02030 carry = 0;
02031 break;
02032 case 2:
02033 LowLevel::Subtract(R0, A0, A1, N2);
02034 LowLevel::Subtract(R1, B1, B0, N2);
02035 RecursiveMultiply(T0, T2, R0, R1, N2);
02036 carry = 0;
02037 break;
02038 case 4:
02039 LowLevel::Subtract(R0, A1, A0, N2);
02040 LowLevel::Subtract(R1, B0, B1, N2);
02041 RecursiveMultiply(T0, T2, R0, R1, N2);
02042 LowLevel::Subtract(T1, T1, R1, N2);
02043 carry = -1;
02044 break;
02045 default:
02046 SetWords(T0, 0, N);
02047 carry = 0;
02048 }
02049
02050 RecursiveMultiply(R0, T2, A0, B0, N2);
02051 RecursiveMultiply(R2, T2, A1, B1, N2);
02052
02053
02054
02055 carry += LowLevel::Add(T0, T0, R0, N);
02056 carry += LowLevel::Add(T0, T0, R2, N);
02057 carry += LowLevel::Add(R1, R1, T0, N);
02058
02059 assert (carry >= 0 && carry <= 2);
02060 Increment(R3, N2, carry);
02061 }
02062 }
02063
02064
02065
02066
02067
02068 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
02069 {
02070 assert(N && N%2==0);
02071 if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02072 LowLevel::Square8(R, A);
02073 if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02074 LowLevel::Square4(R, A);
02075 else if (N==2)
02076 LowLevel::Square2(R, A);
02077 else
02078 {
02079 const unsigned int N2 = N/2;
02080
02081 RecursiveSquare(R0, T2, A0, N2);
02082 RecursiveSquare(R2, T2, A1, N2);
02083 RecursiveMultiply(T0, T2, A0, A1, N2);
02084
02085 word carry = LowLevel::Add(R1, R1, T0, N);
02086 carry += LowLevel::Add(R1, R1, T0, N);
02087 Increment(R3, N2, carry);
02088 }
02089 }
02090
02091
02092
02093
02094
02095
02096 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02097 {
02098 assert(N>=2 && N%2==0);
02099 if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02100 LowLevel::Multiply8Bottom(R, A, B);
02101 else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02102 LowLevel::Multiply4Bottom(R, A, B);
02103 else if (N==2)
02104 LowLevel::Multiply2Bottom(R, A, B);
02105 else
02106 {
02107 const unsigned int N2 = N/2;
02108
02109 RecursiveMultiply(R, T, A0, B0, N2);
02110 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02111 LowLevel::Add(R1, R1, T0, N2);
02112 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02113 LowLevel::Add(R1, R1, T0, N2);
02114 }
02115 }
02116
02117
02118
02119
02120
02121
02122
02123 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02124 {
02125 assert(N>=2 && N%2==0);
02126
02127 if (N==4)
02128 {
02129 LowLevel::Multiply4(T, A, B);
02130 memcpy(R, T+4, 4*WORD_SIZE);
02131 }
02132 else if (N==2)
02133 {
02134 LowLevel::Multiply2(T, A, B);
02135 memcpy(R, T+2, 2*WORD_SIZE);
02136 }
02137 else
02138 {
02139 const unsigned int N2 = N/2;
02140 int carry;
02141
02142 int aComp = Compare(A0, A1, N2);
02143 int bComp = Compare(B0, B1, N2);
02144
02145 switch (2*aComp + aComp + bComp)
02146 {
02147 case -4:
02148 LowLevel::Subtract(R0, A1, A0, N2);
02149 LowLevel::Subtract(R1, B0, B1, N2);
02150 RecursiveMultiply(T0, T2, R0, R1, N2);
02151 LowLevel::Subtract(T1, T1, R0, N2);
02152 carry = -1;
02153 break;
02154 case -2:
02155 LowLevel::Subtract(R0, A1, A0, N2);
02156 LowLevel::Subtract(R1, B0, B1, N2);
02157 RecursiveMultiply(T0, T2, R0, R1, N2);
02158 carry = 0;
02159 break;
02160 case 2:
02161 LowLevel::Subtract(R0, A0, A1, N2);
02162 LowLevel::Subtract(R1, B1, B0, N2);
02163 RecursiveMultiply(T0, T2, R0, R1, N2);
02164 carry = 0;
02165 break;
02166 case 4:
02167 LowLevel::Subtract(R0, A1, A0, N2);
02168 LowLevel::Subtract(R1, B0, B1, N2);
02169 RecursiveMultiply(T0, T2, R0, R1, N2);
02170 LowLevel::Subtract(T1, T1, R1, N2);
02171 carry = -1;
02172 break;
02173 default:
02174 SetWords(T0, 0, N);
02175 carry = 0;
02176 }
02177
02178 RecursiveMultiply(T2, R0, A1, B1, N2);
02179
02180
02181
02182 word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02183 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02184 word t = (Compare(R0, T2, N2) == -1);
02185
02186 carry += t;
02187 carry += Increment(R0, N2, c2+t);
02188 carry += LowLevel::Add(R0, R0, T1, N2);
02189 carry += LowLevel::Add(R0, R0, T3, N2);
02190 assert (carry >= 0 && carry <= 2);
02191
02192 CopyWords(R1, T3, N2);
02193 Increment(R1, N2, carry);
02194 }
02195 }
02196
02197 inline word Add(word *C, const word *A, const word *B, unsigned int N)
02198 {
02199 return LowLevel::Add(C, A, B, N);
02200 }
02201
02202 inline word Subtract(word *C, const word *A, const word *B, unsigned int N)
02203 {
02204 return LowLevel::Subtract(C, A, B, N);
02205 }
02206
02207 inline void Multiply(word *R, word *T, const word *A, const word *B, unsigned int N)
02208 {
02209 RecursiveMultiply(R, T, A, B, N);
02210 }
02211
02212 inline void Square(word *R, word *T, const word *A, unsigned int N)
02213 {
02214 RecursiveSquare(R, T, A, N);
02215 }
02216
02217 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
02218 {
02219 RecursiveMultiplyBottom(R, T, A, B, N);
02220 }
02221
02222 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
02223 {
02224 RecursiveMultiplyTop(R, T, L, A, B, N);
02225 }
02226
02227 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
02228 {
02229 word carry=0;
02230 for(unsigned i=0; i<N; i++)
02231 {
02232 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02233 C[i] = p.GetLowHalf();
02234 carry = p.GetHighHalf();
02235 }
02236 return carry;
02237 }
02238
02239
02240
02241
02242
02243
02244 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02245 {
02246 if (NA == NB)
02247 {
02248 if (A == B)
02249 Square(R, T, A, NA);
02250 else
02251 Multiply(R, T, A, B, NA);
02252
02253 return;
02254 }
02255
02256 if (NA > NB)
02257 {
02258 std::swap(A, B);
02259 std::swap(NA, NB);
02260 }
02261
02262 assert(NB % NA == 0);
02263 assert((NB/NA)%2 == 0);
02264
02265 if (NA==2 && !A[1])
02266 {
02267 switch (A[0])
02268 {
02269 case 0:
02270 SetWords(R, 0, NB+2);
02271 return;
02272 case 1:
02273 CopyWords(R, B, NB);
02274 R[NB] = R[NB+1] = 0;
02275 return;
02276 default:
02277 R[NB] = LinearMultiply(R, B, A[0], NB);
02278 R[NB+1] = 0;
02279 return;
02280 }
02281 }
02282
02283 Multiply(R, T, A, B, NA);
02284 CopyWords(T+2*NA, R+NA, NA);
02285
02286 unsigned i;
02287
02288 for (i=2*NA; i<NB; i+=2*NA)
02289 Multiply(T+NA+i, T, A, B+i, NA);
02290 for (i=NA; i<NB; i+=2*NA)
02291 Multiply(R+i, T, A, B+i, NA);
02292
02293 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02294 Increment(R+NB, NA);
02295 }
02296
02297
02298
02299
02300
02301 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
02302 {
02303 if (N==2)
02304 {
02305 T[0] = AtomicInverseModPower2(A[0]);
02306 T[1] = 0;
02307 LowLevel::Multiply2Bottom(T+2, T, A);
02308 TwosComplement(T+2, 2);
02309 Increment(T+2, 2, 2);
02310 LowLevel::Multiply2Bottom(R, T, T+2);
02311 }
02312 else
02313 {
02314 const unsigned int N2 = N/2;
02315 RecursiveInverseModPower2(R0, T0, A0, N2);
02316 T0[0] = 1;
02317 SetWords(T0+1, 0, N2-1);
02318 MultiplyTop(R1, T1, T0, R0, A0, N2);
02319 MultiplyBottom(T0, T1, R0, A1, N2);
02320 Add(T0, R1, T0, N2);
02321 TwosComplement(T0, N2);
02322 MultiplyBottom(R1, T1, R0, T0, N2);
02323 }
02324 }
02325
02326
02327
02328
02329
02330
02331
02332 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)
02333 {
02334 MultiplyBottom(R, T, X, U, N);
02335 MultiplyTop(T, T+N, X, R, M, N);
02336 word borrow = Subtract(T, X+N, T, N);
02337
02338 word carry = Add(T+N, T, M, N);
02339 assert(carry || !borrow);
02340 CopyWords(R, T + (borrow ? N : 0), N);
02341 }
02342
02343
02344
02345
02346
02347
02348
02349
02350 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, unsigned int N)
02351 {
02352 assert(N%2==0 && N>=4);
02353
02354 #define M0 M
02355 #define M1 (M+N2)
02356 #define V0 V
02357 #define V1 (V+N2)
02358
02359 #define X0 X
02360 #define X1 (X+N2)
02361 #define X2 (X+N)
02362 #define X3 (X+N+N2)
02363
02364 const unsigned int N2 = N/2;
02365 Multiply(T0, T2, V0, X3, N2);
02366 int c2 = Add(T0, T0, X0, N);
02367 MultiplyBottom(T3, T2, T0, U, N2);
02368 MultiplyTop(T2, R, T0, T3, M0, N2);
02369 c2 -= Subtract(T2, T1, T2, N2);
02370 Multiply(T0, R, T3, M1, N2);
02371 c2 -= Subtract(T0, T2, T0, N2);
02372 int c3 = -(int)Subtract(T1, X2, T1, N2);
02373 Multiply(R0, T2, V1, X3, N2);
02374 c3 += Add(R, R, T, N);
02375
02376 if (c2>0)
02377 c3 += Increment(R1, N2);
02378 else if (c2<0)
02379 c3 -= Decrement(R1, N2, -c2);
02380
02381 assert(c3>=-1 && c3<=1);
02382 if (c3>0)
02383 Subtract(R, R, M, N);
02384 else if (c3<0)
02385 Add(R, R, M, N);
02386
02387 #undef M0
02388 #undef M1
02389 #undef V0
02390 #undef V1
02391
02392 #undef X0
02393 #undef X1
02394 #undef X2
02395 #undef X3
02396 }
02397
02398 #undef A0
02399 #undef A1
02400 #undef B0
02401 #undef B1
02402
02403 #undef T0
02404 #undef T1
02405 #undef T2
02406 #undef T3
02407
02408 #undef R0
02409 #undef R1
02410 #undef R2
02411 #undef R3
02412
02413
02414
02415
02416
02417
02418
02419
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02478 {
02479 word T[4];
02480 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02481 Q[0] = q.GetLowHalf();
02482 Q[1] = q.GetHighHalf();
02483
02484 #ifndef NDEBUG
02485 if (B[0] || B[1])
02486 {
02487
02488 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02489 word P[4];
02490 Portable::Multiply2(P, Q, B);
02491 Add(P, P, T, 4);
02492 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02493 }
02494 #endif
02495 }
02496
02497
02498 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)
02499 {
02500 assert(N && N%2==0);
02501
02502 if (Q[1])
02503 {
02504 T[N] = T[N+1] = 0;
02505 unsigned i;
02506 for (i=0; i<N; i+=4)
02507 LowLevel::Multiply2(T+i, Q, B+i);
02508 for (i=2; i<N; i+=4)
02509 if (LowLevel::Multiply2Add(T+i, Q, B+i))
02510 T[i+5] += (++T[i+4]==0);
02511 }
02512 else
02513 {
02514 T[N] = LinearMultiply(T, B, Q[0], N);
02515 T[N+1] = 0;
02516 }
02517
02518 word borrow = Subtract(R, R, T, N+2);
02519 assert(!borrow && !R[N+1]);
02520
02521 while (R[N] || Compare(R, B, N) >= 0)
02522 {
02523 R[N] -= Subtract(R, R, B, N);
02524 Q[1] += (++Q[0]==0);
02525 assert(Q[0] || Q[1]);
02526 }
02527 }
02528
02529
02530
02531
02532
02533
02534
02535 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
02536 {
02537 assert(NA && NB && NA%2==0 && NB%2==0);
02538 assert(B[NB-1] || B[NB-2]);
02539 assert(NB <= NA);
02540
02541
02542 word *const TA=T;
02543 word *const TB=T+NA+2;
02544 word *const TP=T+NA+2+NB;
02545
02546
02547 unsigned shiftWords = (B[NB-1]==0);
02548 TB[0] = TB[NB-1] = 0;
02549 CopyWords(TB+shiftWords, B, NB-shiftWords);
02550 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02551 assert(shiftBits < WORD_BITS);
02552 ShiftWordsLeftByBits(TB, NB, shiftBits);
02553
02554
02555 TA[0] = TA[NA] = TA[NA+1] = 0;
02556 CopyWords(TA+shiftWords, A, NA);
02557 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02558
02559 if (TA[NA+1]==0 && TA[NA] <= 1)
02560 {
02561 Q[NA-NB+1] = Q[NA-NB] = 0;
02562 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02563 {
02564 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02565 ++Q[NA-NB];
02566 }
02567 }
02568 else
02569 {
02570 NA+=2;
02571 assert(Compare(TA+NA-NB, TB, NB) < 0);
02572 }
02573
02574 word BT[2];
02575 BT[0] = TB[NB-2] + 1;
02576 BT[1] = TB[NB-1] + (BT[0]==0);
02577
02578
02579 for (unsigned i=NA-2; i>=NB; i-=2)
02580 {
02581 AtomicDivide(Q+i-NB, TA+i-2, BT);
02582 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02583 }
02584
02585
02586 CopyWords(R, TA+shiftWords, NB);
02587 ShiftWordsRightByBits(R, NB, shiftBits);
02588 }
02589
02590 static inline unsigned int EvenWordCount(const word *X, unsigned int N)
02591 {
02592 while (N && X[N-2]==0 && X[N-1]==0)
02593 N-=2;
02594 return N;
02595 }
02596
02597
02598
02599
02600
02601
02602
02603 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)
02604 {
02605 assert(NA<=N && N && N%2==0);
02606
02607 word *b = T;
02608 word *c = T+N;
02609 word *f = T+2*N;
02610 word *g = T+3*N;
02611 unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
02612 unsigned int k=0, s=0;
02613
02614 SetWords(T, 0, 3*N);
02615 b[0]=1;
02616 CopyWords(f, A, NA);
02617 CopyWords(g, M, N);
02618
02619 while (1)
02620 {
02621 word t=f[0];
02622 while (!t)
02623 {
02624 if (EvenWordCount(f, fgLen)==0)
02625 {
02626 SetWords(R, 0, N);
02627 return 0;
02628 }
02629
02630 ShiftWordsRightByWords(f, fgLen, 1);
02631 if (c[bcLen-1]) bcLen+=2;
02632 assert(bcLen <= N);
02633 ShiftWordsLeftByWords(c, bcLen, 1);
02634 k+=WORD_BITS;
02635 t=f[0];
02636 }
02637
02638 unsigned int i=0;
02639 while (t%2 == 0)
02640 {
02641 t>>=1;
02642 i++;
02643 }
02644 k+=i;
02645
02646 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02647 {
02648 if (s%2==0)
02649 CopyWords(R, b, N);
02650 else
02651 Subtract(R, M, b, N);
02652 return k;
02653 }
02654
02655 ShiftWordsRightByBits(f, fgLen, i);
02656 t=ShiftWordsLeftByBits(c, bcLen, i);
02657 if (t)
02658 {
02659 c[bcLen] = t;
02660 bcLen+=2;
02661 assert(bcLen <= N);
02662 }
02663
02664 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02665 fgLen-=2;
02666
02667 if (Compare(f, g, fgLen)==-1)
02668 {
02669 std::swap(f, g);
02670 std::swap(b, c);
02671 s++;
02672 }
02673
02674 Subtract(f, f, g, fgLen);
02675
02676 if (Add(b, b, c, bcLen))
02677 {
02678 b[bcLen] = 1;
02679 bcLen+=2;
02680 assert(bcLen <= N);
02681 }
02682 }
02683 }
02684
02685
02686
02687
02688
02689 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02690 {
02691 CopyWords(R, A, N);
02692
02693 while (k--)
02694 {
02695 if (R[0]%2==0)
02696 ShiftWordsRightByBits(R, N, 1);
02697 else
02698 {
02699 word carry = Add(R, R, M, N);
02700 ShiftWordsRightByBits(R, N, 1);
02701 R[N-1] += carry<<(WORD_BITS-1);
02702 }
02703 }
02704 }
02705
02706
02707
02708
02709
02710 void MultiplyByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
02711 {
02712 CopyWords(R, A, N);
02713
02714 while (k--)
02715 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02716 Subtract(R, R, M, N);
02717 }
02718
02719
02720
02721 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02722
02723 static inline unsigned int RoundupSize(unsigned int n)
02724 {
02725 if (n<=8)
02726 return RoundupSizeTable[n];
02727 else if (n<=16)
02728 return 16;
02729 else if (n<=32)
02730 return 32;
02731 else if (n<=64)
02732 return 64;
02733 else return 1U << BitPrecision(n-1);
02734 }
02735
02736 Integer::Integer()
02737 : reg(2), sign(POSITIVE)
02738 {
02739 reg[0] = reg[1] = 0;
02740 }
02741
02742 Integer::Integer(const Integer& t)
02743 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02744 {
02745 CopyWords(reg, t.reg, reg.size());
02746 }
02747
02748 Integer::Integer(Sign s, lword value)
02749 : reg(2), sign(s)
02750 {
02751 reg[0] = word(value);
02752 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02753 }
02754
02755 Integer::Integer(signed long value)
02756 : reg(2)
02757 {
02758 if (value >= 0)
02759 sign = POSITIVE;
02760 else
02761 {
02762 sign = NEGATIVE;
02763 value = -value;
02764 }
02765 reg[0] = word(value);
02766 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02767 }
02768
02769 Integer::Integer(Sign s, word high, word low)
02770 : reg(2), sign(s)
02771 {
02772 reg[0] = low;
02773 reg[1] = high;
02774 }
02775
02776 bool Integer::IsConvertableToLong() const
02777 {
02778 if (ByteCount() > sizeof(long))
02779 return false;
02780
02781 unsigned long value = reg[0];
02782 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02783
02784 if (sign==POSITIVE)
02785 return (signed long)value >= 0;
02786 else
02787 return -(signed long)value < 0;
02788 }
02789
02790 signed long Integer::ConvertToLong() const
02791 {
02792 assert(IsConvertableToLong());
02793
02794 unsigned long value = reg[0];
02795 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02796 return sign==POSITIVE ? value : -(signed long)value;
02797 }
02798
02799 Integer::Integer(BufferedTransformation &encodedInteger, unsigned int byteCount, Signedness s)
02800 {
02801 Decode(encodedInteger, byteCount, s);
02802 }
02803
02804 Integer::Integer(const byte *encodedInteger, unsigned int byteCount, Signedness s)
02805 {
02806 Decode(encodedInteger, byteCount, s);
02807 }
02808
02809 Integer::Integer(BufferedTransformation &bt)
02810 {
02811 BERDecode(bt);
02812 }
02813
02814 Integer::Integer(RandomNumberGenerator &rng, unsigned int bitcount)
02815 {
02816 Randomize(rng, bitcount);
02817 }
02818
02819 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02820 {
02821 if (!Randomize(rng, min, max, rnType, equiv, mod))
02822 throw Integer::RandomNumberNotFound();
02823 }
02824
02825 Integer Integer::Power2(unsigned int e)
02826 {
02827 Integer r((word)0, BitsToWords(e+1));
02828 r.SetBit(e);
02829 return r;
02830 }
02831
02832 template <long i>
02833 struct NewInteger
02834 {
02835 Integer * operator()() const
02836 {
02837 return new Integer(i);
02838 }
02839 };
02840
02841 const Integer &Integer::Zero()
02842 {
02843 return Singleton<Integer>().Ref();
02844 }
02845
02846 const Integer &Integer::One()
02847 {
02848 return Singleton<Integer, NewInteger<1> >().Ref();
02849 }
02850
02851 const Integer &Integer::Two()
02852 {
02853 return Singleton<Integer, NewInteger<2> >().Ref();
02854 }
02855
02856 bool Integer::operator!() const
02857 {
02858 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02859 }
02860
02861 Integer& Integer::operator=(const Integer& t)
02862 {
02863 if (this != &t)
02864 {
02865 reg.New(RoundupSize(t.WordCount()));
02866 CopyWords(reg, t.reg, reg.size());
02867 sign = t.sign;
02868 }
02869 return *this;
02870 }
02871
02872 bool Integer::GetBit(unsigned int n) const
02873 {
02874 if (n/WORD_BITS >= reg.size())
02875 return 0;
02876 else
02877 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02878 }
02879
02880 void Integer::SetBit(unsigned int n, bool value)
02881 {
02882 if (value)
02883 {
02884 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02885 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02886 }
02887 else
02888 {
02889 if (n/WORD_BITS < reg.size())
02890 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02891 }
02892 }
02893
02894 byte Integer::GetByte(unsigned int n) const
02895 {
02896 if (n/WORD_SIZE >= reg.size())
02897 return 0;
02898 else
02899 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02900 }
02901
02902 void Integer::SetByte(unsigned int n, byte value)
02903 {
02904 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02905 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02906 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02907 }
02908
02909 unsigned long Integer::GetBits(unsigned int i, unsigned int n) const
02910 {
02911 assert(n <= sizeof(unsigned long)*8);
02912 unsigned long v = 0;
02913 for (unsigned int j=0; j<n; j++)
02914 v |= GetBit(i+j) << j;
02915 return v;
02916 }
02917
02918 Integer Integer::operator-() const
02919 {
02920 Integer result(*this);
02921 result.Negate();
02922 return result;
02923 }
02924
02925 Integer Integer::AbsoluteValue() const
02926 {
02927 Integer result(*this);
02928 result.sign = POSITIVE;
02929 return result;
02930 }
02931
02932 void Integer::swap(Integer &a)
02933 {
02934 reg.swap(a.reg);
02935 std::swap(sign, a.sign);
02936 }
02937
02938 Integer::Integer(word value, unsigned int length)
02939 : reg(RoundupSize(length)), sign(POSITIVE)
02940 {
02941 reg[0] = value;
02942 SetWords(reg+1, 0, reg.size()-1);
02943 }
02944
02945 template <class T>
02946 static Integer StringToInteger(const T *str)
02947 {
02948 word radix;
02949
02950
02951 unsigned int length;
02952 for (length = 0; str[length] != 0; length++) {}
02953
02954 Integer v;
02955
02956 if (length == 0)
02957 return v;
02958
02959 switch (str[length-1])
02960 {
02961 case 'h':
02962 case 'H':
02963 radix=16;
02964 break;
02965 case 'o':
02966 case 'O':
02967 radix=8;
02968 break;
02969 case 'b':
02970 case 'B':
02971 radix=2;
02972 break;
02973 default:
02974 radix=10;
02975 }
02976
02977 if (length > 2 && str[0] == '0' && str[1] == 'x')
02978 radix = 16;
02979
02980 for (unsigned i=0; i<length; i++)
02981 {
02982 word digit;
02983
02984 if (str[i] >= '0' && str[i] <= '9')
02985 digit = str[i] - '0';
02986 else if (str[i] >= 'A' && str[i] <= 'F')
02987 digit = str[i] - 'A' + 10;
02988 else if (str[i] >= 'a' && str[i] <= 'f')
02989 digit = str[i] - 'a' + 10;
02990 else
02991 digit = radix;
02992
02993 if (digit < radix)
02994 {
02995 v *= radix;
02996 v += digit;
02997 }
02998 }
02999
03000 if (str[0] == '-')
03001 v.Negate();
03002
03003 return v;
03004 }
03005
03006 Integer::Integer(const char *str)
03007 : reg(2), sign(POSITIVE)
03008 {
03009 *this = StringToInteger(str);
03010 }
03011
03012 Integer::Integer(const wchar_t *str)
03013 : reg(2), sign(POSITIVE)
03014 {
03015 *this = StringToInteger(str);
03016 }
03017
03018 unsigned int Integer::WordCount() const
03019 {
03020 return CountWords(reg, reg.size());
03021 }
03022
03023 unsigned int Integer::ByteCount() const
03024 {
03025 unsigned wordCount = WordCount();
03026 if (wordCount)
03027 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03028 else
03029 return 0;
03030 }
03031
03032 unsigned int Integer::BitCount() const
03033 {
03034 unsigned wordCount = WordCount();
03035 if (wordCount)
03036 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03037 else
03038 return 0;
03039 }
03040
03041 void Integer::Decode(const byte *input, unsigned int inputLen, Signedness s)
03042 {
03043 StringStore store(input, inputLen);
03044 Decode(store, inputLen, s);
03045 }
03046
03047 void Integer::Decode(BufferedTransformation &bt, unsigned int inputLen, Signedness s)
03048 {
03049 assert(bt.MaxRetrievable() >= inputLen);
03050
03051 byte b;
03052 bt.Peek(b);
03053 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03054
03055 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03056 {
03057 bt.Skip(1);
03058 inputLen--;
03059 bt.Peek(b);
03060 }
03061
03062 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03063
03064 for (unsigned int i=inputLen; i > 0; i--)
03065 {
03066 bt.Get(b);
03067 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03068 }
03069
03070 if (sign == NEGATIVE)
03071 {
03072 for (unsigned i=inputLen; i<reg.size()*WORD_SIZE; i++)
03073 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03074 TwosComplement(reg, reg.size());
03075 }
03076 }
03077
03078 unsigned int Integer::MinEncodedSize(Signedness signedness) const
03079 {
03080 unsigned int outputLen = STDMAX(1U, ByteCount());
03081 if (signedness == UNSIGNED)
03082 return outputLen;
03083 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03084 outputLen++;
03085 if (IsNegative() && *this < -Power2(outputLen*8-1))
03086 outputLen++;
03087 return outputLen;
03088 }
03089
03090 unsigned int Integer::Encode(byte *output, unsigned int outputLen, Signedness signedness) const
03091 {
03092 ArraySink sink(output, outputLen);
03093 return Encode(sink, outputLen, signedness);
03094 }
03095
03096 unsigned int Integer::Encode(BufferedTransformation &bt, unsigned int outputLen, Signedness signedness) const
03097 {
03098 if (signedness == UNSIGNED || NotNegative())
03099 {
03100 for (unsigned int i=outputLen; i > 0; i--)
03101 bt.Put(GetByte(i-1));
03102 }
03103 else
03104 {
03105
03106 Integer temp = Integer::Power2(8*STDMAX(ByteCount(), outputLen)) + *this;
03107 for (unsigned i=0; i<outputLen; i++)
03108 bt.Put(temp.GetByte(outputLen-i-1));
03109 }
03110 return outputLen;
03111 }
03112
03113 void Integer::DEREncode(BufferedTransformation &bt) const
03114 {
03115 DERGeneralEncoder enc(bt, INTEGER);
03116 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03117 enc.MessageEnd();
03118 }
03119
03120 void Integer::BERDecode(const byte *input, unsigned int len)
03121 {
03122 StringStore store(input, len);
03123 BERDecode(store);
03124 }
03125
03126 void Integer::BERDecode(BufferedTransformation &bt)
03127 {
03128 BERGeneralDecoder dec(bt, INTEGER);
03129 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03130 BERDecodeError();
03131 Decode(dec, dec.RemainingLength(), SIGNED);
03132 dec.MessageEnd();
03133 }
03134
03135 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, unsigned int length) const
03136 {
03137 DERGeneralEncoder enc(bt, OCTET_STRING);
03138 Encode(enc, length);
03139 enc.MessageEnd();
03140 }
03141
03142 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, unsigned int length)
03143 {
03144 BERGeneralDecoder dec(bt, OCTET_STRING);
03145 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03146 BERDecodeError();
03147 Decode(dec, length);
03148 dec.MessageEnd();
03149 }
03150
03151 unsigned int Integer::OpenPGPEncode(byte *output, unsigned int len) const
03152 {
03153 ArraySink sink(output, len);
03154 return OpenPGPEncode(sink);
03155 }
03156
03157 unsigned int Integer::OpenPGPEncode(BufferedTransformation &bt) const
03158 {
03159 word16 bitCount = BitCount();
03160 bt.PutWord16(bitCount);
03161 return 2 + Encode(bt, BitsToBytes(bitCount));
03162 }
03163
03164 void Integer::OpenPGPDecode(const byte *input, unsigned int len)
03165 {
03166 StringStore store(input, len);
03167 OpenPGPDecode(store);
03168 }
03169
03170 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03171 {
03172 word16 bitCount;
03173 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03174 throw OpenPGPDecodeErr();
03175 Decode(bt, BitsToBytes(bitCount));
03176 }
03177
03178 void Integer::Randomize(RandomNumberGenerator &rng, unsigned int nbits)
03179 {
03180 const unsigned int nbytes = nbits/8 + 1;
03181 SecByteBlock buf(nbytes);
03182 rng.GenerateBlock(buf, nbytes);
03183 if (nbytes)
03184 buf[0] = (byte)Crop(buf[0], nbits % 8);
03185 Decode(buf, nbytes, UNSIGNED);
03186 }
03187
03188 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03189 {
03190 if (min > max)
03191 throw InvalidArgument("Integer: Min must be no greater than Max");
03192
03193 Integer range = max - min;
03194 const unsigned int nbits = range.BitCount();
03195
03196 do
03197 {
03198 Randomize(rng, nbits);
03199 }
03200 while (*this > range);
03201
03202 *this += min;
03203 }
03204
03205 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03206 {
03207 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03208 }
03209
03210 class KDF2_RNG : public RandomNumberGenerator
03211 {
03212 public:
03213 KDF2_RNG(const byte *seed, unsigned int seedSize)
03214 : m_counter(0), m_counterAndSeed(seedSize + 4)
03215 {
03216 memcpy(m_counterAndSeed + 4, seed, seedSize);
03217 }
03218
03219 byte GenerateByte()
03220 {
03221 byte b;
03222 GenerateBlock(&b, 1);
03223 return b;
03224 }
03225
03226 void GenerateBlock(byte *output, unsigned int size)
03227 {
03228 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03229 ++m_counter;
03230 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03231 }
03232
03233 private:
03234 word32 m_counter;
03235 SecByteBlock m_counterAndSeed;
03236 };
03237
03238 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03239 {
03240 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03241 Integer max;
03242 if (!params.GetValue("Max", max))
03243 {
03244 int bitLength;
03245 if (params.GetIntValue("BitLength", bitLength))
03246 max = Integer::Power2(bitLength);
03247 else
03248 throw InvalidArgument("Integer: missing Max argument");
03249 }
03250 if (min > max)
03251 throw InvalidArgument("Integer: Min must be no greater than Max");
03252
03253 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03254 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03255
03256 if (equiv.IsNegative() || equiv >= mod)
03257 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03258
03259 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03260
03261 member_ptr<KDF2_RNG> kdf2Rng;
03262 ConstByteArrayParameter seed;
03263 if (params.GetValue("Seed", seed))
03264 {
03265 ByteQueue bq;
03266 DERSequenceEncoder seq(bq);
03267 min.DEREncode(seq);
03268 max.DEREncode(seq);
03269 equiv.DEREncode(seq);
03270 mod.DEREncode(seq);
03271 DEREncodeUnsigned(seq, rnType);
03272 DEREncodeOctetString(seq, seed.begin(), seed.size());
03273 seq.MessageEnd();
03274
03275 SecByteBlock finalSeed(bq.MaxRetrievable());
03276 bq.Get(finalSeed, finalSeed.size());
03277 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03278 }
03279 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03280
03281 switch (rnType)
03282 {
03283 case ANY:
03284 if (mod == One())
03285 Randomize(rng, min, max);
03286 else
03287 {
03288 Integer min1 = min + (equiv-min)%mod;
03289 if (max < min1)
03290 return false;
03291 Randomize(rng, Zero(), (max - min1) / mod);
03292 *this *= mod;
03293 *this += min1;
03294 }
03295 return true;
03296
03297 case PRIME:
03298 {
03299 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03300
03301 int i;
03302 i = 0;
03303 while (1)
03304 {
03305 if (++i==16)
03306 {
03307
03308 Integer first = min;
03309 if (FirstPrime(first, max, equiv, mod, pSelector))
03310 {
03311
03312 *this = first;
03313 if (!FirstPrime(first, max, equiv, mod, pSelector))
03314 return true;
03315 }
03316 else
03317 return false;
03318 }
03319
03320 Randomize(rng, min, max);
03321 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03322 return true;
03323 }
03324 }
03325
03326 default:
03327 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03328 }
03329 }
03330
03331 std::istream& operator>>(std::istream& in, Integer &a)
03332 {
03333 char c;
03334 unsigned int length = 0;
03335 SecBlock<char> str(length + 16);
03336
03337 std::ws(in);
03338
03339 do
03340 {
03341 in.read(&c, 1);
03342 str[length++] = c;
03343 if (length >= str.size())
03344 str.Grow(length + 16);
03345 }
03346 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03347
03348 if (in.gcount())
03349 in.putback(c);
03350 str[length-1] = '\0';
03351 a = Integer(str);
03352
03353 return in;
03354 }
03355
03356 std::ostream& operator<<(std::ostream& out, const Integer &a)
03357 {
03358
03359 long f = out.flags() & std::ios::basefield;
03360 int base, block;
03361 char suffix;
03362 switch(f)
03363 {
03364 case std::ios::oct :
03365 base = 8;
03366 block = 8;
03367 suffix = 'o';
03368 break;
03369 case std::ios::hex :
03370 base = 16;
03371 block = 4;
03372 suffix = 'h';
03373 break;
03374 default :
03375 base = 10;
03376 block = 3;
03377 suffix = '.';
03378 }
03379
03380 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03381 Integer temp1=a, temp2;
03382 unsigned i=0;
03383 const char vec[]="0123456789ABCDEF";
03384
03385 if (a.IsNegative())
03386 {
03387 out << '-';
03388 temp1.Negate();
03389 }
03390
03391 if (!a)
03392 out << '0';
03393
03394 while (!!temp1)
03395 {
03396 word digit;
03397 Integer::Divide(digit, temp2, temp1, base);
03398 s[i++]=vec[digit];
03399 temp1=temp2;
03400 }
03401
03402 while (i--)
03403 {
03404 out << s[i];
03405
03406
03407 }
03408 return out << suffix;
03409 }
03410
03411 Integer& Integer::operator++()
03412 {
03413 if (NotNegative())
03414 {
03415 if (Increment(reg, reg.size()))
03416 {
03417 reg.CleanGrow(2*reg.size());
03418 reg[reg.size()/2]=1;
03419 }
03420 }
03421 else
03422 {
03423 word borrow = Decrement(reg, reg.size());
03424 assert(!borrow);
03425 if (WordCount()==0)
03426 *this = Zero();
03427 }
03428 return *this;
03429 }
03430
03431 Integer& Integer::operator--()
03432 {
03433 if (IsNegative())
03434 {
03435 if (Increment(reg, reg.size()))
03436 {
03437 reg.CleanGrow(2*reg.size());
03438 reg[reg.size()/2]=1;
03439 }
03440 }
03441 else
03442 {
03443 if (Decrement(reg, reg.size()))
03444 *this = -One();
03445 }
03446 return *this;
03447 }
03448
03449 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03450 {
03451 word carry;
03452 if (a.reg.size() == b.reg.size())
03453 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03454 else if (a.reg.size() > b.reg.size())
03455 {
03456 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03457 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03458 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03459 }
03460 else
03461 {
03462 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03463 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03464 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03465 }
03466
03467 if (carry)
03468 {
03469 sum.reg.CleanGrow(2*sum.reg.size());
03470 sum.reg[sum.reg.size()/2] = 1;
03471 }
03472 sum.sign = Integer::POSITIVE;
03473 }
03474
03475 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03476 {
03477 unsigned aSize = a.WordCount();
03478 aSize += aSize%2;
03479 unsigned bSize = b.WordCount();
03480 bSize += bSize%2;
03481
03482 if (aSize == bSize)
03483 {
03484 if (Compare(a.reg, b.reg, aSize) >= 0)
03485 {
03486 Subtract(diff.reg, a.reg, b.reg, aSize);
03487 diff.sign = Integer::POSITIVE;
03488 }
03489 else
03490 {
03491 Subtract(diff.reg, b.reg, a.reg, aSize);
03492 diff.sign = Integer::NEGATIVE;
03493 }
03494 }
03495 else if (aSize > bSize)
03496 {
03497 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03498 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03499 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03500 assert(!borrow);
03501 diff.sign = Integer::POSITIVE;
03502 }
03503 else
03504 {
03505 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03506 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03507 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03508 assert(!borrow);
03509 diff.sign = Integer::NEGATIVE;
03510 }
03511 }
03512
03513 Integer Integer::Plus(const Integer& b) const
03514 {
03515 Integer sum((word)0, STDMAX(reg.size(), b.reg.size()));
03516 if (NotNegative())
03517 {
03518 if (b.NotNegative())
03519 PositiveAdd(sum, *this, b);
03520 else
03521 PositiveSubtract(sum, *this, b);
03522 }
03523 else
03524 {
03525 if (b.NotNegative())
03526 PositiveSubtract(sum, b, *this);
03527 else
03528 {
03529 PositiveAdd(sum, *this, b);
03530 sum.sign = Integer::NEGATIVE;
03531 }
03532 }
03533 return sum;
03534 }
03535
03536 Integer& Integer::operator+=(const Integer& t)
03537 {
03538 reg.CleanGrow(t.reg.size());
03539 if (NotNegative())
03540 {
03541 if (t.NotNegative())
03542 PositiveAdd(*this, *this, t);
03543 else
03544 PositiveSubtract(*this, *this, t);
03545 }
03546 else
03547 {
03548 if (t.NotNegative())
03549 PositiveSubtract(*this, t, *this);
03550 else
03551 {
03552 PositiveAdd(*this, *this, t);
03553 sign = Integer::NEGATIVE;
03554 }
03555 }
03556 return *this;
03557 }
03558
03559 Integer Integer::Minus(const Integer& b) const
03560 {
03561 Integer diff((word)0, STDMAX(reg.size(), b.reg.size()));
03562 if (NotNegative())
03563 {
03564 if (b.NotNegative())
03565 PositiveSubtract(diff, *this, b);
03566 else
03567 PositiveAdd(diff, *this, b);
03568 }
03569 else
03570 {
03571 if (b.NotNegative())
03572 {
03573 PositiveAdd(diff, *this, b);
03574 diff.sign = Integer::NEGATIVE;
03575 }
03576 else
03577 PositiveSubtract(diff, b, *this);
03578 }
03579 return diff;
03580 }
03581
03582 Integer& Integer::operator-=(const Integer& t)
03583 {
03584 reg.CleanGrow(t.reg.size());
03585 if (NotNegative())
03586 {
03587 if (t.NotNegative())
03588 PositiveSubtract(*this, *this, t);
03589 else
03590 PositiveAdd(*this, *this, t);
03591 }
03592 else
03593 {
03594 if (t.NotNegative())
03595 {
03596 PositiveAdd(*this, *this, t);
03597 sign = Integer::NEGATIVE;
03598 }
03599 else
03600 PositiveSubtract(*this, t, *this);
03601 }
03602 return *this;
03603 }
03604
03605 Integer& Integer::operator<<=(unsigned int n)
03606 {
03607 const unsigned int wordCount = WordCount();
03608 const unsigned int shiftWords = n / WORD_BITS;
03609 const unsigned int shiftBits = n % WORD_BITS;
03610
03611 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03612 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03613 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03614 return *this;
03615 }
03616
03617 Integer& Integer::operator>>=(unsigned int n)
03618 {
03619 const unsigned int wordCount = WordCount();
03620 const unsigned int shiftWords = n / WORD_BITS;
03621 const unsigned int shiftBits = n % WORD_BITS;
03622
03623 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03624 if (wordCount > shiftWords)
03625 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03626 if (IsNegative() && WordCount()==0)
03627 *this = Zero();
03628 return *this;
03629 }
03630
03631 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03632 {
03633 unsigned aSize = RoundupSize(a.WordCount());
03634 unsigned bSize = RoundupSize(b.WordCount());
03635
03636 product.reg.CleanNew(RoundupSize(aSize+bSize));
03637 product.sign = Integer::POSITIVE;
03638
03639 SecAlignedWordBlock workspace(aSize + bSize);
03640 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03641 }
03642
03643 void Multiply(Integer &product, const Integer &a, const Integer &b)
03644 {
03645 PositiveMultiply(product, a, b);
03646
03647 if (a.NotNegative() != b.NotNegative())
03648 product.Negate();
03649 }
03650
03651 Integer Integer::Times(const Integer &b) const
03652 {
03653 Integer product;
03654 Multiply(product, *this, b);
03655 return product;
03656 }
03657
03658
03659
03660
03661
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671
03672
03673
03674
03675
03676
03677
03678
03679
03680 void PositiveDivide(Integer &remainder, Integer "ient,
03681 const Integer &a, const Integer &b)
03682 {
03683 unsigned aSize = a.WordCount();
03684 unsigned bSize = b.WordCount();
03685
03686 if (!bSize)
03687 throw Integer::DivideByZero();
03688
03689 if (a.PositiveCompare(b) == -1)
03690 {
03691 remainder = a;
03692 remainder.sign = Integer::POSITIVE;
03693 quotient = Integer::Zero();
03694 return;
03695 }
03696
03697 aSize += aSize%2;
03698 bSize += bSize%2;
03699
03700 remainder.reg.CleanNew(RoundupSize(bSize));
03701 remainder.sign = Integer::POSITIVE;
03702 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03703 quotient.sign = Integer::POSITIVE;
03704
03705 SecAlignedWordBlock T(aSize+2*bSize+4);
03706 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03707 }
03708
03709 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03710 {
03711 PositiveDivide(remainder, quotient, dividend, divisor);
03712
03713 if (dividend.IsNegative())
03714 {
03715 quotient.Negate();
03716 if (remainder.NotZero())
03717 {
03718 --quotient;
03719 remainder = divisor.AbsoluteValue() - remainder;
03720 }
03721 }
03722
03723 if (divisor.IsNegative())
03724 quotient.Negate();
03725 }
03726
03727 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03728 {
03729 q = a;
03730 q >>= n;
03731
03732 const unsigned int wordCount = BitsToWords(n);
03733 if (wordCount <= a.WordCount())
03734 {
03735 r.reg.resize(RoundupSize(wordCount));
03736 CopyWords(r.reg, a.reg, wordCount);
03737 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03738 if (n % WORD_BITS != 0)
03739 r.reg[wordCount-1] %= (1 << (n % WORD_BITS));
03740 }
03741 else
03742 {
03743 r.reg.resize(RoundupSize(a.WordCount()));
03744 CopyWords(r.reg, a.reg, r.reg.size());
03745 }
03746 r.sign = POSITIVE;
03747
03748 if (a.IsNegative() && r.NotZero())
03749 {
03750 --q;
03751 r = Power2(n) - r;
03752 }
03753 }
03754
03755 Integer Integer::DividedBy(const Integer &b) const
03756 {
03757 Integer remainder, quotient;
03758 Integer::Divide(remainder, quotient, *this, b);
03759 return quotient;
03760 }
03761
03762 Integer Integer::Modulo(const Integer &b) const
03763 {
03764 Integer remainder, quotient;
03765 Integer::Divide(remainder, quotient, *this, b);
03766 return remainder;
03767 }
03768
03769 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03770 {
03771 if (!divisor)
03772 throw Integer::DivideByZero();
03773
03774 assert(divisor);
03775
03776 if ((divisor & (divisor-1)) == 0)
03777 {
03778 quotient = dividend >> (BitPrecision(divisor)-1);
03779 remainder = dividend.reg[0] & (divisor-1);
03780 return;
03781 }
03782
03783 unsigned int i = dividend.WordCount();
03784 quotient.reg.CleanNew(RoundupSize(i));
03785 remainder = 0;
03786 while (i--)
03787 {
03788 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03789 remainder = DWord(dividend.reg[i], remainder) % divisor;
03790 }
03791
03792 if (dividend.NotNegative())
03793 quotient.sign = POSITIVE;
03794 else
03795 {
03796 quotient.sign = NEGATIVE;
03797 if (remainder)
03798 {
03799 --quotient;
03800 remainder = divisor - remainder;
03801 }
03802 }
03803 }
03804
03805 Integer Integer::DividedBy(word b) const
03806 {
03807 word remainder;
03808 Integer quotient;
03809 Integer::Divide(remainder, quotient, *this, b);
03810 return quotient;
03811 }
03812
03813 word Integer::Modulo(word divisor) const
03814 {
03815 if (!divisor)
03816 throw Integer::DivideByZero();
03817
03818 assert(divisor);
03819
03820 word remainder;
03821
03822 if ((divisor & (divisor-1)) == 0)
03823 remainder = reg[0] & (divisor-1);
03824 else
03825 {
03826 unsigned int i = WordCount();
03827
03828 if (divisor <= 5)
03829 {
03830 DWord sum(0, 0);
03831 while (i--)
03832 sum += reg[i];
03833 remainder = sum % divisor;
03834 }
03835 else
03836 {
03837 remainder = 0;
03838 while (i--)
03839 remainder = DWord(reg[i], remainder) % divisor;
03840 }
03841 }
03842
03843 if (IsNegative() && remainder)
03844 remainder = divisor - remainder;
03845
03846 return remainder;
03847 }
03848
03849 void Integer::Negate()
03850 {
03851 if (!!(*this))
03852 sign = Sign(1-sign);
03853 }
03854
03855 int Integer::PositiveCompare(const Integer& t) const
03856 {
03857 unsigned size = WordCount(), tSize = t.WordCount();
03858
03859 if (size == tSize)
03860 return CryptoPP::Compare(reg, t.reg, size);
03861 else
03862 return size > tSize ? 1 : -1;
03863 }
03864
03865 int Integer::Compare(const Integer& t) const
03866 {
03867 if (NotNegative())
03868 {
03869 if (t.NotNegative())
03870 return PositiveCompare(t);
03871 else
03872 return 1;
03873 }
03874 else
03875 {
03876 if (t.NotNegative())
03877 return -1;
03878 else
03879 return -PositiveCompare(t);
03880 }
03881 }
03882
03883 Integer Integer::SquareRoot() const
03884 {
03885 if (!IsPositive())
03886 return Zero();
03887
03888
03889 Integer x, y = Power2((BitCount()+1)/2);
03890 assert(y*y >= *this);
03891
03892 do
03893 {
03894 x = y;
03895 y = (x + *this/x) >> 1;
03896 } while (y<x);
03897
03898 return x;
03899 }
03900
03901 bool Integer::IsSquare() const
03902 {
03903 Integer r = SquareRoot();
03904 return *this == r.Squared();
03905 }
03906
03907 bool Integer::IsUnit() const
03908 {
03909 return (WordCount() == 1) && (reg[0] == 1);
03910 }
03911
03912 Integer Integer::MultiplicativeInverse() const
03913 {
03914 return IsUnit() ? *this : Zero();
03915 }
03916
03917 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03918 {
03919 return x*y%m;
03920 }
03921
03922 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03923 {
03924 ModularArithmetic mr(m);
03925 return mr.Exponentiate(x, e);
03926 }
03927
03928 Integer Integer::Gcd(const Integer &a, const Integer &b)
03929 {
03930 return EuclideanDomainOf<Integer>().Gcd(a, b);
03931 }
03932
03933 Integer Integer::InverseMod(const Integer &m) const
03934 {
03935 assert(m.NotNegative());
03936
03937 if (IsNegative() || *this>=m)
03938 return (*this%m).InverseMod(m);
03939
03940 if (m.IsEven())
03941 {
03942 if (!m || IsEven())
03943 return Zero();
03944 if (*this == One())
03945 return One();
03946
03947 Integer u = m.InverseMod(*this);
03948 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03949 }
03950
03951 SecBlock<word> T(m.reg.size() * 4);
03952 Integer r((word)0, m.reg.size());
03953 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03954 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03955 return r;
03956 }
03957
03958 word Integer::InverseMod(const word mod) const
03959 {
03960 word g0 = mod, g1 = *this % mod;
03961 word v0 = 0, v1 = 1;
03962 word y;
03963
03964 while (g1)
03965 {
03966 if (g1 == 1)
03967 return v1;
03968 y = g0 / g1;
03969 g0 = g0 % g1;
03970 v0 += y * v1;
03971
03972 if (!g0)
03973 break;
03974 if (g0 == 1)
03975 return mod-v0;
03976 y = g1 / g0;
03977 g1 = g1 % g0;
03978 v1 += y * v0;
03979 }
03980 return 0;
03981 }
03982
03983
03984
03985 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
03986 {
03987 BERSequenceDecoder seq(bt);
03988 OID oid(seq);
03989 if (oid != ASN1::prime_field())
03990 BERDecodeError();
03991 m_modulus.BERDecode(seq);
03992 seq.MessageEnd();
03993 m_result.reg.resize(m_modulus.reg.size());
03994 }
03995
03996 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
03997 {
03998 DERSequenceEncoder seq(bt);
03999 ASN1::prime_field().DEREncode(seq);
04000 m_modulus.DEREncode(seq);
04001 seq.MessageEnd();
04002 }
04003
04004 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04005 {
04006 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04007 }
04008
04009 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04010 {
04011 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04012 }
04013
04014 const Integer& ModularArithmetic::Half(const Integer &a) const
04015 {
04016 if (a.reg.size()==m_modulus.reg.size())
04017 {
04018 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04019 return m_result;
04020 }
04021 else
04022 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04023 }
04024
04025 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04026 {
04027 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04028 {
04029 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04030 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04031 {
04032 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04033 }
04034 return m_result;
04035 }
04036 else
04037 {
04038 m_result1 = a+b;
04039 if (m_result1 >= m_modulus)
04040 m_result1 -= m_modulus;
04041 return m_result1;
04042 }
04043 }
04044
04045 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04046 {
04047 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04048 {
04049 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04050 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04051 {
04052 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04053 }
04054 }
04055 else
04056 {
04057 a+=b;
04058 if (a>=m_modulus)
04059 a-=m_modulus;
04060 }
04061
04062 return a;
04063 }
04064
04065 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04066 {
04067 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04068 {
04069 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04070 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04071 return m_result;
04072 }
04073 else
04074 {
04075 m_result1 = a-b;
04076 if (m_result1.IsNegative())
04077 m_result1 += m_modulus;
04078 return m_result1;
04079 }
04080 }
04081
04082 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04083 {
04084 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04085 {
04086 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04087 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04088 }
04089 else
04090 {
04091 a-=b;
04092 if (a.IsNegative())
04093 a+=m_modulus;
04094 }
04095
04096 return a;
04097 }
04098
04099 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04100 {
04101 if (!a)
04102 return a;
04103
04104 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04105 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04106 Decrement(m_result.reg.begin()+a.reg.size(), 1, m_modulus.reg.size()-a.reg.size());
04107
04108 return m_result;
04109 }
04110
04111 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04112 {
04113 if (m_modulus.IsOdd())
04114 {
04115 MontgomeryRepresentation dr(m_modulus);
04116 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04117 }
04118 else
04119 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04120 }
04121
04122 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04123 {
04124 if (m_modulus.IsOdd())
04125 {
04126 MontgomeryRepresentation dr(m_modulus);
04127 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04128 for (unsigned int i=0; i<exponentsCount; i++)
04129 results[i] = dr.ConvertOut(results[i]);
04130 }
04131 else
04132 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04133 }
04134
04135 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04136 : ModularArithmetic(m),
04137 m_u((word)0, m_modulus.reg.size()),
04138 m_workspace(5*m_modulus.reg.size())
04139 {
04140 if (!m_modulus.IsOdd())
04141 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04142
04143 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04144 }
04145
04146 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04147 {
04148 word *const T = m_workspace.begin();
04149 word *const R = m_result.reg.begin();
04150 const unsigned int N = m_modulus.reg.size();
04151 assert(a.reg.size()<=N && b.reg.size()<=N);
04152
04153 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04154 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04155 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04156 return m_result;
04157 }
04158
04159 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04160 {
04161 word *const T = m_workspace.begin();
04162 word *const R = m_result.reg.begin();
04163 const unsigned int N = m_modulus.reg.size();
04164 assert(a.reg.size()<=N);
04165
04166 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04167 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04168 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04169 return m_result;
04170 }
04171
04172 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04173 {
04174 word *const T = m_workspace.begin();
04175 word *const R = m_result.reg.begin();
04176 const unsigned int N = m_modulus.reg.size();
04177 assert(a.reg.size()<=N);
04178
04179 CopyWords(T, a.reg, a.reg.size());
04180 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04181 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04182 return m_result;
04183 }
04184
04185 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04186 {
04187
04188 word *const T = m_workspace.begin();
04189 word *const R = m_result.reg.begin();
04190 const unsigned int N = m_modulus.reg.size();
04191 assert(a.reg.size()<=N);
04192
04193 CopyWords(T, a.reg, a.reg.size());
04194 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04195 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04196 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04197
04198
04199
04200 if (k>N*WORD_BITS)
04201 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04202 else
04203 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04204
04205 return m_result;
04206 }
04207
04208 NAMESPACE_END
04209
04210 #endif