// Crypto++
00001 #ifndef CRYPTOPP_CPU_H 00002 #define CRYPTOPP_CPU_H 00003 00004 #ifdef CRYPTOPP_GENERATE_X64_MASM 00005 00006 #define CRYPTOPP_X86_ASM_AVAILABLE 00007 #define CRYPTOPP_BOOL_X64 1 00008 #define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1 00009 #define NAMESPACE_END 00010 00011 #else 00012 00013 #include "config.h" 00014 00015 #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 00016 #include <emmintrin.h> 00017 #endif 00018 00019 #if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 00020 #if !defined(__GNUC__) || defined(__SSSE3__) || defined(__INTEL_COMPILER) 00021 #include <tmmintrin.h> 00022 #else 00023 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00024 _mm_shuffle_epi8 (__m128i a, __m128i b) 00025 { 00026 asm ("pshufb %1, %0" : "+x"(a) : "xm"(b)); 00027 return a; 00028 } 00029 #endif 00030 #if !defined(__GNUC__) || defined(__SSE4_1__) || defined(__INTEL_COMPILER) 00031 #include <smmintrin.h> 00032 #else 00033 __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00034 _mm_extract_epi32 (__m128i a, const int i) 00035 { 00036 int r; 00037 asm ("pextrd %2, %1, %0" : "=rm"(r) : "x"(a), "i"(i)); 00038 return r; 00039 } 00040 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00041 _mm_insert_epi32 (__m128i a, int b, const int i) 00042 { 00043 asm ("pinsrd %2, %1, %0" : "+x"(a) : "rm"(b), "i"(i)); 00044 return a; 00045 } 00046 #endif 00047 #if !defined(__GNUC__) || (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER) 00048 #include <wmmintrin.h> 00049 #else 00050 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00051 _mm_clmulepi64_si128 (__m128i a, __m128i b, const int i) 00052 { 00053 asm ("pclmulqdq %2, %1, %0" : "+x"(a) : "xm"(b), "i"(i)); 00054 return a; 00055 } 00056 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00057 _mm_aeskeygenassist_si128 (__m128i a, const int i) 00058 { 00059 __m128i r; 
00060 asm ("aeskeygenassist %2, %1, %0" : "=x"(r) : "xm"(a), "i"(i)); 00061 return r; 00062 } 00063 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00064 _mm_aesimc_si128 (__m128i a) 00065 { 00066 __m128i r; 00067 asm ("aesimc %1, %0" : "=x"(r) : "xm"(a)); 00068 return r; 00069 } 00070 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00071 _mm_aesenc_si128 (__m128i a, __m128i b) 00072 { 00073 asm ("aesenc %1, %0" : "+x"(a) : "xm"(b)); 00074 return a; 00075 } 00076 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00077 _mm_aesenclast_si128 (__m128i a, __m128i b) 00078 { 00079 asm ("aesenclast %1, %0" : "+x"(a) : "xm"(b)); 00080 return a; 00081 } 00082 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00083 _mm_aesdec_si128 (__m128i a, __m128i b) 00084 { 00085 asm ("aesdec %1, %0" : "+x"(a) : "xm"(b)); 00086 return a; 00087 } 00088 __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 00089 _mm_aesdeclast_si128 (__m128i a, __m128i b) 00090 { 00091 asm ("aesdeclast %1, %0" : "+x"(a) : "xm"(b)); 00092 return a; 00093 } 00094 #endif 00095 #endif 00096 00097 NAMESPACE_BEGIN(CryptoPP) 00098 00099 #if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64 00100 00101 #define CRYPTOPP_CPUID_AVAILABLE 00102 00103 // these should not be used directly 00104 extern CRYPTOPP_DLL bool g_x86DetectionDone; 00105 extern CRYPTOPP_DLL bool g_hasSSSE3; 00106 extern CRYPTOPP_DLL bool g_hasAESNI; 00107 extern CRYPTOPP_DLL bool g_hasCLMUL; 00108 extern CRYPTOPP_DLL bool g_isP4; 00109 extern CRYPTOPP_DLL word32 g_cacheLineSize; 00110 CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features(); 00111 CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 *output); 00112 00113 #if CRYPTOPP_BOOL_X64 00114 inline bool HasSSE2() {return true;} 00115 inline bool HasISSE() {return true;} 00116 inline bool HasMMX() {return true;} 00117 #else 00118 
00119 extern CRYPTOPP_DLL bool g_hasSSE2; 00120 extern CRYPTOPP_DLL bool g_hasISSE; 00121 extern CRYPTOPP_DLL bool g_hasMMX; 00122 00123 inline bool HasSSE2() 00124 { 00125 if (!g_x86DetectionDone) 00126 DetectX86Features(); 00127 return g_hasSSE2; 00128 } 00129 00130 inline bool HasISSE() 00131 { 00132 if (!g_x86DetectionDone) 00133 DetectX86Features(); 00134 return g_hasISSE; 00135 } 00136 00137 inline bool HasMMX() 00138 { 00139 if (!g_x86DetectionDone) 00140 DetectX86Features(); 00141 return g_hasMMX; 00142 } 00143 00144 #endif 00145 00146 inline bool HasSSSE3() 00147 { 00148 if (!g_x86DetectionDone) 00149 DetectX86Features(); 00150 return g_hasSSSE3; 00151 } 00152 00153 inline bool HasAESNI() 00154 { 00155 if (!g_x86DetectionDone) 00156 DetectX86Features(); 00157 return g_hasAESNI; 00158 } 00159 00160 inline bool HasCLMUL() 00161 { 00162 if (!g_x86DetectionDone) 00163 DetectX86Features(); 00164 return g_hasCLMUL; 00165 } 00166 00167 inline bool IsP4() 00168 { 00169 if (!g_x86DetectionDone) 00170 DetectX86Features(); 00171 return g_isP4; 00172 } 00173 00174 inline int GetCacheLineSize() 00175 { 00176 if (!g_x86DetectionDone) 00177 DetectX86Features(); 00178 return g_cacheLineSize; 00179 } 00180 00181 #else 00182 00183 inline int GetCacheLineSize() 00184 { 00185 return CRYPTOPP_L1_CACHE_LINE_SIZE; 00186 } 00187 00188 #endif 00189 00190 #endif 00191 00192 #ifdef CRYPTOPP_GENERATE_X64_MASM 00193 #define AS1(x) x*newline* 00194 #define AS2(x, y) x, y*newline* 00195 #define AS3(x, y, z) x, y, z*newline* 00196 #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline* 00197 #define ASL(x) label##x:*newline* 00198 #define ASJ(x, y, z) x label##y*newline* 00199 #define ASC(x, y) x label##y*newline* 00200 #define AS_HEX(y) 0##y##h 00201 #elif defined(_MSC_VER) || defined(__BORLANDC__) 00202 #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY 00203 #define AS1(x) __asm {x} 00204 #define AS2(x, y) __asm {x, y} 00205 #define AS3(x, y, z) __asm {x, y, z} 00206 #define ASS(x, y, a, 
b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)} 00207 #define ASL(x) __asm {label##x:} 00208 #define ASJ(x, y, z) __asm {x label##y} 00209 #define ASC(x, y) __asm {x label##y} 00210 #define CRYPTOPP_NAKED __declspec(naked) 00211 #define AS_HEX(y) 0x##y 00212 #else 00213 #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY 00214 // define these in two steps to allow arguments to be expanded 00215 #define GNU_AS1(x) #x ";" 00216 #define GNU_AS2(x, y) #x ", " #y ";" 00217 #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" 00218 #define GNU_ASL(x) "\n" #x ":" 00219 #define GNU_ASJ(x, y, z) #x " " #y #z ";" 00220 #define AS1(x) GNU_AS1(x) 00221 #define AS2(x, y) GNU_AS2(x, y) 00222 #define AS3(x, y, z) GNU_AS3(x, y, z) 00223 #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";" 00224 #define ASL(x) GNU_ASL(x) 00225 #define ASJ(x, y, z) GNU_ASJ(x, y, z) 00226 #define ASC(x, y) #x " " #y ";" 00227 #define CRYPTOPP_NAKED 00228 #define AS_HEX(y) 0x##y 00229 #endif 00230 00231 #define IF0(y) 00232 #define IF1(y) y 00233 00234 #ifdef CRYPTOPP_GENERATE_X64_MASM 00235 #define ASM_MOD(x, y) ((x) MOD (y)) 00236 #define XMMWORD_PTR XMMWORD PTR 00237 #else 00238 // GNU assembler doesn't seem to have mod operator 00239 #define ASM_MOD(x, y) ((x)-((x)/(y))*(y)) 00240 // GAS 2.15 doesn't support XMMWORD PTR. 
it seems necessary only for MASM 00241 #define XMMWORD_PTR 00242 #endif 00243 00244 #if CRYPTOPP_BOOL_X86 00245 #define AS_REG_1 ecx 00246 #define AS_REG_2 edx 00247 #define AS_REG_3 esi 00248 #define AS_REG_4 edi 00249 #define AS_REG_5 eax 00250 #define AS_REG_6 ebx 00251 #define AS_REG_7 ebp 00252 #define AS_REG_1d ecx 00253 #define AS_REG_2d edx 00254 #define AS_REG_3d esi 00255 #define AS_REG_4d edi 00256 #define AS_REG_5d eax 00257 #define AS_REG_6d ebx 00258 #define AS_REG_7d ebp 00259 #define WORD_SZ 4 00260 #define WORD_REG(x) e##x 00261 #define WORD_PTR DWORD PTR 00262 #define AS_PUSH_IF86(x) AS1(push e##x) 00263 #define AS_POP_IF86(x) AS1(pop e##x) 00264 #define AS_JCXZ jecxz 00265 #elif CRYPTOPP_BOOL_X64 00266 #ifdef CRYPTOPP_GENERATE_X64_MASM 00267 #define AS_REG_1 rcx 00268 #define AS_REG_2 rdx 00269 #define AS_REG_3 r8 00270 #define AS_REG_4 r9 00271 #define AS_REG_5 rax 00272 #define AS_REG_6 r10 00273 #define AS_REG_7 r11 00274 #define AS_REG_1d ecx 00275 #define AS_REG_2d edx 00276 #define AS_REG_3d r8d 00277 #define AS_REG_4d r9d 00278 #define AS_REG_5d eax 00279 #define AS_REG_6d r10d 00280 #define AS_REG_7d r11d 00281 #else 00282 #define AS_REG_1 rdi 00283 #define AS_REG_2 rsi 00284 #define AS_REG_3 rdx 00285 #define AS_REG_4 rcx 00286 #define AS_REG_5 r8 00287 #define AS_REG_6 r9 00288 #define AS_REG_7 r10 00289 #define AS_REG_1d edi 00290 #define AS_REG_2d esi 00291 #define AS_REG_3d edx 00292 #define AS_REG_4d ecx 00293 #define AS_REG_5d r8d 00294 #define AS_REG_6d r9d 00295 #define AS_REG_7d r10d 00296 #endif 00297 #define WORD_SZ 8 00298 #define WORD_REG(x) r##x 00299 #define WORD_PTR QWORD PTR 00300 #define AS_PUSH_IF86(x) 00301 #define AS_POP_IF86(x) 00302 #define AS_JCXZ jrcxz 00303 #endif 00304 00305 // helper macro for stream cipher output 00306 #define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\ 00307 AS2( test inputPtr, inputPtr)\ 00308 ASC( jz, labelPrefix##3)\ 00309 AS2( test 
inputPtr, 15)\ 00310 ASC( jnz, labelPrefix##7)\ 00311 AS2( pxor xmm##x0, [inputPtr+p0*16])\ 00312 AS2( pxor xmm##x1, [inputPtr+p1*16])\ 00313 AS2( pxor xmm##x2, [inputPtr+p2*16])\ 00314 AS2( pxor xmm##x3, [inputPtr+p3*16])\ 00315 AS2( add inputPtr, increment*16)\ 00316 ASC( jmp, labelPrefix##3)\ 00317 ASL(labelPrefix##7)\ 00318 AS2( movdqu xmm##t, [inputPtr+p0*16])\ 00319 AS2( pxor xmm##x0, xmm##t)\ 00320 AS2( movdqu xmm##t, [inputPtr+p1*16])\ 00321 AS2( pxor xmm##x1, xmm##t)\ 00322 AS2( movdqu xmm##t, [inputPtr+p2*16])\ 00323 AS2( pxor xmm##x2, xmm##t)\ 00324 AS2( movdqu xmm##t, [inputPtr+p3*16])\ 00325 AS2( pxor xmm##x3, xmm##t)\ 00326 AS2( add inputPtr, increment*16)\ 00327 ASL(labelPrefix##3)\ 00328 AS2( test outputPtr, 15)\ 00329 ASC( jnz, labelPrefix##8)\ 00330 AS2( movdqa [outputPtr+p0*16], xmm##x0)\ 00331 AS2( movdqa [outputPtr+p1*16], xmm##x1)\ 00332 AS2( movdqa [outputPtr+p2*16], xmm##x2)\ 00333 AS2( movdqa [outputPtr+p3*16], xmm##x3)\ 00334 ASC( jmp, labelPrefix##9)\ 00335 ASL(labelPrefix##8)\ 00336 AS2( movdqu [outputPtr+p0*16], xmm##x0)\ 00337 AS2( movdqu [outputPtr+p1*16], xmm##x1)\ 00338 AS2( movdqu [outputPtr+p2*16], xmm##x2)\ 00339 AS2( movdqu [outputPtr+p3*16], xmm##x3)\ 00340 ASL(labelPrefix##9)\ 00341 AS2( add outputPtr, increment*16) 00342 00343 NAMESPACE_END 00344 00345 #endif