SHOGUN
v1.1.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2009 Soeren Sonnenburg 00008 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society 00009 * 00010 * The MD5 and Murmor hashing functions were integrated from public sources. 00011 * Their respective copyrights follow. 00012 * 00013 * MD5 00014 * 00015 * This code implements the MD5 message-digest algorithm. 00016 * The algorithm is due to Ron Rivest. This code was 00017 * written by Colin Plumb in 1993, no copyright is claimed. 00018 * This code is in the public domain; do with it what you wish. 00019 * 00020 * Equivalent code is available from RSA Data Security, Inc. 00021 * This code has been tested against that, and is equivalent, 00022 * except that you don't need to include two pages of legalese 00023 * with every copy. 00024 * 00025 * To compute the message digest of a chunk of bytes, declare an 00026 * MD5Context structure, pass it to MD5Init, call MD5Update as 00027 * needed on buffers full of bytes, and then call MD5Final, which 00028 * will fill a supplied 16-byte array with the digest. 00029 * 00030 * MurmurHash2 00031 * 00032 * (C) Austin Appleby, released under the MIT License 00033 * 00034 * Note - This code makes a few assumptions about how your machine behaves - 00035 * 00036 * 1. We can read a 4-byte value from any address without crashing 00037 * 2. It will not produce the same results on little-endian and big-endian 00038 * machines. 00039 */ 00040 00041 #include <shogun/lib/common.h> 00042 #include <shogun/lib/Hash.h> 00043 #include <ctype.h> 00044 00045 using namespace shogun; 00046 00047 uint32_t CHash::crc32(uint8_t *data, int32_t len) 00048 { 00049 uint32_t result; 00050 int32_t i,j; 00051 uint8_t octet; 00052 00053 result = 0-1; 00054 for (i=0; i<len; i++) 00055 { 00056 octet = *(data++); 00057 for (j=0; j<8; j++) 00058 { 00059 if ((octet >> 7) ^ (result >> 31)) 00060 { 00061 result = (result << 1) ^ 0x04c11db7; 00062 } 00063 else 00064 { 00065 result = (result << 1); 00066 } 00067 octet <<= 1; 00068 } 00069 } 00070 00071 return ~result; 00072 } 00073 00074 void CHash::MD5(unsigned char *x, unsigned l, unsigned char *buf) 00075 { 00076 struct MD5Context ctx; 00077 00078 MD5Init(&ctx); 00079 MD5Update(&ctx, x, l); 00080 MD5Final(buf, &ctx); 00081 } 00082 00083 #ifndef HIGHFIRST 00084 #define byteReverse(buf, len) /* Nothing */ 00085 #else 00086 void byteReverse(unsigned char *buf, unsigned uint32_t longs); 00087 00088 #ifndef ASM_MD5 00089 /* 00090 * Note: this code is harmless on little-endian machines. 00091 */ 00092 void byteReverse(unsigned char *buf, unsigned uint32_t longs) 00093 { 00094 uint32_t t; 00095 do { 00096 t = (uint32_t) ((unsigned) buf[3] << 8 | buf[2]) << 16 | 00097 ((unsigned) buf[1] << 8 | buf[0]); 00098 *(uint32_t *) buf = t; 00099 buf += 4; 00100 } while (--longs); 00101 } 00102 #endif 00103 #endif 00104 00105 void CHash::MD5Init(struct MD5Context *ctx) 00106 { 00107 ctx->buf[0] = 0x67452301; 00108 ctx->buf[1] = 0xefcdab89; 00109 ctx->buf[2] = 0x98badcfe; 00110 ctx->buf[3] = 0x10325476; 00111 00112 ctx->bits[0] = 0; 00113 ctx->bits[1] = 0; 00114 } 00115 00116 void CHash::MD5Update(struct MD5Context *ctx, unsigned char const *buf, 00117 unsigned len) 00118 { 00119 uint32_t t; 00120 00121 /* Update bitcount */ 00122 00123 t = ctx->bits[0]; 00124 if ((ctx->bits[0] = t + ((uint32_t) len << 3)) < t) 00125 ctx->bits[1]++; /* Carry from low to high */ 00126 ctx->bits[1] += len >> 29; 00127 00128 t = (t >> 3) & 0x3f; /* Bytes already in shsInfo->data */ 00129 00130 /* Handle any leading odd-sized chunks */ 00131 00132 if (t) { 00133 unsigned char *p = (unsigned char *) ctx->in + t; 00134 00135 t = 64 - t; 00136 if (len < t) { 00137 memcpy(p, buf, len); 00138 return; 00139 } 00140 memcpy(p, buf, t); 00141 byteReverse(ctx->in, 16); 00142 MD5Transform(ctx->buf, (uint32_t *) ctx->in); 00143 buf += t; 00144 len -= t; 00145 } 00146 /* Process data in 64-byte chunks */ 00147 00148 while (len >= 64) { 00149 memcpy(ctx->in, buf, 64); 00150 byteReverse(ctx->in, 16); 00151 MD5Transform(ctx->buf, (uint32_t *) ctx->in); 00152 buf += 64; 00153 len -= 64; 00154 } 00155 00156 /* Handle any remaining bytes of data. */ 00157 00158 memcpy(ctx->in, buf, len); 00159 } 00160 00161 void CHash::MD5Final(unsigned char digest[16], struct MD5Context *ctx) 00162 { 00163 unsigned count; 00164 unsigned char *p; 00165 00166 /* Compute number of bytes mod 64 */ 00167 count = (ctx->bits[0] >> 3) & 0x3F; 00168 00169 /* Set the first char of padding to 0x80. This is safe since there is 00170 always at least one byte free */ 00171 p = ctx->in + count; 00172 *p++ = 0x80; 00173 00174 /* Bytes of padding needed to make 64 bytes */ 00175 count = 64 - 1 - count; 00176 00177 /* Pad out to 56 mod 64 */ 00178 if (count < 8) { 00179 /* Two lots of padding: Pad the first block to 64 bytes */ 00180 memset(p, 0, count); 00181 byteReverse(ctx->in, 16); 00182 MD5Transform(ctx->buf, (uint32_t *) ctx->in); 00183 00184 /* Now fill the next block with 56 bytes */ 00185 memset(ctx->in, 0, 56); 00186 } else { 00187 /* Pad block to 56 bytes */ 00188 memset(p, 0, count - 8); 00189 } 00190 byteReverse(ctx->in, 14); 00191 00192 /* Append length in bits and transform */ 00193 ((uint32_t *) ctx->in)[14] = ctx->bits[0]; 00194 ((uint32_t *) ctx->in)[15] = ctx->bits[1]; 00195 00196 MD5Transform(ctx->buf, (uint32_t *) ctx->in); 00197 byteReverse((unsigned char *) ctx->buf, 4); 00198 memcpy(digest, ctx->buf, 16); 00199 memset(ctx, 0, sizeof(ctx)); /* In case it's sensitive */ 00200 } 00201 00202 #ifndef ASM_MD5 00203 00204 /* The four core functions - F1 is optimized somewhat */ 00205 00206 /* #define F1(x, y, z) (x & y | ~x & z) */ 00207 #define F1(x, y, z) (z ^ (x & (y ^ z))) 00208 #define F2(x, y, z) F1(z, x, y) 00209 #define F3(x, y, z) (x ^ y ^ z) 00210 #define F4(x, y, z) (y ^ (x | ~z)) 00211 00212 /* This is the central step in the MD5 algorithm. */ 00213 #ifdef __PUREC__ 00214 #define MD5STEP(f, w, x, y, z, data, s) \ 00215 ( w += f /*(x, y, z)*/ + data, w = w<<s | w>>(32-s), w += x ) 00216 #else 00217 #define MD5STEP(f, w, x, y, z, data, s) \ 00218 ( w += f(x, y, z) + data, w = w<<s | w>>(32-s), w += x ) 00219 #endif 00220 00221 void CHash::MD5Transform(uint32_t buf[4], uint32_t const in[16]) 00222 { 00223 register uint32_t a, b, c, d; 00224 00225 a = buf[0]; 00226 b = buf[1]; 00227 c = buf[2]; 00228 d = buf[3]; 00229 00230 #ifdef __PUREC__ /* PureC Weirdness... (GG) */ 00231 MD5STEP(F1(b, c, d), a, b, c, d, in[0] + 0xd76aa478L, 7); 00232 MD5STEP(F1(a, b, c), d, a, b, c, in[1] + 0xe8c7b756L, 12); 00233 MD5STEP(F1(d, a, b), c, d, a, b, in[2] + 0x242070dbL, 17); 00234 MD5STEP(F1(c, d, a), b, c, d, a, in[3] + 0xc1bdceeeL, 22); 00235 MD5STEP(F1(b, c, d), a, b, c, d, in[4] + 0xf57c0fafL, 7); 00236 MD5STEP(F1(a, b, c), d, a, b, c, in[5] + 0x4787c62aL, 12); 00237 MD5STEP(F1(d, a, b), c, d, a, b, in[6] + 0xa8304613L, 17); 00238 MD5STEP(F1(c, d, a), b, c, d, a, in[7] + 0xfd469501L, 22); 00239 MD5STEP(F1(b, c, d), a, b, c, d, in[8] + 0x698098d8L, 7); 00240 MD5STEP(F1(a, b, c), d, a, b, c, in[9] + 0x8b44f7afL, 12); 00241 MD5STEP(F1(d, a, b), c, d, a, b, in[10] + 0xffff5bb1L, 17); 00242 MD5STEP(F1(c, d, a), b, c, d, a, in[11] + 0x895cd7beL, 22); 00243 MD5STEP(F1(b, c, d), a, b, c, d, in[12] + 0x6b901122L, 7); 00244 MD5STEP(F1(a, b, c), d, a, b, c, in[13] + 0xfd987193L, 12); 00245 MD5STEP(F1(d, a, b), c, d, a, b, in[14] + 0xa679438eL, 17); 00246 MD5STEP(F1(c, d, a), b, c, d, a, in[15] + 0x49b40821L, 22); 00247 00248 MD5STEP(F2(b, c, d), a, b, c, d, in[1] + 0xf61e2562L, 5); 00249 MD5STEP(F2(a, b, c), d, a, b, c, in[6] + 0xc040b340L, 9); 00250 MD5STEP(F2(d, a, b), c, d, a, b, in[11] + 0x265e5a51L, 14); 00251 MD5STEP(F2(c, d, a), b, c, d, a, in[0] + 0xe9b6c7aaL, 20); 00252 MD5STEP(F2(b, c, d), a, b, c, d, in[5] + 0xd62f105dL, 5); 00253 MD5STEP(F2(a, b, c), d, a, b, c, in[10] + 0x02441453L, 9); 00254 MD5STEP(F2(d, a, b), c, d, a, b, in[15] + 0xd8a1e681L, 14); 00255 MD5STEP(F2(c, d, a), b, c, d, a, in[4] + 0xe7d3fbc8L, 20); 00256 MD5STEP(F2(b, c, d), a, b, c, d, in[9] + 0x21e1cde6L, 5); 00257 MD5STEP(F2(a, b, c), d, a, b, c, in[14] + 0xc33707d6L, 9); 00258 MD5STEP(F2(d, a, b), c, d, a, b, in[3] + 0xf4d50d87L, 14); 00259 MD5STEP(F2(c, d, a), b, c, d, a, in[8] + 0x455a14edL, 20); 00260 MD5STEP(F2(b, c, d), a, b, c, d, in[13] + 0xa9e3e905L, 5); 00261 MD5STEP(F2(a, b, c), d, a, b, c, in[2] + 0xfcefa3f8L, 9); 00262 MD5STEP(F2(d, a, b), c, d, a, b, in[7] + 0x676f02d9L, 14); 00263 MD5STEP(F2(c, d, a), b, c, d, a, in[12] + 0x8d2a4c8aL, 20); 00264 00265 MD5STEP(F3(b, c, d), a, b, c, d, in[5] + 0xfffa3942L, 4); 00266 MD5STEP(F3(a, b, c), d, a, b, c, in[8] + 0x8771f681L, 11); 00267 MD5STEP(F3(d, a, b), c, d, a, b, in[11] + 0x6d9d6122L, 16); 00268 MD5STEP(F3(c, d, a), b, c, d, a, in[14] + 0xfde5380cL, 23); 00269 MD5STEP(F3(b, c, d), a, b, c, d, in[1] + 0xa4beea44L, 4); 00270 MD5STEP(F3(a, b, c), d, a, b, c, in[4] + 0x4bdecfa9L, 11); 00271 MD5STEP(F3(d, a, b), c, d, a, b, in[7] + 0xf6bb4b60L, 16); 00272 MD5STEP(F3(c, d, a), b, c, d, a, in[10] + 0xbebfbc70L, 23); 00273 MD5STEP(F3(b, c, d), a, b, c, d, in[13] + 0x289b7ec6L, 4); 00274 MD5STEP(F3(a, b, c), d, a, b, c, in[0] + 0xeaa127faL, 11); 00275 MD5STEP(F3(d, a, b), c, d, a, b, in[3] + 0xd4ef3085L, 16); 00276 MD5STEP(F3(c, d, a), b, c, d, a, in[6] + 0x04881d05L, 23); 00277 MD5STEP(F3(b, c, d), a, b, c, d, in[9] + 0xd9d4d039L, 4); 00278 MD5STEP(F3(a, b, c), d, a, b, c, in[12] + 0xe6db99e5L, 11); 00279 MD5STEP(F3(d, a, b), c, d, a, b, in[15] + 0x1fa27cf8L, 16); 00280 MD5STEP(F3(c, d, a), b, c, d, a, in[2] + 0xc4ac5665L, 23); 00281 00282 MD5STEP(F4(b, c, d), a, b, c, d, in[0] + 0xf4292244L, 6); 00283 MD5STEP(F4(a, b, c), d, a, b, c, in[7] + 0x432aff97L, 10); 00284 MD5STEP(F4(d, a, b), c, d, a, b, in[14] + 0xab9423a7L, 15); 00285 MD5STEP(F4(c, d, a), b, c, d, a, in[5] + 0xfc93a039L, 21); 00286 MD5STEP(F4(b, c, d), a, b, c, d, in[12] + 0x655b59c3L, 6); 00287 MD5STEP(F4(a, b, c), d, a, b, c, in[3] + 0x8f0ccc92L, 10); 00288 MD5STEP(F4(d, a, b), c, d, a, b, in[10] + 0xffeff47dL, 15); 00289 MD5STEP(F4(c, d, a), b, c, d, a, in[1] + 0x85845dd1L, 21); 00290 MD5STEP(F4(b, c, d), a, b, c, d, in[8] + 0x6fa87e4fL, 6); 00291 MD5STEP(F4(a, b, c), d, a, b, c, in[15] + 0xfe2ce6e0L, 10); 00292 MD5STEP(F4(d, a, b), c, d, a, b, in[6] + 0xa3014314L, 15); 00293 MD5STEP(F4(c, d, a), b, c, d, a, in[13] + 0x4e0811a1L, 21); 00294 MD5STEP(F4(b, c, d), a, b, c, d, in[4] + 0xf7537e82L, 6); 00295 MD5STEP(F4(a, b, c), d, a, b, c, in[11] + 0xbd3af235L, 10); 00296 MD5STEP(F4(d, a, b), c, d, a, b, in[2] + 0x2ad7d2bbL, 15); 00297 MD5STEP(F4(c, d, a), b, c, d, a, in[9] + 0xeb86d391L, 21); 00298 #else 00299 MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); 00300 MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); 00301 MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); 00302 MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); 00303 MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); 00304 MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); 00305 MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); 00306 MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); 00307 MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); 00308 MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); 00309 MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); 00310 MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); 00311 MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); 00312 MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); 00313 MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); 00314 MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); 00315 00316 MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); 00317 MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); 00318 MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); 00319 MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); 00320 MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); 00321 MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); 00322 MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); 00323 MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); 00324 MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); 00325 MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); 00326 MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); 00327 MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); 00328 MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); 00329 MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); 00330 MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); 00331 MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); 00332 00333 MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); 00334 MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); 00335 MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); 00336 MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); 00337 MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); 00338 MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); 00339 MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); 00340 MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); 00341 MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); 00342 MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); 00343 MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); 00344 MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); 00345 MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); 00346 MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); 00347 MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); 00348 MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); 00349 00350 MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); 00351 MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); 00352 MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); 00353 MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); 00354 MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); 00355 MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); 00356 MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); 00357 MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); 00358 MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); 00359 MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10); 00360 MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); 00361 MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); 00362 MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); 00363 MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); 00364 MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); 00365 MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); 00366 #endif 00367 00368 buf[0] += a; 00369 buf[1] += b; 00370 buf[2] += c; 00371 buf[3] += d; 00372 } 00373 #endif 00374 00375 uint32_t CHash::MurmurHash2(uint8_t* data, int32_t len, uint32_t seed) 00376 { 00377 // 'm' and 'r' are mixing constants generated offline. 00378 // They're not really 'magic', they just happen to work well. 00379 00380 const uint32_t m = 0x5bd1e995; 00381 const int32_t r = 24; 00382 00383 // Initialize the hash to a 'random' value 00384 00385 uint32_t h = seed ^ len; 00386 00387 // Mix 4 bytes at a time into the hash 00388 00389 while(len >= 4) 00390 { 00391 uint32_t k = *(uint32_t *)data; 00392 00393 k *= m; 00394 k ^= k >> r; 00395 k *= m; 00396 00397 h *= m; 00398 h ^= k; 00399 00400 data += 4; 00401 len -= 4; 00402 } 00403 00404 // Handle the last few bytes of the input array 00405 00406 switch(len) 00407 { 00408 case 3: h ^= data[2] << 16; 00409 case 2: h ^= data[1] << 8; 00410 case 1: h ^= data[0]; 00411 h *= m; 00412 }; 00413 00414 // Do a few final mixes of the hash to ensure the last few 00415 // bytes are well-incorporated. 00416 00417 h ^= h >> 13; 00418 h *= m; 00419 h ^= h >> 15; 00420 00421 return h; 00422 } 00423 00424 uint32_t CHash::IncrementalMurmurHash2(uint8_t data, uint32_t h) 00425 { 00426 // 'm' and 'r' are mixing constants generated offline. 00427 // They're not really 'magic', they just happen to work well. 00428 00429 const uint32_t m = 0x5bd1e995; 00430 00431 h ^= data; 00432 h *= m; 00433 00434 // Do a few final mixes of the hash to ensure the last few 00435 // bytes are well-incorporated. 00436 00437 h ^= h >> 13; 00438 h *= m; 00439 h ^= h >> 15; 00440 00441 return h; 00442 } 00443 00444 uint32_t CHash::MurmurHashString(substring s, uint32_t h) 00445 { 00446 uint32_t ret = 0; 00447 00448 // Trim leading whitespace 00449 for(; *(s.start) <= 0x20 && s.start < s.end; s.start++); 00450 00451 // Trim trailing white space 00452 for(; *(s.end-1) <= 0x20 && s.end > s.start; s.end--); 00453 00454 char *p = s.start; 00455 while (p != s.end) 00456 if (isdigit(*p)) 00457 ret = 10*ret + *(p++) - '0'; 00458 else 00459 return MurmurHash2((uint8_t *)s.start, s.end - s.start, h); 00460 00461 return ret + h; 00462 }