mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Remove ext files
This commit is contained in:
		
							parent
							
								
									b9016c4633
								
							
						
					
					
						commit
						e5da104bc7
					
				|  | @ -1,523 +0,0 @@ | |||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHash2 was written by Austin Appleby, and is placed in the public
 | ||||
| // domain. The author hereby disclaims copyright to this source code.
 | ||||
| 
 | ||||
| // Note - This code makes a few assumptions about how your machine behaves -
 | ||||
| 
 | ||||
| // 1. We can read a 4-byte value from any address without crashing
 | ||||
| // 2. sizeof(int) == 4
 | ||||
| 
 | ||||
| // And it has a few limitations -
 | ||||
| 
 | ||||
| // 1. It will not work incrementally.
 | ||||
| // 2. It will not produce the same results on little-endian and big-endian
 | ||||
| //    machines.
 | ||||
| 
 | ||||
#include "MurmurHash2.h"

#include <string.h>  // memcpy, used for alignment-safe block reads
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // Platform-specific functions and macros
 | ||||
| 
 | ||||
| // Microsoft Visual Studio
 | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| 
 | ||||
| #define BIG_CONSTANT(x) (x) | ||||
| 
 | ||||
| // Other compilers
 | ||||
| 
 | ||||
| #else	// defined(_MSC_VER)
 | ||||
| 
 | ||||
| #define BIG_CONSTANT(x) (x##LLU) | ||||
| 
 | ||||
| #endif // !defined(_MSC_VER)
 | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
// MurmurHash2: hashes the `len` bytes starting at `key` into a 32-bit value.
//
//   key  - pointer to the input bytes; any alignment is accepted
//   len  - number of bytes to hash
//   seed - caller-chosen value folded into the initial hash state
//
// Returns the 32-bit hash. Whole 4-byte blocks are consumed in the machine's
// native byte order, so results differ between little- and big-endian hosts.

uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
{
  // 'm' and 'r' are mixing constants generated offline.
  // They're not really 'magic', they just happen to work well.

  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  // Initialize the hash to a 'random' value

  uint32_t h = seed ^ len;

  // Mix 4 bytes at a time into the hash

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4)
  {
    // memcpy rather than *(uint32_t*)data: the input may be misaligned and
    // is not a uint32_t object, so a direct cast-and-dereference is undefined.
    // The value obtained is the same native-endian word.
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Handle the last few bytes of the input array

  switch(len)
  {
  case 3: h ^= data[2] << 16;  // fall through
  case 2: h ^= data[1] << 8;   // fall through
  case 1: h ^= data[0];
          h *= m;
          break;
  default:
          break;
  }

  // Do a few final mixes of the hash to ensure the last few
  // bytes are well-incorporated.

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHash2, 64-bit versions, by Austin Appleby
 | ||||
| 
 | ||||
| // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 
 | ||||
| // and endian-ness issues if used across multiple platforms.
 | ||||
| 
 | ||||
| // 64-bit hash for 64-bit platforms
 | ||||
| 
 | ||||
| uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed ) | ||||
| { | ||||
|   const uint64_t m = BIG_CONSTANT(0xc6a4a7935bd1e995); | ||||
|   const int r = 47; | ||||
| 
 | ||||
|   uint64_t h = seed ^ (len * m); | ||||
| 
 | ||||
|   const uint64_t * data = (const uint64_t *)key; | ||||
|   const uint64_t * end = data + (len/8); | ||||
| 
 | ||||
|   while(data != end) | ||||
|   { | ||||
|     uint64_t k = *data++; | ||||
| 
 | ||||
|     k *= m;  | ||||
|     k ^= k >> r;  | ||||
|     k *= m;  | ||||
|      | ||||
|     h ^= k; | ||||
|     h *= m;  | ||||
|   } | ||||
| 
 | ||||
|   const unsigned char * data2 = (const unsigned char*)data; | ||||
| 
 | ||||
|   switch(len & 7) | ||||
|   { | ||||
|   case 7: h ^= uint64_t(data2[6]) << 48; | ||||
|   case 6: h ^= uint64_t(data2[5]) << 40; | ||||
|   case 5: h ^= uint64_t(data2[4]) << 32; | ||||
|   case 4: h ^= uint64_t(data2[3]) << 24; | ||||
|   case 3: h ^= uint64_t(data2[2]) << 16; | ||||
|   case 2: h ^= uint64_t(data2[1]) << 8; | ||||
|   case 1: h ^= uint64_t(data2[0]); | ||||
|           h *= m; | ||||
|   }; | ||||
|   | ||||
|   h ^= h >> r; | ||||
|   h *= m; | ||||
|   h ^= h >> r; | ||||
| 
 | ||||
|   return h; | ||||
| }  | ||||
| 
 | ||||
| 
 | ||||
| // 64-bit hash for 32-bit platforms
 | ||||
| 
 | ||||
// MurmurHash64B: produces a 64-bit hash of the `len` bytes at `key` using
// only 32-bit arithmetic (two interleaved 32-bit lanes, h1 and h2).
//
//   key  - pointer to the input bytes; any alignment is accepted
//   len  - number of bytes to hash
//   seed - low 32 bits seed lane h1, high 32 bits seed lane h2
//
// Returns (h1 << 32) | h2. Blocks are consumed in native byte order.

uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h1 = (uint32_t)seed ^ len;
  uint32_t h2 = (uint32_t)(seed >> 32);

  const unsigned char * data = (const unsigned char *)key;

  // Each iteration feeds one 4-byte word to each lane. Words are fetched
  // with memcpy because the input may be misaligned and is not a uint32_t
  // array; a casted pointer dereference would be undefined.

  while(len >= 8)
  {
    uint32_t k1;
    memcpy(&k1, data, sizeof(k1));
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;

    uint32_t k2;
    memcpy(&k2, data + 4, sizeof(k2));
    k2 *= m; k2 ^= k2 >> r; k2 *= m;
    h2 *= m; h2 ^= k2;

    data += 8;
    len -= 8;
  }

  // One more whole word, if present, goes to lane h1.

  if(len >= 4)
  {
    uint32_t k1;
    memcpy(&k1, data, sizeof(k1));
    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;

    data += 4;
    len -= 4;
  }

  // Remaining 0..3 bytes are folded into lane h2.

  switch(len)
  {
  case 3: h2 ^= data[2] << 16;  // fall through
  case 2: h2 ^= data[1] << 8;   // fall through
  case 1: h2 ^= data[0];
          h2 *= m;
          break;
  default:
          break;
  }

  // Cross-mix the two lanes.

  h1 ^= h2 >> 18; h1 *= m;
  h2 ^= h1 >> 22; h2 *= m;
  h1 ^= h2 >> 17; h1 *= m;
  h2 ^= h1 >> 19; h2 *= m;

  uint64_t h = h1;

  h = (h << 32) | h2;

  return h;
}
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHash2A, by Austin Appleby
 | ||||
| 
 | ||||
| // This is a variant of MurmurHash2 modified to use the Merkle-Damgard 
 | ||||
| // construction. Bulk speed should be identical to Murmur2, small-key speed 
 | ||||
| // will be 10%-20% slower due to the added overhead at the end of the hash.
 | ||||
| 
 | ||||
| // This variant fixes a minor issue where null keys were more likely to
 | ||||
| // collide with each other than expected, and also makes the function
 | ||||
| // more amenable to incremental implementations.
 | ||||
| 
 | ||||
// mmix(h,k): scrambles block `k` and folds it into hash state `h`. Both
// arguments are modified. Expects constants named `m` and `r` to be in scope
// at the point of use. Wrapped in do/while(0) so it acts as one statement.

#define mmix(h,k) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while(0)

// MurmurHash2A: 32-bit hash of the `len` bytes at `key`. Unlike MurmurHash2,
// the tail bytes and the length are each mixed in as full blocks at the end.
//
//   key  - pointer to the input bytes; any alignment is accepted
//   len  - number of bytes to hash
//   seed - initial hash state
//
// Whole 4-byte blocks are consumed in native byte order.

uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;
  uint32_t l = len;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed;

  while(len >= 4)
  {
    // memcpy rather than *(uint32_t*)data: avoids a misaligned,
    // aliasing-violating read while yielding the same native-endian word.
    uint32_t k;
    memcpy(&k, data, sizeof(k));

    mmix(h,k);

    data += 4;
    len -= 4;
  }

  // Gather the 0..3 trailing bytes into one block (zero if there are none).

  uint32_t t = 0;

  switch(len)
  {
  case 3: t ^= data[2] << 16;  // fall through
  case 2: t ^= data[1] << 8;   // fall through
  case 1: t ^= data[0];
          break;
  default:
          break;
  }

  mmix(h,t);
  mmix(h,l);

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // CMurmurHash2A, by Austin Appleby
 | ||||
| 
 | ||||
| // This is a sample implementation of MurmurHash2A designed to work 
 | ||||
| // incrementally.
 | ||||
| 
 | ||||
| // Usage - 
 | ||||
| 
 | ||||
| // CMurmurHash2A hasher
 | ||||
| // hasher.Begin(seed);
 | ||||
| // hasher.Add(data1,size1);
 | ||||
| // hasher.Add(data2,size2);
 | ||||
| // ...
 | ||||
| // hasher.Add(dataN,sizeN);
 | ||||
| // uint32_t hash = hasher.End()
 | ||||
| 
 | ||||
| class CMurmurHash2A | ||||
| { | ||||
| public: | ||||
| 
 | ||||
|   void Begin ( uint32_t seed = 0 ) | ||||
|   { | ||||
|     m_hash  = seed; | ||||
|     m_tail  = 0; | ||||
|     m_count = 0; | ||||
|     m_size  = 0; | ||||
|   } | ||||
| 
 | ||||
|   void Add ( const unsigned char * data, int len ) | ||||
|   { | ||||
|     m_size += len; | ||||
| 
 | ||||
|     MixTail(data,len); | ||||
| 
 | ||||
|     while(len >= 4) | ||||
|     { | ||||
|       uint32_t k = *(uint32_t*)data; | ||||
| 
 | ||||
|       mmix(m_hash,k); | ||||
| 
 | ||||
|       data += 4; | ||||
|       len -= 4; | ||||
|     } | ||||
| 
 | ||||
|     MixTail(data,len); | ||||
|   } | ||||
| 
 | ||||
|   uint32_t End ( void ) | ||||
|   { | ||||
|     mmix(m_hash,m_tail); | ||||
|     mmix(m_hash,m_size); | ||||
| 
 | ||||
|     m_hash ^= m_hash >> 13; | ||||
|     m_hash *= m; | ||||
|     m_hash ^= m_hash >> 15; | ||||
| 
 | ||||
|     return m_hash; | ||||
|   } | ||||
| 
 | ||||
| private: | ||||
| 
 | ||||
|   static const uint32_t m = 0x5bd1e995; | ||||
|   static const int r = 24; | ||||
| 
 | ||||
|   void MixTail ( const unsigned char * & data, int & len ) | ||||
|   { | ||||
|     while( len && ((len<4) || m_count) ) | ||||
|     { | ||||
|       m_tail |= (*data++) << (m_count * 8); | ||||
| 
 | ||||
|       m_count++; | ||||
|       len--; | ||||
| 
 | ||||
|       if(m_count == 4) | ||||
|       { | ||||
|         mmix(m_hash,m_tail); | ||||
|         m_tail = 0; | ||||
|         m_count = 0; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   uint32_t m_hash; | ||||
|   uint32_t m_tail; | ||||
|   uint32_t m_count; | ||||
|   uint32_t m_size; | ||||
| }; | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHashNeutral2, by Austin Appleby
 | ||||
| 
 | ||||
| // Same as MurmurHash2, but endian- and alignment-neutral.
 | ||||
| // Half the speed though, alas.
 | ||||
| 
 | ||||
// MurmurHashNeutral2: 32-bit hash of the `len` bytes at `key`. Each 4-byte
// block is assembled byte by byte (lowest address = least significant byte),
// so no multi-byte loads are performed and the block value does not depend
// on host byte order or pointer alignment.
//
//   key  - pointer to the input bytes
//   len  - number of bytes to hash
//   seed - caller-chosen value folded into the initial hash state

uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h = seed ^ len;

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4)
  {
    uint32_t k;

    // Widen each byte to uint32_t before shifting. Without the cast the byte
    // is promoted to signed int, and `data[3] << 24` shifts into the sign bit
    // whenever data[3] >= 0x80.
    k  = (uint32_t)data[0];
    k |= (uint32_t)data[1] << 8;
    k |= (uint32_t)data[2] << 16;
    k |= (uint32_t)data[3] << 24;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  // Fold in the 0..3 trailing bytes.

  switch(len)
  {
  case 3: h ^= data[2] << 16;  // fall through
  case 2: h ^= data[1] << 8;   // fall through
  case 1: h ^= data[0];
          h *= m;
          break;
  default:
          break;
  }

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHashAligned2, by Austin Appleby
 | ||||
| 
 | ||||
| // Same algorithm as MurmurHash2, but only does aligned reads - should be safer
 | ||||
| // on certain platforms. 
 | ||||
| 
 | ||||
| // Performance will be lower than MurmurHash2
 | ||||
| 
 | ||||
// MIX(h,k,m): scrambles block `k` with multiplier `m` and folds it into hash
// state `h`. Modifies both `h` and `k`. Relies on a shift amount named `r`
// being in scope at the point of use.

#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }


// MurmurHashAligned2: 32-bit hash of the `len` bytes at `key` that only
// performs 4-byte loads from 4-byte-aligned addresses.
//
//   key  - pointer to the input bytes, any alignment
//   len  - number of bytes to hash
//   seed - caller-chosen value folded into the initial hash state
//
// When `key` is misaligned and at least 4 bytes long, the leading bytes up to
// the next aligned address are loaded individually, and each block is then
// stitched together from two neighbouring aligned words using shifts.
// NOTE(review): the stitching via `>> sr` / `<< sl` appears to assume
// little-endian word layout in order to match MurmurHash2 -- confirm.

uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed )
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed ^ len;

  // Offset of `data` within its 4-byte word (0 means already aligned).
  int align = (uint64_t)data & 3;

  if(align && (len >= 4))
  {
    // Pre-load the temp registers

    uint32_t t = 0, d = 0;

    // Load the (4 - align) bytes that precede the next aligned address.
    // The case labels fall through deliberately.
    switch(align)
    {
      case 1: t |= data[2] << 16;
      case 2: t |= data[1] << 8;
      case 3: t |= data[0];
    }

    // Park those bytes in the high end of t so that the `t >> sr` below
    // brings them back down to the low end of the block.
    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    // sl: bit width of the carried-over part; sr: bit width of the new part.
    int sl = 8 * (4-align);
    int sr = 8 * align;

    // Mix

    while(len >= 4)
    {
      // `data` is 4-byte aligned from here on.
      d = *(uint32_t *)data;
      // Block = carried bytes from the previous word + leading bytes of d.
      t = (t >> sr) | (d << sl);

      uint32_t k = t;

      MIX(h,k,m);

      // Keep the whole word; its unused high bytes start the next block.
      t = d;

      data += 4;
      len -= 4;
    }

    // Handle leftover data in temp registers

    d = 0;

    if(len >= align)
    {
      // Enough bytes remain to complete one more full block: read `align`
      // bytes individually and combine them with the carried bytes in t.
      switch(align)
      {
      case 3: d |= data[2] << 16;
      case 2: d |= data[1] << 8;
      case 1: d |= data[0];
      }

      uint32_t k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      //----------
      // Handle tail bytes

      switch(len)
      {
      case 3: h ^= data[2] << 16;
      case 2: h ^= data[1] << 8;
      case 1: h ^= data[0];
          h *= m;
      };
    }
    else
    {
      // Not enough bytes for a full block: the carried bytes in t plus the
      // remaining `len` bytes together form the tail. `case 0` is reached
      // for every len in 0..3 via fallthrough.
      switch(len)
      {
      case 3: d |= data[2] << 16;
      case 2: d |= data[1] << 8;
      case 1: d |= data[0];
      case 0: h ^= (t >> sr) | (d << sl);
          h *= m;
      }
    }

    // Final avalanche.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
  else
  {
    // Either `data` is already aligned, or len < 4 so no word load happens.
    while(len >= 4)
    {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    //----------
    // Handle tail bytes

    switch(len)
    {
    case 3: h ^= data[2] << 16;
    case 2: h ^= data[1] << 8;
    case 1: h ^= data[0];
        h *= m;
    };

    // Final avalanche.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
|  | @ -1,39 +0,0 @@ | |||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHash2 was written by Austin Appleby, and is placed in the public
 | ||||
| // domain. The author hereby disclaims copyright to this source code.
 | ||||
| 
 | ||||
| #ifndef _MURMURHASH2_H_ | ||||
| #define _MURMURHASH2_H_ | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // Platform-specific functions and macros
 | ||||
| 
 | ||||
| // Microsoft Visual Studio
 | ||||
| 
 | ||||
// Visual Studio releases before 2010 (_MSC_VER < 1600) ship no <stdint.h>,
// so the fixed-width types are declared by hand for those compilers only.
// Newer releases use <stdint.h>; declaring the typedefs unconditionally would
// clash with it whenever another header pulls it into the same translation
// unit.

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else	// defined(_MSC_VER) && (_MSC_VER < 1600)

#include <stdint.h>

#endif // !(defined(_MSC_VER) && (_MSC_VER < 1600))
 | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
| uint32_t MurmurHash2        ( const void * key, int len, uint32_t seed ); | ||||
| uint64_t MurmurHash64A      ( const void * key, int len, uint64_t seed ); | ||||
| uint64_t MurmurHash64B      ( const void * key, int len, uint64_t seed ); | ||||
| uint32_t MurmurHash2A       ( const void * key, int len, uint32_t seed ); | ||||
| uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed ); | ||||
| uint32_t MurmurHashAligned2 ( const void * key, int len, uint32_t seed ); | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
| #endif // _MURMURHASH2_H_
 | ||||
| 
 | ||||
|  | @ -1,346 +0,0 @@ | |||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHash3 was written by Austin Appleby, and is placed in the public
 | ||||
| // domain. The author hereby disclaims copyright to this source code.
 | ||||
| 
 | ||||
| // Note - The x86 and x64 versions do _not_ produce the same results, as the
 | ||||
| // algorithms are optimized for their respective platforms. You can still
 | ||||
| // compile and run any of them on any platform, but your performance with the
 | ||||
| // non-native version will be less than optimal.
 | ||||
| 
 | ||||
| #include "MurmurHash3.h" | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // Platform-specific functions and macros
 | ||||
| 
 | ||||
| // Microsoft Visual Studio
 | ||||
| 
 | ||||
| #if defined(_MSC_VER) | ||||
| 
 | ||||
| #define FORCE_INLINE	__forceinline | ||||
| 
 | ||||
| #include <stdlib.h> | ||||
| 
 | ||||
| #define ROTL32(x,y)	_rotl(x,y) | ||||
| #define ROTL64(x,y)	_rotl64(x,y) | ||||
| 
 | ||||
| #define BIG_CONSTANT(x) (x) | ||||
| 
 | ||||
| // Other compilers
 | ||||
| 
 | ||||
| #else	// defined(_MSC_VER)
 | ||||
| 
 | ||||
// The compiler-predefined macros and the attribute keyword carry double
// underscores (__GNUC__, __GNUC_MINOR__, __attribute__). Without them the
// version test can never be true and the attribute spelling is not valid.

#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))

/* gcc version >= 4.4 4.1 = RHEL 5, 4.4 = RHEL 6.
 * Don't inline for RHEL 5 gcc which is 4.1 */
#define FORCE_INLINE inline __attribute__((always_inline))

#else

#define FORCE_INLINE

#endif
| 
 | ||||
| 
 | ||||
// Rotates the 32-bit value `x` left by `r` bits.
// The count is reduced modulo 32 and the complementary right shift is masked,
// so r == 0 returns x unchanged instead of shifting by the full type width.
static inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned n = (unsigned)r & 31u;

  return (x << n) | (x >> ((32u - n) & 31u));
}
| 
 | ||||
// Rotates the 64-bit value `x` left by `r` bits.
// The count is reduced modulo 64 and the complementary right shift is masked,
// so r == 0 returns x unchanged instead of shifting by the full type width.
static inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned n = (unsigned)r & 63u;

  return (x << n) | (x >> ((64u - n) & 63u));
}
| 
 | ||||
| #define	ROTL32(x,y)	rotl32(x,y) | ||||
| #define ROTL64(x,y)	rotl64(x,y) | ||||
| 
 | ||||
| #define BIG_CONSTANT(x) (x##LLU) | ||||
| 
 | ||||
| #endif // !defined(_MSC_VER)
 | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // Block read - if your platform needs to do endian-swapping or can only
 | ||||
| // handle aligned reads, do the conversion here
 | ||||
| 
 | ||||
| FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i ) | ||||
| { | ||||
|   return p[i]; | ||||
| } | ||||
| 
 | ||||
| FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i ) | ||||
| { | ||||
|   return p[i]; | ||||
| } | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // Finalization mix - force all bits of a hash block to avalanche
 | ||||
| 
 | ||||
| FORCE_INLINE uint32_t fmix ( uint32_t h ) | ||||
| { | ||||
|   h ^= h >> 16; | ||||
|   h *= 0x85ebca6b; | ||||
|   h ^= h >> 13; | ||||
|   h *= 0xc2b2ae35; | ||||
|   h ^= h >> 16; | ||||
| 
 | ||||
|   return h; | ||||
| } | ||||
| 
 | ||||
| //----------
 | ||||
| 
 | ||||
| FORCE_INLINE uint64_t fmix ( uint64_t k ) | ||||
| { | ||||
|   k ^= k >> 33; | ||||
|   k *= BIG_CONSTANT(0xff51afd7ed558ccd); | ||||
|   k ^= k >> 33; | ||||
|   k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53); | ||||
|   k ^= k >> 33; | ||||
| 
 | ||||
|   return k; | ||||
| } | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
| void MurmurHash3_x86_32 ( const void * key, int len, | ||||
|                           uint32_t seed, void * out ) | ||||
| { | ||||
|   const uint8_t * data = (const uint8_t*)key; | ||||
|   const int nblocks = len / 4; | ||||
| 
 | ||||
|   uint32_t h1 = seed; | ||||
| 
 | ||||
|   uint32_t c1 = 0xcc9e2d51; | ||||
|   uint32_t c2 = 0x1b873593; | ||||
| 
 | ||||
|   //----------
 | ||||
|   // body
 | ||||
| 
 | ||||
|   const uint32_t * blocks = (const uint32_t *)(data + nblocks*4); | ||||
| 
 | ||||
|   for(int i = -nblocks; i; i++) | ||||
|   { | ||||
|     uint32_t k1 = getblock(blocks,i); | ||||
| 
 | ||||
|     k1 *= c1; | ||||
|     k1 = ROTL32(k1,15); | ||||
|     k1 *= c2; | ||||
| 
 | ||||
|     h1 ^= k1; | ||||
|     h1 = ROTL32(h1,13); | ||||
|     h1 = h1*5+0xe6546b64; | ||||
|   } | ||||
| 
 | ||||
|   //----------
 | ||||
|   // tail
 | ||||
| 
 | ||||
|   const uint8_t * tail = (const uint8_t*)(data + nblocks*4); | ||||
| 
 | ||||
|   uint32_t k1 = 0; | ||||
| 
 | ||||
|   switch(len & 3) | ||||
|   { | ||||
|   case 3: k1 ^= tail[2] << 16; | ||||
|   case 2: k1 ^= tail[1] << 8; | ||||
|   case 1: k1 ^= tail[0]; | ||||
|           k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1; | ||||
|   }; | ||||
| 
 | ||||
|   //----------
 | ||||
|   // finalization
 | ||||
| 
 | ||||
|   h1 ^= len; | ||||
| 
 | ||||
|   h1 = fmix(h1); | ||||
| 
 | ||||
|   *(uint32_t*)out = h1; | ||||
| } | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
| void MurmurHash3_x86_128 ( const void * key, const int len, | ||||
|                            uint32_t seed, void * out ) | ||||
| { | ||||
|   const uint8_t * data = (const uint8_t*)key; | ||||
|   const int nblocks = len / 16; | ||||
| 
 | ||||
|   uint32_t h1 = seed; | ||||
|   uint32_t h2 = seed; | ||||
|   uint32_t h3 = seed; | ||||
|   uint32_t h4 = seed; | ||||
| 
 | ||||
|   uint32_t c1 = 0x239b961b; | ||||
|   uint32_t c2 = 0xab0e9789; | ||||
|   uint32_t c3 = 0x38b34ae5; | ||||
|   uint32_t c4 = 0xa1e38b93; | ||||
| 
 | ||||
|   //----------
 | ||||
|   // body
 | ||||
| 
 | ||||
|   const uint32_t * blocks = (const uint32_t *)(data + nblocks*16); | ||||
| 
 | ||||
|   for(int i = -nblocks; i; i++) | ||||
|   { | ||||
|     uint32_t k1 = getblock(blocks,i*4+0); | ||||
|     uint32_t k2 = getblock(blocks,i*4+1); | ||||
|     uint32_t k3 = getblock(blocks,i*4+2); | ||||
|     uint32_t k4 = getblock(blocks,i*4+3); | ||||
| 
 | ||||
|     k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1; | ||||
| 
 | ||||
|     h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b; | ||||
| 
 | ||||
|     k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2; | ||||
| 
 | ||||
|     h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747; | ||||
| 
 | ||||
|     k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3; | ||||
| 
 | ||||
|     h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35; | ||||
| 
 | ||||
|     k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4; | ||||
| 
 | ||||
|     h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17; | ||||
|   } | ||||
| 
 | ||||
|   //----------
 | ||||
|   // tail
 | ||||
| 
 | ||||
|   const uint8_t * tail = (const uint8_t*)(data + nblocks*16); | ||||
| 
 | ||||
|   uint32_t k1 = 0; | ||||
|   uint32_t k2 = 0; | ||||
|   uint32_t k3 = 0; | ||||
|   uint32_t k4 = 0; | ||||
| 
 | ||||
|   switch(len & 15) | ||||
|   { | ||||
|   case 15: k4 ^= tail[14] << 16; | ||||
|   case 14: k4 ^= tail[13] << 8; | ||||
|   case 13: k4 ^= tail[12] << 0; | ||||
|            k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4; | ||||
| 
 | ||||
|   case 12: k3 ^= tail[11] << 24; | ||||
|   case 11: k3 ^= tail[10] << 16; | ||||
|   case 10: k3 ^= tail[ 9] << 8; | ||||
|   case  9: k3 ^= tail[ 8] << 0; | ||||
|            k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3; | ||||
| 
 | ||||
|   case  8: k2 ^= tail[ 7] << 24; | ||||
|   case  7: k2 ^= tail[ 6] << 16; | ||||
|   case  6: k2 ^= tail[ 5] << 8; | ||||
|   case  5: k2 ^= tail[ 4] << 0; | ||||
|            k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2; | ||||
| 
 | ||||
|   case  4: k1 ^= tail[ 3] << 24; | ||||
|   case  3: k1 ^= tail[ 2] << 16; | ||||
|   case  2: k1 ^= tail[ 1] << 8; | ||||
|   case  1: k1 ^= tail[ 0] << 0; | ||||
|            k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1; | ||||
|   }; | ||||
| 
 | ||||
|   //----------
 | ||||
|   // finalization
 | ||||
| 
 | ||||
|   h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; | ||||
| 
 | ||||
|   h1 += h2; h1 += h3; h1 += h4; | ||||
|   h2 += h1; h3 += h1; h4 += h1; | ||||
| 
 | ||||
|   h1 = fmix(h1); | ||||
|   h2 = fmix(h2); | ||||
|   h3 = fmix(h3); | ||||
|   h4 = fmix(h4); | ||||
| 
 | ||||
|   h1 += h2; h1 += h3; h1 += h4; | ||||
|   h2 += h1; h3 += h1; h4 += h1; | ||||
| 
 | ||||
|   ((uint32_t*)out)[0] = h1; | ||||
|   ((uint32_t*)out)[1] = h2; | ||||
|   ((uint32_t*)out)[2] = h3; | ||||
|   ((uint32_t*)out)[3] = h4; | ||||
| } | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
| void MurmurHash3_x64_128 ( const void * key, const int len, | ||||
|                            const uint32_t seed, void * out ) | ||||
| { | ||||
|   const uint8_t * data = (const uint8_t*)key; | ||||
|   const int nblocks = len / 16; | ||||
| 
 | ||||
|   uint64_t h1 = seed; | ||||
|   uint64_t h2 = seed; | ||||
| 
 | ||||
|   uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5); | ||||
|   uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f); | ||||
| 
 | ||||
|   //----------
 | ||||
|   // body
 | ||||
| 
 | ||||
|   const uint64_t * blocks = (const uint64_t *)(data); | ||||
| 
 | ||||
|   for(int i = 0; i < nblocks; i++) | ||||
|   { | ||||
|     uint64_t k1 = getblock(blocks,i*2+0); | ||||
|     uint64_t k2 = getblock(blocks,i*2+1); | ||||
| 
 | ||||
|     k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1; | ||||
| 
 | ||||
|     h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729; | ||||
| 
 | ||||
|     k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2; | ||||
| 
 | ||||
|     h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5; | ||||
|   } | ||||
| 
 | ||||
|   //----------
 | ||||
|   // tail
 | ||||
| 
 | ||||
|   const uint8_t * tail = (const uint8_t*)(data + nblocks*16); | ||||
| 
 | ||||
|   uint64_t k1 = 0; | ||||
|   uint64_t k2 = 0; | ||||
| 
 | ||||
|   switch(len & 15) | ||||
|   { | ||||
|   case 15: k2 ^= uint64_t(tail[14]) << 48; | ||||
|   case 14: k2 ^= uint64_t(tail[13]) << 40; | ||||
|   case 13: k2 ^= uint64_t(tail[12]) << 32; | ||||
|   case 12: k2 ^= uint64_t(tail[11]) << 24; | ||||
|   case 11: k2 ^= uint64_t(tail[10]) << 16; | ||||
|   case 10: k2 ^= uint64_t(tail[ 9]) << 8; | ||||
|   case  9: k2 ^= uint64_t(tail[ 8]) << 0; | ||||
|            k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2; | ||||
| 
 | ||||
|   case  8: k1 ^= uint64_t(tail[ 7]) << 56; | ||||
|   case  7: k1 ^= uint64_t(tail[ 6]) << 48; | ||||
|   case  6: k1 ^= uint64_t(tail[ 5]) << 40; | ||||
|   case  5: k1 ^= uint64_t(tail[ 4]) << 32; | ||||
|   case  4: k1 ^= uint64_t(tail[ 3]) << 24; | ||||
|   case  3: k1 ^= uint64_t(tail[ 2]) << 16; | ||||
|   case  2: k1 ^= uint64_t(tail[ 1]) << 8; | ||||
|   case  1: k1 ^= uint64_t(tail[ 0]) << 0; | ||||
|            k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1; | ||||
|   }; | ||||
| 
 | ||||
|   //----------
 | ||||
|   // finalization
 | ||||
| 
 | ||||
|   h1 ^= len; h2 ^= len; | ||||
| 
 | ||||
|   h1 += h2; | ||||
|   h2 += h1; | ||||
| 
 | ||||
|   h1 = fmix(h1); | ||||
|   h2 = fmix(h2); | ||||
| 
 | ||||
|   h1 += h2; | ||||
|   h2 += h1; | ||||
| 
 | ||||
|   ((uint64_t*)out)[0] = h1; | ||||
|   ((uint64_t*)out)[1] = h2; | ||||
| } | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
|  | @ -1,45 +0,0 @@ | |||
| //-----------------------------------------------------------------------------
 | ||||
| // MurmurHash3 was written by Austin Appleby, and is placed in the public
 | ||||
| // domain. The author hereby disclaims copyright to this source code.
 | ||||
| 
 | ||||
| #ifndef _MURMURHASH3_H_ | ||||
| #define _MURMURHASH3_H_ | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| // Platform-specific functions and macros
 | ||||
| 
 | ||||
| // Microsoft Visual Studio
 | ||||
| 
 | ||||
// Visual Studio releases before 2010 (_MSC_VER < 1600) ship no <stdint.h>,
// so the fixed-width types are declared by hand for those compilers only.
// Newer releases use <stdint.h>; declaring the typedefs unconditionally would
// clash with it whenever another header pulls it into the same translation
// unit.

#if defined(_MSC_VER) && (_MSC_VER < 1600)

typedef unsigned char uint8_t;
typedef unsigned long uint32_t;
typedef unsigned __int64 uint64_t;

// Other compilers

#else	// defined(_MSC_VER) && (_MSC_VER < 1600)

#include <stdint.h>

#endif // !(defined(_MSC_VER) && (_MSC_VER < 1600))
 | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| #ifdef __cplusplus | ||||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| 
 | ||||
| void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out ); | ||||
| 
 | ||||
| void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out ); | ||||
| 
 | ||||
| void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out ); | ||||
| 
 | ||||
| #ifdef __cplusplus | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
| //-----------------------------------------------------------------------------
 | ||||
| 
 | ||||
| #endif // _MURMURHASH3_H_
 | ||||
|  | @ -1,13 +0,0 @@ | |||
| # cython profile=True | ||||
| 
 | ||||
| from libc.stdint cimport uint64_t, int64_t | ||||
| 
 | ||||
| 
 | ||||
# MurmurHash3 entry points. Each writes its result through `out` and can be
# called without the GIL.
# NOTE(review): MurmurHash3.h declares these as (const void*, int len,
# uint32_t seed, void* out); the 64-bit `len` and `seed` declared here are
# wider than the C parameters, so values that do not fit will not survive
# the call intact.
cdef extern from "../include/MurmurHash3.h":
    void MurmurHash3_x86_32(void * key, uint64_t len, uint64_t seed, void* out) nogil
    void MurmurHash3_x86_128(void * key, uint64_t len, uint64_t seed, void* out) nogil
| 
 | ||||
| 
 | ||||
# 64-bit MurmurHash2 variants. Each returns the hash directly and can be
# called without the GIL.
# NOTE(review): MurmurHash2.h declares these as (const void*, int len,
# uint64_t seed); here `len` is declared wider (uint64_t) and `seed` is
# declared signed (int64_t).
cdef extern from "../include/MurmurHash2.h":
    uint64_t MurmurHash64A(void * key, uint64_t len, int64_t seed) nogil
    uint64_t MurmurHash64B(void * key, uint64_t len, int64_t seed) nogil
|  | @ -1 +0,0 @@ | |||
| # cython: profile=True | ||||
|  | @ -1,48 +0,0 @@ | |||
| from libcpp.utility cimport pair | ||||
| from libcpp.vector cimport vector | ||||
| from libc.stdint cimport uint64_t, int64_t | ||||
| 
 | ||||
| 
 | ||||
# Cython declaration of google::dense_hash_map from the sparsehash library.
# Only the members marked `nogil` may be called without holding the GIL.
cdef extern from "sparsehash/dense_hash_map" namespace "google":
    cdef cppclass dense_hash_map[K, D]:
        K& key_type
        D& data_type
        pair[K, D]& value_type
        uint64_t size_type
        cppclass iterator:
            pair[K, D]& operator*() nogil
            iterator operator++() nogil
            iterator operator--() nogil
            bint operator==(iterator) nogil
            bint operator!=(iterator) nogil
        iterator begin()
        iterator end()
        uint64_t size()
        uint64_t max_size()
        bint empty()
        uint64_t bucket_count()
        uint64_t bucket_size(uint64_t i)
        uint64_t bucket(K& key)
        # Load-factor getters return the current value; setters return nothing.
        # The max setter was previously misspelled `max_load_vactor`, a name
        # that does not exist on the C++ class.
        double max_load_factor()
        void max_load_factor(double new_grow)
        double min_load_factor()
        void min_load_factor(double new_grow)
        void set_resizing_parameters(double shrink, double grow)
        void resize(uint64_t n)
        void rehash(uint64_t n)
        dense_hash_map()
        dense_hash_map(uint64_t n)
        void swap(dense_hash_map&)
        pair[iterator, bint] insert(pair[K, D]) nogil
        # set_empty_key is declared ahead of the deleted-key controls.
        # NOTE(review): dense_hash_map presumably requires it to be called
        # before the first insert -- confirm against the sparsehash docs.
        void set_empty_key(K&)
        void set_deleted_key(K& key)
        void clear_deleted_key()
        void erase(iterator pos)
        uint64_t erase(K& k)
        void erase(iterator first, iterator last)
        void clear()
        void clear_no_resize()
        pair[iterator, iterator] equal_range(K& k)
        D& operator[](K&) nogil
| 
 | ||||
| 
 | ||||
|  | @ -1 +0,0 @@ | |||
| # cython profile=True | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user