Hashing Functions

Some Hashing Functions



////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
unsigned ConstantHash(const char *Key, unsigned TableSize)
{
  return 1;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
unsigned ReflexiveHash(const char *Key, unsigned TableSize)
{
  return StrToInt(Key) % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
unsigned PJWHash(const char *Key, unsigned TableSize)
{
    // Initial value of hash
  unsigned hash = 0;

    // Process each char in the string
  while (*Key)
  {
      // Shift hash left 4
    hash = (hash << 4);

      // Add in current char
    hash = hash + (*Key);

      // Get the four high-order bits
    unsigned bits = hash & 0xF0000000;

      // If any of the four bits are non-zero,
    if (bits)
    {
        // Shift the four bits right 24 positions (...bbbb0000)
        // and XOR them back in to the hash
      hash = hash ^ (bits >> 24);

        // Now, XOR the four bits back in
      hash = hash ^ bits;
    }

      // Next char
    Key++;
  }

    // Modulo so hash is within 0 - TableSize
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
unsigned SimpleHash(const char *Key, unsigned TableSize)
{
    // Initial value of hash
  unsigned hash = 0;

    // Process each char in the string
  while (*Key)
  {
      // Add in current char
    hash += *Key;

      // Next char
    Key++;
  }

    // Modulo so hash is within the table
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
unsigned RSHash(const char *Key, unsigned TableSize)
{
  unsigned hash = 0;         // Initial value of hash
  unsigned multiplier = 127; // Prevent anomalies

    // Process each char in the string
  while (*Key)
  {
      // Adjust hash total
    hash = hash * multiplier;

      // Add in current char and mod result
    hash = (hash + *Key) % TableSize;

      // Next char
    Key++;
  }

    // Hash is within 0 - TableSize
  return hash;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Same as above but performing the modulo once at the end
unsigned RSHashMod(const char *Key, unsigned TableSize)
{
  unsigned hash = 0;         // Initial value of hash
  unsigned multiplier = 127; // Prevent anomalies

    // Process each char in the string
  while (*Key)
  {
      // Adjust hash total
    hash = hash * multiplier;

      // Add in current char and mod result
    hash = hash + *Key;

      // Next char
    Key++;
  }

    // Hash is within 0 - TableSize
  return hash % TableSize;;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
unsigned UHash(const char *Key, unsigned TableSize)
{
  unsigned hash = 0;       // Initial value of hash
  unsigned rand1 = 31415; // "Random" 1
  unsigned rand2 = 27183; // "Random" 2

    // Process each char in string
  while (*Key)
  {
      // Multiply hash by random
    hash = hash * rand1;

      // Add in current char, keep within TableSize
    hash = (hash + *Key) % TableSize;

      // Update rand1 for next "random" number
    rand1 = (rand1 * rand2) % (TableSize - 1);

      // Next char
    Key++;
  }

    // Hash value is within 0 - TableSize - 1
  return hash;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Same as above but performing the modulo once at the end
unsigned UHashMod(const char *Key, unsigned TableSize)
{
  unsigned hash = 0;       // Initial value of hash
  unsigned rand1 = 31415; // "Random" 1
  unsigned rand2 = 27183; // "Random" 2

    // Process each char in string
  while (*Key)
  {
      // Multiply hash by random
    hash = hash * rand1;

      // Add in current char, keep within TableSize
    hash = (hash + *Key);

      // Update rand1 for next "random" number
    rand1 = (rand1 * rand2);

      // Next char
    Key++;
  }

    // Hash value is within 0 - TableSize - 1
  return hash % TableSize;
}

// Daniel J. Bernstein
/*
 * DJBX33A (Daniel J. Bernstein, Times 33 with Addition)
 *
 * This is Daniel J. Bernstein's popular `times 33' hash function as
 * posted by him years ago on comp.lang.c. It basically uses a function
 * like ``hash(i) = hash(i-1) * 33 + string[i]''. This is one of the
 * best hashing functions for strings. Because it is both computed very
 * fast and distributes very well.
 *
 * The magic of the number 33, i.e. why it works better than many other
 * constants, prime or not, has never been adequately explained by
 * anyone. So I try an own RSE-explanation: if one experimentally tests
 * all multipliers between 1 and 256 (as I did it) one detects that
 * even numbers are not useable at all. The remaining 128 odd numbers
 * (except for the number 1) work more or less all equally well. They
 * all distribute in an acceptable way and this way fill a hash table
 * with an average percent of approx. 86%.
 *
 * If one compares the Chi/2 values resulting of the various
 * multipliers, the 33 not even has the best value. But the 33 and a
 * few other equally good values like 17, 31, 63, 127 and 129 have
 * nevertheless a great advantage over the remaining values in the large
 * set of possible multipliers: their multiply operation can be replaced
 * by a faster operation based on just one bit-wise shift plus either a
 * single addition or subtraction operation. And because a hash function
 * has to both distribute good and has to be very fast to compute, those
 * few values should be preferred and seems to be also the reason why
 * Daniel J. Bernstein also preferred it.
 *
 * Lawsuit here:
 * http://epic.org/crypto/export_controls/bernstein_decision_9_cir.html
 */


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  Daniel J. Bernstein #1
unsigned DJB1Hash(const char *Key, unsigned TableSize)
{
  unsigned hash = 0;
  while (*Key)
  {
    hash *= 33;
    hash += *Key;
    Key++;
  }
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
//  Daniel J. Bernstein  #2
unsigned DJB2Hash(const char *Key, unsigned TableSize)
{
  unsigned hash = 5381;
  while (*Key)
  {
    hash <<= 5;
    hash += hash;
    hash += *Key;
    Key++;
  }
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Donald E. Knuth
unsigned DEKHash(const char *Key, unsigned TableSize)
{
  unsigned hash = (unsigned)strlen(Key);
  while (*Key)
  {
    hash <<= 5;
    hash ^= (hash >> 27);
    hash ^= *Key;
    Key++;
  }
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// K&R Hash page 144 in the white book (second edition)
unsigned KRHash(const char *Key, unsigned TableSize)
{
  unsigned seed = 31;
  unsigned hash = 0;

  while (*Key)
  {
    hash *= seed;
    hash += *Key;
    Key++;
  }
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Fowler, Noll, Vo
// http://isthe.com/chongo/tech/comp/fnv/
unsigned FNVHash(const char *Key, unsigned TableSize)
{
  unsigned FNV_prime32 = 16777619;
  unsigned FNV_offset32 = 2166136261;

  unsigned hash = FNV_offset32;
  while (*Key)
  {
    hash *= FNV_prime32;
    hash ^= *Key;
    Key++;
  }
  return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// SuperFastHash by Paul Hsieh
// http://www.azillionmonkeys.com/qed/hash.html
#include <stdint.h> /* Replace with <stdint.h> if appropriate */
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
  || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif

#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
                       +(uint32_t)(((const uint8_t *)(d))[0]) )
#endif

uint32_t SFHash (const char * data, unsigned TableSize)
{
  int len = strlen(data);
  uint32_t hash = len;
  uint32_t tmp;
  int rem;

    if (len <= 0 || data == NULL) return 0;

    rem = len & 3;
    len >>= 2;

    /* Main loop */
    for (;len > 0; len--) {
        hash  += get16bits (data);
        tmp    = (get16bits (data+2) << 11) ^ hash;
        hash   = (hash << 16) ^ tmp;
        data  += 2*sizeof (uint16_t);
        hash  += hash >> 11;
    }

    /* Handle end cases */
    switch (rem) {
        case 3: hash += get16bits (data);
                hash ^= hash << 16;
                hash ^= data[sizeof (uint16_t)] << 18;
                hash += hash >> 11;
                break;
        case 2: hash += get16bits (data);
                hash ^= hash << 11;
                hash += hash >> 17;
                break;
        case 1: hash += *data;
                hash ^= hash << 10;
                hash += hash >> 1;
    }

    /* Force "avalanching" of final 127 bits */
    hash ^= hash << 3;
    hash += hash >> 5;
    hash ^= hash << 4;
    hash += hash >> 17;
    hash ^= hash << 25;
    hash += hash >> 6;

    return hash % TableSize;
}


////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// STLPort 4.6.2
unsigned STLPortHash(const char *Key, unsigned TableSize)
{
  unsigned hash = 0;
  while (*Key)
  {
    hash = (hash * 5) + *Key;
    Key++;
  }
  return hash % TableSize;
}

int GetRandom(int Low, int High)
{
  return rand() % (High - Low + 1) + Low;
}

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// Code to test the performance in the GUI.
void TOAHashMain::TestPerformance(HASHFUNC fn, const AnsiString& fname, int keysize, int iterations)
{
  srand(123);
  char *buffer = new char[keysize + 1];

    // Generate a random "key"
  for (int i = 0; i < keysize; i++)
  {
    char c = (char)GetRandom(32, 255);
    buffer[i] = c;
  }
  buffer[keysize] = 0;

  char buf[100];

    // Time the performance of this hash function
  clock_t start = clock();
    for (int i = 0; i < iterations; i++)
    {
      fn(buffer, keysize);  // call hash function

      if (FStopPerformanceTest)
        break;

        // Give the app a chance to respond to other events once in a while
      if (!(i % 1000))
        Application->ProcessMessages();
    }
  clock_t end = clock();
  std::sprintf(buf, "%18s... %5.3f s", fname.c_str(), (end - start) / (double)CLOCKS_PER_SEC);
  mmoResults->Lines->Add(buf);

  delete [] buffer;
}