scala.util.hashing.MurmurHash3

object MurmurHash3 extends MurmurHash3

An implementation of Austin Appleby’s MurmurHash 3 algorithm (MurmurHash3_x86_32). This object contains methods that hash values of various types as well as means to construct Hashing objects.

This algorithm is designed to generate well-distributed non-cryptographic hashes. It is designed to hash data in 32 bit chunks (ints).

The mix method needs to be called at each step to update the intermediate hash value. For the last chunk to incorporate into the hash mixLast may be used instead, which is slightly faster. Finally finalizeHash needs to be called to compute the final hash value.

This is based on the earlier MurmurHash3 code by Rex Kerr, but the MurmurHash3 algorithm was since changed by its creator Austin Appleby to remedy some weaknesses and improve performance. This represents the latest and supposedly final version of the algorithm (revision 136).

Type Members

class ArrayHashing[T] extends Hashing[Array[T]]

Value Members From scala.util.hashing.MurmurHash3

def arrayHash[T](a: Array[T]): Int

(defined at scala.util.hashing.MurmurHash3)

final def arrayHash[T](a: Array[T], seed: Int): Int

Compute the hash of an array.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def arrayHashing[T]: ArrayHashing[T]

(defined at scala.util.hashing.MurmurHash3)

def bytesHash(data: Array[Byte]): Int

(defined at scala.util.hashing.MurmurHash3)

final def bytesHash(data: Array[Byte], seed: Int): Int

Compute the hash of a byte array. Faster than arrayHash, because it hashes 4 bytes at once.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def bytesHashing: Hashing[Array[Byte]]

(defined at scala.util.hashing.MurmurHash3)

final def finalizeHash(hash: Int, length: Int): Int

Finalize a hash to incorporate the length and make sure all bits avalanche.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

final def listHash(xs: collection.immutable.List[_], seed: Int): Int

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def mapHash(xs: Map[_, _]): Int

(defined at scala.util.hashing.MurmurHash3)

final def mix(hash: Int, data: Int): Int

Mix in a block of data into an intermediate hash value.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

final def mixLast(hash: Int, data: Int): Int

May optionally be used as the last mixing step. Is a little bit faster than mix, as it does no further mixing of the resulting hash. For the last element this is not necessary as the hash is thoroughly mixed during finalization anyway.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def orderedHash(xs: TraversableOnce[Any]): Int

(defined at scala.util.hashing.MurmurHash3)

final def orderedHash(xs: TraversableOnce[Any], seed: Int): Int

Compute a hash that depends on the order of its arguments.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def orderedHashing: Hashing[TraversableOnce[Any]]

(defined at scala.util.hashing.MurmurHash3)

def productHash(x: Product): Int

(defined at scala.util.hashing.MurmurHash3)

final def productHash(x: Product, seed: Int): Int

Compute the hash of a product

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def productHashing: Hashing[Product]

(defined at scala.util.hashing.MurmurHash3)

def seqHash(xs: collection.Seq[_]): Int

To offer some potential for optimization.

(defined at scala.util.hashing.MurmurHash3)

def setHash(xs: Set[_]): Int

(defined at scala.util.hashing.MurmurHash3)

def stringHash(x: String): Int

(defined at scala.util.hashing.MurmurHash3)

final def stringHash(str: String, seed: Int): Int

Compute the hash of a string

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def stringHashing: Hashing[String]

(defined at scala.util.hashing.MurmurHash3)

def unorderedHash(xs: TraversableOnce[Any]): Int

(defined at scala.util.hashing.MurmurHash3)

final def unorderedHash(xs: TraversableOnce[Any], seed: Int): Int

Compute a hash that is symmetric in its arguments - that is a hash where the order of appearance of elements does not matter. This is useful for hashing sets, for example.

  • Definition Classes
    • MurmurHash3

(defined at scala.util.hashing.MurmurHash3)

def unorderedHashing: Hashing[TraversableOnce[Any]]

(defined at scala.util.hashing.MurmurHash3)

Full Source:

/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala
package util.hashing

import java.lang.Integer.{ rotateLeft => rotl }

private[hashing] class MurmurHash3 {
  /** Mix in a block of data into an intermediate hash value. */
  final def mix(hash: Int, data: Int): Int = {
    var h = mixLast(hash, data)
    h = rotl(h, 13)
    h * 5 + 0xe6546b64
  }

  /** May optionally be used as the last mixing step. Is a little bit faster than mix,
   *  as it does no further mixing of the resulting hash. For the last element this is not
   *  necessary as the hash is thoroughly mixed during finalization anyway. */
  final def mixLast(hash: Int, data: Int): Int = {
    var k = data

    k *= 0xcc9e2d51
    k = rotl(k, 15)
    k *= 0x1b873593

    hash ^ k
  }

  /** Finalize a hash to incorporate the length and make sure all bits avalanche. */
  final def finalizeHash(hash: Int, length: Int): Int = avalanche(hash ^ length)

  /** Force all bits of the hash to avalanche. Used for finalizing the hash. */
  private final def avalanche(hash: Int): Int = {
    var h = hash

    h ^= h >>> 16
    h *= 0x85ebca6b
    h ^= h >>> 13
    h *= 0xc2b2ae35
    h ^= h >>> 16

    h
  }

  /** Compute the hash of a product */
  final def productHash(x: Product, seed: Int): Int = {
    val arr = x.productArity
    // Case objects have the hashCode inlined directly into the
    // synthetic hashCode method, but this method should still give
    // a correct result if passed a case object.
    if (arr == 0) {
      x.productPrefix.hashCode
    }
    else {
      var h = seed
      var i = 0
      while (i < arr) {
        h = mix(h, x.productElement(i).##)
        i += 1
      }
      finalizeHash(h, arr)
    }
  }

  /** Compute the hash of a string */
  final def stringHash(str: String, seed: Int): Int = {
    var h = seed
    var i = 0
    while (i + 1 < str.length) {
      val data = (str.charAt(i) << 16) + str.charAt(i + 1)
      h = mix(h, data)
      i += 2
    }
    if (i < str.length) h = mixLast(h, str.charAt(i).toInt)
    finalizeHash(h, str.length)
  }

  /** Compute a hash that is symmetric in its arguments - that is a hash
   *  where the order of appearance of elements does not matter.
   *  This is useful for hashing sets, for example.
   */
  final def unorderedHash(xs: TraversableOnce[Any], seed: Int): Int = {
    var a, b, n = 0
    var c = 1
    xs foreach { x =>
      val h = x.##
      a += h
      b ^= h
      if (h != 0) c *= h
      n += 1
    }
    var h = seed
    h = mix(h, a)
    h = mix(h, b)
    h = mixLast(h, c)
    finalizeHash(h, n)
  }
  /** Compute a hash that depends on the order of its arguments.
   */
  final def orderedHash(xs: TraversableOnce[Any], seed: Int): Int = {
    var n = 0
    var h = seed
    xs foreach { x =>
      h = mix(h, x.##)
      n += 1
    }
    finalizeHash(h, n)
  }

  /** Compute the hash of an array.
   */
  final def arrayHash[@specialized T](a: Array[T], seed: Int): Int = {
    var h = seed
    var i = 0
    while (i < a.length) {
      h = mix(h, a(i).##)
      i += 1
    }
    finalizeHash(h, a.length)
  }

  /** Compute the hash of a byte array. Faster than arrayHash, because
   *  it hashes 4 bytes at once.
   */
  final def bytesHash(data: Array[Byte], seed: Int): Int = {
    var len = data.length
    var h = seed

    // Body
    var i = 0
    while(len >= 4) {
      var k = data(i + 0) & 0xFF
      k |= (data(i + 1) & 0xFF) << 8
      k |= (data(i + 2) & 0xFF) << 16
      k |= (data(i + 3) & 0xFF) << 24

      h = mix(h, k)

      i += 4
      len -= 4
    }

    // Tail
    var k = 0
    if(len == 3) k ^= (data(i + 2) & 0xFF) << 16
    if(len >= 2) k ^= (data(i + 1) & 0xFF) << 8
    if(len >= 1) {
      k ^= (data(i + 0) & 0xFF)
      h = mixLast(h, k)
    }

    // Finalization
    finalizeHash(h, data.length)
  }

  final def listHash(xs: scala.collection.immutable.List[_], seed: Int): Int = {
    var n = 0
    var h = seed
    var elems = xs
    while (!elems.isEmpty) {
      val head = elems.head
      val tail = elems.tail
      h = mix(h, head.##)
      n += 1
      elems = tail
    }
    finalizeHash(h, n)
  }
}

/**
 * An implementation of Austin Appleby's MurmurHash 3 algorithm
 * (MurmurHash3_x86_32). This object contains methods that hash
 * values of various types as well as means to construct `Hashing`
 * objects.
 *
 * This algorithm is designed to generate well-distributed non-cryptographic
 * hashes. It is designed to hash data in 32 bit chunks (ints).
 *
 * The mix method needs to be called at each step to update the intermediate
 * hash value. For the last chunk to incorporate into the hash mixLast may
 * be used instead, which is slightly faster. Finally finalizeHash needs to
 * be called to compute the final hash value.
 *
 * This is based on the earlier MurmurHash3 code by Rex Kerr, but the
 * MurmurHash3 algorithm was since changed by its creator Austin Appleby
 * to remedy some weaknesses and improve performance. This represents the
 * latest and supposedly final version of the algorithm (revision 136).
 *
 * @see [[http://code.google.com/p/smhasher]]
 */
object MurmurHash3 extends MurmurHash3 {
  final val arraySeed       = 0x3c074a61
  final val stringSeed      = 0xf7ca7fd2
  final val productSeed     = 0xcafebabe
  final val symmetricSeed   = 0xb592f7ae
  final val traversableSeed = 0xe73a8b15
  final val seqSeed         = "Seq".hashCode
  final val mapSeed         = "Map".hashCode
  final val setSeed         = "Set".hashCode

  def arrayHash[@specialized T](a: Array[T]): Int  = arrayHash(a, arraySeed)
  def bytesHash(data: Array[Byte]): Int            = bytesHash(data, arraySeed)
  def orderedHash(xs: TraversableOnce[Any]): Int   = orderedHash(xs, symmetricSeed)
  def productHash(x: Product): Int                 = productHash(x, productSeed)
  def stringHash(x: String): Int                   = stringHash(x, stringSeed)
  def unorderedHash(xs: TraversableOnce[Any]): Int = unorderedHash(xs, traversableSeed)

  /** To offer some potential for optimization.
   */
  def seqHash(xs: scala.collection.Seq[_]): Int    = xs match {
    case xs: List[_] => listHash(xs, seqSeed)
    case xs => orderedHash(xs, seqSeed)
  }

  def mapHash(xs: scala.collection.Map[_, _]): Int = unorderedHash(xs, mapSeed)
  def setHash(xs: scala.collection.Set[_]): Int    = unorderedHash(xs, setSeed)

  class ArrayHashing[@specialized T] extends Hashing[Array[T]] {
    def hash(a: Array[T]) = arrayHash(a)
  }

  def arrayHashing[@specialized T] = new ArrayHashing[T]

  def bytesHashing = new Hashing[Array[Byte]] {
    def hash(data: Array[Byte]) = bytesHash(data)
  }

  def orderedHashing = new Hashing[TraversableOnce[Any]] {
    def hash(xs: TraversableOnce[Any]) = orderedHash(xs)
  }

  def productHashing = new Hashing[Product] {
    def hash(x: Product) = productHash(x)
  }

  def stringHashing = new Hashing[String] {
    def hash(x: String) = stringHash(x)
  }

  def unorderedHashing = new Hashing[TraversableOnce[Any]] {
    def hash(xs: TraversableOnce[Any]) = unorderedHash(xs)
  }

  /** All this trouble and foreach still appears faster.
   *  Leaving in place in case someone would like to investigate further.
   */
  /**
  def linearSeqHash(xs: scala.collection.LinearSeq[_], seed: Int): Int = {
    var n = 0
    var h = seed
    var elems = xs
    while (elems.nonEmpty) {
      h = mix(h, elems.head.##)
      n += 1
      elems = elems.tail
    }
    finalizeHash(h, n)
  }

  def indexedSeqHash(xs: scala.collection.IndexedSeq[_], seed: Int): Int = {
    var n = 0
    var h = seed
    val len = xs.length
    while (n < len) {
      h = mix(h, xs(n).##)
      n += 1
    }
    finalizeHash(h, n)
  }
  */
}