diff --git a/setup.py b/setup.py
index d33ab750e..5c588a70e 100644
--- a/setup.py
+++ b/setup.py
@@ -46,4 +46,5 @@ else:
     exts = [
         Extension("spacy.lang", ["spacy/lang.pyx"], language="c++", include_dirs=includes),
+        Extension("spacy._hashing", ["spacy/_hashing.pyx"], language="c++", include_dirs=includes),
         Extension("spacy.word", ["spacy/word.pyx"], language="c++", include_dirs=includes),
         Extension("spacy.lexeme", ["spacy/lexeme.pyx"], language="c++",
diff --git a/spacy/_hashing.pxd b/spacy/_hashing.pxd
index 2be9d109d..f4c4f5b43 100644
--- a/spacy/_hashing.pxd
+++ b/spacy/_hashing.pxd
@@ -12,9 +12,9 @@ cdef struct Cell:
 cdef class PointerHash:
     cdef size_t size
     cdef size_t filled
+    cdef Cell* _last
     cdef Cell* cells
 
-    cdef size_t find_slot(self, key_t key)
-    cdef Cell* lookup(self, key_t key)
-    cdef void insert(self, key_t key, val_t value)
-    cdef void resize(self, size_t new_size)
+    cdef val_t lookup(self, key_t key)
+    cdef void insert(self, key_t key, val_t value) except *
+    cdef void resize(self, size_t new_size) except *
diff --git a/spacy/_hashing.pyx b/spacy/_hashing.pyx
index 2645d2bcf..2218fb1c5 100644
--- a/spacy/_hashing.pyx
+++ b/spacy/_hashing.pyx
@@ -6,7 +6,8 @@ cimport cython
 cdef class PointerHash:
     def __cinit__(self, size_t initial_size=8):
         self.size = initial_size
         self.filled = 0
+        self._last = NULL
         # Size must be power of two
         assert self.size & (self.size - 1) == 0
         self.cells = <Cell*>calloc(self.size, sizeof(Cell))
@@ -16,41 +17,36 @@
 
     def __getitem__(self, key_t key):
         assert key != 0
-        cdef Cell* cell = self.lookup(key)
-        return cell.value if cell.key != 0 else None
+        cdef val_t value = self.lookup(key)
+        return value if value != 0 else None
 
     def __setitem__(self, key_t key, val_t value):
         assert key != 0
+        assert value != 0
         self.insert(key, value)
 
-    @cython.cdivision
-    cdef size_t find_slot(self, key_t key):
-        cdef size_t i = (key % self.size)
-        while self.cells[i].key != 0 and self.cells[i].key != key:
-            i = (i + 1) % self.size
-        return i
+    cdef val_t lookup(self, key_t key):
+        cdef Cell* cell = _find_cell(self.cells, self.size, key)
+        self._last = cell
+        return cell.value
 
-    @cython.cdivision
-    cdef Cell* lookup(self, key_t key):
-        cdef size_t i = (key % self.size)
-        while self.cells[i].key != 0 and self.cells[i].key != key:
-            i = (i + 1) % self.size
-        return &self.cells[i]
-
-    cdef void insert(self, key_t key, val_t value):
-        cdef size_t i = self.find_slot(key)
-        if self.cells[i].key == 0:
-            self.cells[i].key = key
+    cdef void insert(self, key_t key, val_t value) except *:
+        cdef Cell* cell
+        if self._last != NULL and key == self._last.key:
+            cell = self._last
+        else:
+            cell = _find_cell(self.cells, self.size, key)
+            self._last = NULL
+        if cell.key == 0:
+            cell.key = key
             self.filled += 1
-        self.cells[i].value = value
+        cell.value = value
         if (self.filled + 1) * 4 >= (self.size * 3):
             self.resize(self.size * 2)
 
-    cdef void resize(self, size_t new_size):
+    cdef void resize(self, size_t new_size) except *:
         assert (new_size & (new_size - 1)) == 0 # Must be a power of 2
         assert self.filled * 4 <= new_size * 3
 
-        self.size = new_size
-
         cdef Cell* old_cells = self.cells
         cdef size_t old_size = self.size
@@ -60,6 +56,16 @@
         self.filled = 0
 
         cdef size_t i
         for i in range(old_size):
-            if self.cells[i].key != 0:
-                self.insert(self.cells[i].key, self.cells[i].value)
+            if old_cells[i].key != 0:
+                assert old_cells[i].value != 0, i
+                self.insert(old_cells[i].key, old_cells[i].value)
+        free(old_cells)
+
+
+@cython.cdivision
+cdef inline Cell* _find_cell(Cell* cells, size_t size, key_t key) nogil:
+    cdef size_t i = (key % size)
+    while cells[i].key != 0 and cells[i].key != key:
+        i = (i + 1) % size
+    return &cells[i]
diff --git a/spacy/en.pyx b/spacy/en.pyx
index a3ce4da59..6f801d96e 100644
--- a/spacy/en.pyx
+++ b/spacy/en.pyx
@@ -238,7 +238,7 @@ cdef class English(Language):
     v_shape = View_WordShape
     def __cinit__(self, name, user_string_features, user_flag_features):
         self.cache = PointerHash(2 ** 25)
-        self.specials.set_empty_key(0)
+        self.specials = PointerHash(2 ** 16)
         lang_data = util.read_lang_data(name)
         rules, words, probs, clusters, case_stats, tag_stats = lang_data
         self.lexicon = lang.Lexicon(words, probs, clusters, case_stats, tag_stats,
diff --git a/spacy/lang.pxd b/spacy/lang.pxd
index 619993ebc..1f61d0e95 100644
--- a/spacy/lang.pxd
+++ b/spacy/lang.pxd
@@ -15,49 +15,6 @@ cdef extern from "Python.h":
     cdef bint Py_UNICODE_ISALNUM(Py_UNICODE ch)
 
 
-cdef extern from "sparsehash/dense_hash_map" namespace "google":
-    cdef cppclass dense_hash_map[K, D]:
-        K& key_type
-        D& data_type
-        pair[K, D]& value_type
-        uint64_t size_type
-        cppclass iterator:
-            pair[K, D]& operator*() nogil
-            iterator operator++() nogil
-            iterator operator--() nogil
-            bint operator==(iterator) nogil
-            bint operator!=(iterator) nogil
-        iterator begin()
-        iterator end()
-        uint64_t size()
-        uint64_t max_size()
-        bint empty()
-        uint64_t bucket_count()
-        uint64_t bucket_size(uint64_t i)
-        uint64_t bucket(K& key)
-        double max_load_factor()
-        void max_load_vactor(double new_grow)
-        double min_load_factor()
-        double min_load_factor(double new_grow)
-        void set_resizing_parameters(double shrink, double grow)
-        void resize(uint64_t n)
-        void rehash(uint64_t n)
-        dense_hash_map()
-        dense_hash_map(uint64_t n)
-        void swap(dense_hash_map&)
-        pair[iterator, bint] insert(pair[K, D]) nogil
-        void set_empty_key(K&)
-        void set_deleted_key(K& key)
-        void clear_deleted_key()
-        void erase(iterator pos)
-        uint64_t erase(K& k)
-        void erase(iterator first, iterator last)
-        void clear()
-        void clear_no_resize()
-        pair[iterator, iterator] equal_range(K& k)
-        D& operator[](K&) nogil
-
-
 cdef struct String:
     Py_UNICODE* chars
     size_t n
@@ -70,7 +27,7 @@ cdef class Lexicon:
 
     cpdef Lexeme lookup(self, unicode string)
     cdef size_t get(self, String* s)
 
-    cdef dense_hash_map[uint64_t, size_t] _dict
+    cdef PointerHash _dict
 
     cdef list _string_features
     cdef list _flag_features
@@ -79,7 +36,7 @@
 cdef class Language:
     cdef unicode name
     cdef PointerHash cache
-    cdef dense_hash_map[uint64_t, size_t] specials
+    cdef PointerHash specials
 
     cpdef readonly Lexicon lexicon
     cpdef readonly object tokens_class
diff --git a/spacy/lang.pyx b/spacy/lang.pyx
index a9ed5be3d..172a99de2 100644
--- a/spacy/lang.pyx
+++ b/spacy/lang.pyx
@@ -43,7 +43,7 @@
             string_features = []
        self.name = name
         self.cache = PointerHash(2 ** 22)
-        self.specials.set_empty_key(0)
+        self.specials = PointerHash(2 ** 16)
         lang_data = read_lang_data(name)
         rules, words, probs, clusters, case_stats, tag_stats = lang_data
         self.lexicon = Lexicon(words, probs, clusters, case_stats, tag_stats,
@@ -52,10 +52,7 @@
         self.tokens_class = Tokens
 
     def __dealloc__(self):
-        cdef uint64_t hashed
-        cdef size_t lex_addr
-        for (hashed, lex_addr) in self.specials:
-            free(<void*>lex_addr)
+        pass
 
     property nr_types:
         def __get__(self):
@@ -112,28 +109,24 @@
         return tokens
 
     cdef int _tokenize(self, Tokens tokens, String* string):
-        cdef Cell* cell = self.cache.lookup(string.key)
-        cdef LexemeC** lexemes
+        cdef LexemeC** lexemes = <LexemeC**>self.cache.lookup(string.key)
         cdef size_t i
-        if cell.key != 0:
-            lexemes = <LexemeC**>cell.value
+        if lexemes != NULL:
             i = 0
             while lexemes[i] != NULL:
                 tokens.push_back(lexemes[i])
                 i += 1
             return 0
-
-        cell.key = string.key
-        self.cache.filled += 1
+        cdef uint64_t key = string.key
         cdef size_t first_token = tokens.length
         cdef int split
         cdef int remaining = string.n
         cdef String prefix
         while remaining >= 1:
             split = self._split_one(string.chars, string.n)
             remaining -= split
             string_slice_prefix(string, &prefix, split)
-            lexemes = <LexemeC**>self.specials[prefix.key]
+            lexemes = <LexemeC**>self.specials.lookup(prefix.key)
             if lexemes != NULL:
                 i = 0
                 while lexemes[i] != NULL:
@@ -145,7 +138,7 @@
         cdef size_t j
         for i, j in enumerate(range(first_token, tokens.length)):
             lexemes[i] = tokens.lexemes[j]
-        cell.value = <size_t>lexemes
+        self.cache.insert(key, <size_t>lexemes)
 
     cdef int _split_one(self, Py_UNICODE* characters, size_t length):
         return length
@@ -181,7 +174,7 @@
                  string_features, flag_features):
         self._flag_features = flag_features
         self._string_features = string_features
-        self._dict.set_empty_key(0)
+        self._dict = PointerHash(2 ** 20)
         self.size = 0
         cdef Lexeme word
         for string in words:
@@ -200,9 +193,9 @@
             self.size += 1
 
     cdef size_t get(self, String* string):
-        cdef LexemeC* lexeme = <LexemeC*>self._dict[string.key]
-        if lexeme != NULL:
-            return <size_t>lexeme
+        cdef size_t lex_addr = self._dict.lookup(string.key)
+        if lex_addr != 0:
+            return lex_addr
 
         cdef unicode uni_string = string.chars[:string.n]
         views = [string_view(uni_string, 0.0, 0, {}, {})
@@ -212,8 +205,8 @@
             if flag_feature(uni_string, 0.0, {}, {}):
                 flags.add(i)
 
-        lexeme = lexeme_init(uni_string, 0, 0, views, flags)
-        self._dict[string.key] = <size_t>lexeme
+        cdef LexemeC* lexeme = lexeme_init(uni_string, 0, 0, views, flags)
+        self._dict.insert(string.key, <size_t>lexeme)
         self.size += 1
         return <size_t>lexeme
 
diff --git a/tests/test_hashing.py b/tests/test_hashing.py
new file mode 100644
index 000000000..408f0d017
--- /dev/null
+++ b/tests/test_hashing.py
@@ -0,0 +1,21 @@
+import pytest
+
+from spacy._hashing import PointerHash
+import random
+
+
+def test_insert():
+    h = PointerHash()
+    assert h[1] is None
+    h[1] = 5
+    assert h[1] == 5
+    h[2] = 6
+    assert h[1] == 5
+    assert h[2] == 6
+
+def test_resize():
+    h = PointerHash(4)
+    for i in range(1, 100):
+        value = int(i * (random.random() + 1))
+        h[i] = value
+        assert h[i] == value