diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 5cdbabf52..7f6898fe4 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -199,7 +199,7 @@ def _verify_rich_config_group(
     if lengths is not None or rows is not None:
         if is_search_char_group and (search_chars is None or len(search_chars) == 0):
             raise ValueError(Errors.E1047.format(label=label))
-        if len(search_chars) > 63:
+        if search_chars is not None and len(search_chars) > 63:
             raise ValueError(Errors.E1048.format(label=label))
         if lengths is None or rows is None:
             raise ValueError(Errors.E1047.format(label=label))
@@ -262,6 +262,12 @@ def RichMultiHashEmbed(
     plural noun does not become `a` if it is the third or fourth vowel from the
     end of the word.

+    There are a few rare situations where a graphical character is expressed
+    using more than one Unicode code point, e.g. the lower-case form of the
+    Turkish letter *İ*, which is *i* plus a combining dot above. Such
+    situations are supported, but the lengths of prefixes, suffixes and
+    character search results may need to be increased accordingly.
+
     All lengths must be specified in ascending order.

     width (int): The output width. Also used as the width of the embedding tables.
diff --git a/spacy/ml/richfeatureextractor.py b/spacy/ml/richfeatureextractor.py
index 488c33a60..e623da45f 100644
--- a/spacy/ml/richfeatureextractor.py
+++ b/spacy/ml/richfeatureextractor.py
@@ -2,7 +2,7 @@ from typing import List, Optional, Callable, Tuple
 from spacy.util import get_search_char_byte_arrays

 # from ..util import get_arrays_for_search_chars
-from thinc.types import Ints1d, Ints2d
+from thinc.types import Ints2d
 from thinc.api import Model, registry, get_current_ops

 from ..tokens import Doc
@@ -21,46 +21,35 @@ def RichFeatureExtractor(
 ) -> Model[List[Doc], List[Ints2d]]:
     ops = get_current_ops()
     if pref_search_chars is not None:
-        (
-            ps_1byte_ch,
-            ps_2byte_ch,
-            ps_3byte_ch,
-            ps_4byte_ch,
-        ) = get_search_char_byte_arrays(pref_search_chars, case_sensitive)
+        ps_search_chars, ps_width_offsets = get_search_char_byte_arrays(pref_search_chars, case_sensitive)
     else:
-        ps_1byte_ch = ps_2byte_ch = ps_3byte_ch = ps_4byte_ch = bytes()
+        ps_search_chars = bytes()
+        ps_width_offsets = bytes()
     if suff_search_chars is not None:
-        (
-            ss_1byte_ch,
-            ss_2byte_ch,
-            ss_3byte_ch,
-            ss_4byte_ch,
-        ) = get_search_char_byte_arrays(suff_search_chars, case_sensitive)
+
+        ss_search_chars, ss_width_offsets = get_search_char_byte_arrays(suff_search_chars, case_sensitive)
     else:
-        ss_1byte_ch = ss_2byte_ch = ss_3byte_ch = ss_4byte_ch = bytes()
+        ss_search_chars = bytes()
+        ss_width_offsets = bytes()
     return Model(
         "extract_character_combination_hashes",
         forward,
         attrs={
             "case_sensitive": case_sensitive,
-            "pref_lengths": bytes(pref_lengths)
+            "p_lengths": bytes(pref_lengths)
             if pref_lengths is not None
             else bytes(),
-            "suff_lengths": bytes(suff_lengths)
+            "s_lengths": bytes(suff_lengths)
             if suff_lengths is not None
             else bytes(),
-            "pref_search_1_byte": ps_1byte_ch,
-            "pref_search_2_bytes": ps_2byte_ch,
-            "pref_search_3_bytes": ps_3byte_ch,
-            "pref_search_4_bytes": ps_4byte_ch,
-            "pref_search_lengths": bytes(pref_search_lengths)
+            "ps_search_chars": ps_search_chars,
+            "ps_width_offsets": ps_width_offsets,
+            "ps_lengths": bytes(pref_search_lengths)
             if pref_search_lengths is not None
             else bytes(),
-            "suff_search_1_byte": ss_1byte_ch,
-            "suff_search_2_bytes": ss_2byte_ch,
-            "suff_search_3_bytes": ss_3byte_ch,
-            "suff_search_4_bytes": ss_4byte_ch,
-            "suff_search_lengths": bytes(suff_search_lengths)
+            "ss_search_chars": ss_search_chars,
+            "ss_width_offsets": ss_width_offsets,
+            "ss_lengths": bytes(suff_search_lengths)
             if suff_search_lengths is not None
             else bytes(),
         },
@@ -72,36 +61,28 @@ def forward(
 ) -> Tuple[List[Ints2d], Callable]:
     ops = model.ops
     case_sensitive: bool = model.attrs["case_sensitive"]
-    pref_lengths: bytes = model.attrs["pref_lengths"]
-    suff_lengths: bytes = model.attrs["suff_lengths"]
-    ps_1byte_ch: bytes = model.attrs["pref_search_1_byte"]
-    ps_2byte_ch: bytes = model.attrs["pref_search_2_bytes"]
-    ps_3byte_ch: bytes = model.attrs["pref_search_3_bytes"]
-    ps_4byte_ch: bytes = model.attrs["pref_search_4_bytes"]
-    pref_search_lengths: bytes = model.attrs["pref_search_lengths"]
-    ss_1byte_ch: bytes = model.attrs["pref_search_1_byte"]
-    ss_2byte_ch: bytes = model.attrs["pref_search_2_bytes"]
-    ss_3byte_ch: bytes = model.attrs["pref_search_3_bytes"]
-    ss_4byte_ch: bytes = model.attrs["pref_search_4_bytes"]
-    suff_search_lengths: bytes = model.attrs["suff_search_lengths"]
+    p_lengths: bytes = model.attrs["p_lengths"]
+    s_lengths: bytes = model.attrs["s_lengths"]
+    ps_search_chars: bytes = model.attrs["ps_search_chars"]
+    ps_width_offsets: bytes = model.attrs["ps_width_offsets"]
+    ps_lengths: bytes = model.attrs["ps_lengths"]
+    ss_search_chars: bytes = model.attrs["ss_search_chars"]
+    ss_width_offsets: bytes = model.attrs["ss_width_offsets"]
+    ss_lengths: bytes = model.attrs["ss_lengths"]
     features: List[Ints2d] = []
     for doc in docs:
         hashes = doc.get_character_combination_hashes(
             cs=case_sensitive,
-            p_lengths=pref_lengths,
-            s_lengths=suff_lengths,
-            ps_1byte_ch=ps_1byte_ch,
-            ps_2byte_ch=ps_2byte_ch,
-            ps_3byte_ch=ps_3byte_ch,
-            ps_4byte_ch=ps_4byte_ch,
-            ps_lengths=pref_search_lengths,
-            ss_1byte_ch=ss_1byte_ch,
-            ss_2byte_ch=ss_2byte_ch,
-            ss_3byte_ch=ss_3byte_ch,
-            ss_4byte_ch=ss_4byte_ch,
-            ss_lengths=suff_search_lengths,
+            p_lengths=p_lengths,
+            s_lengths=s_lengths,
+            ps_search_chars=ps_search_chars,
+            ps_width_offsets=ps_width_offsets,
+            ps_lengths=ps_lengths,
+            ss_search_chars=ss_search_chars,
+            ss_width_offsets=ss_width_offsets,
+            ss_lengths=ss_lengths,
         )
-        features.append(ops.asarray2i(hashes))
+        features.append(ops.asarray2i(hashes, dtype="uint64"))

     backprop: Callable[[List[Ints2d]], List] = lambda d_features: []
     return features, backprop
diff --git a/spacy/tokens/doc.pxd b/spacy/tokens/doc.pxd
index fbb537408..5f215c009 100644
--- a/spacy/tokens/doc.pxd
+++ b/spacy/tokens/doc.pxd
@@ -73,7 +73,7 @@ cdef int _write_hashes(
     const unsigned char* aff_l_buf,
     const unsigned char* offset_buf,
     const int res_buf_last,
-    np.uint32_t* hashes_ptr,
+    np.uint64_t* hashes_ptr,
 ) nogil


diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 452dfb652..711436a0f 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -1803,15 +1803,15 @@ cdef class Doc:
         cdef unsigned char* ss_res_buf = <unsigned char*>mem.alloc(ss_max_l, 4)
         cdef unsigned char* ss_l_buf = <unsigned char*>mem.alloc(ss_max_l, 1)
         cdef int doc_l = self.length, total_hashes = doc_l * hashes_per_tok
-        cdef np.uint32_t* hashes_ptr = <np.uint32_t*>mem.alloc(
-            total_hashes, sizeof(np.uint32_t))
+        cdef np.uint64_t* hashes_ptr = <np.uint64_t*>mem.alloc(
+            total_hashes, sizeof(np.uint64_t))

         # Define working variables
         cdef TokenC tok_c
         cdef int tok_i, tok_str_l
         cdef attr_t num_tok_attr
         cdef const unsigned char* tok_str
-        cdef np.uint32_t* w_hashes_ptr = hashes_ptr
+        cdef np.uint64_t* w_hashes_ptr = hashes_ptr

         for tok_i in range(doc_l):
             tok_c = self.c[tok_i]
@@ -1837,9 +1837,9 @@ cdef class Doc:
                     ss_max_l, True, ss_res_buf, ss_l_buf)
                 w_hashes_ptr += _write_hashes(
                     ss_res_buf, ss_lengths, ss_l_buf, 0, w_hashes_ptr)

-        cdef np.ndarray[np.uint32_t, ndim=2] hashes = numpy.empty(
-            (doc_l, hashes_per_tok), dtype="uint32")
-        memcpy(hashes.data, hashes_ptr, total_hashes * sizeof(np.uint32_t))
+        cdef np.ndarray[np.uint64_t, ndim=2] hashes = numpy.empty(
+            (doc_l, hashes_per_tok), dtype="uint64")
+        memcpy(hashes.data, hashes_ptr, total_hashes * sizeof(np.uint64_t))

         return hashes
@@ -2173,7 +2173,7 @@ cdef int _write_hashes(
     const unsigned char* aff_l_buf,
     const unsigned char* offset_buf,
     const int res_buf_last,
-    np.uint32_t* hashes_ptr,
+    np.uint64_t* hashes_ptr,
 ) nogil:
     """
     Write FNV1A hashes for a token/rich property group combination.
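
Note on the `RichMultiHashEmbed` docstring addition above: the Turkish example is directly verifiable, since Unicode's default case mapping lowercases *İ* (U+0130) to *i* followed by U+0307 COMBINING DOT ABOVE. The snippet below illustrates why a single graphical character can consume more than one unit of the configured prefix, suffix, or search-result lengths:

```python
# One graphical character can be several code points (and more UTF-8 bytes):
# Unicode lowercases Turkish İ (U+0130) to "i" + U+0307 COMBINING DOT ABOVE.
lowered = "İ".lower()
print(len("İ"))                        # 1 code point
print(len(lowered))                    # 2 code points
print([hex(ord(c)) for c in lowered])  # ['0x69', '0x307']
print(len(lowered.encode("utf-8")))    # 3 UTF-8 bytes
```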
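The refactor in `richfeatureextractor.py` collapses the four per-width byte arrays (`*_1byte_ch` through `*_4byte_ch`) into a single `search_chars` buffer plus a `width_offsets` byte string. The authoritative packing is defined by `spacy.util.get_search_char_byte_arrays`, which this diff does not show; the sketch below is only a hypothetical reconstruction, assuming the UTF-8-encoded search characters are deduplicated, sorted, concatenated in order of encoded width, and the offsets mark where each width group begins and ends. Under that assumption, the 63-character cap enforced in `_verify_rich_config_group` would also keep every offset within a single byte (63 × 4 = 252 ≤ 255).

```python
from typing import Tuple


def packed_search_chars(search_chars: str, case_sensitive: bool) -> Tuple[bytes, bytes]:
    """Hypothetical sketch of the (search_chars, width_offsets) packing.

    The real layout lives in spacy.util.get_search_char_byte_arrays.
    """
    if not case_sensitive:
        search_chars = search_chars.lower()
    encoded = sorted({ch.encode("utf-8") for ch in search_chars})
    buf = b""
    offsets = []
    for width in (1, 2, 3, 4):
        offsets.append(len(buf))  # byte offset where this width group starts
        buf += b"".join(e for e in encoded if len(e) == width)
    offsets.append(len(buf))      # end of the 4-byte group
    return buf, bytes(offsets)


buf, offsets = packed_search_chars("aeéöü", case_sensitive=False)
print(buf, list(offsets))  # b'ae\xc3\xa9\xc3\xb6\xc3\xbc' [0, 2, 8, 8, 8]
```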
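Finally, the `doc.pxd`/`doc.pyx` hunks widen the hash buffers and the returned array from `uint32` to `uint64`, matching the FNV1A hashes that `_write_hashes` computes (its Cython body is not part of this diff). For reference, a minimal pure-Python 64-bit FNV-1a with the standard offset basis and prime shows why the stored values no longer fit in 32 bits:

```python
def fnv1a_64(data: bytes) -> int:
    """Reference 64-bit FNV-1a; the actual Cython hashing code is not shown in this diff."""
    h = 0xCBF29CE484222325  # standard 64-bit FNV offset basis
    for byte in data:
        h ^= byte
        h = (h * 0x100000001B3) & 0xFFFFFFFFFFFFFFFF  # 64-bit FNV prime, wrapped to 64 bits
    return h


print(hex(fnv1a_64("spacy".encode("utf-8"))))  # typically a full 64-bit value
```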