From 356a3410967a7daeecc36fecd2faa39c4cdc9c0c Mon Sep 17 00:00:00 2001 From: richardpaulhudson Date: Fri, 14 Oct 2022 20:29:22 +0200 Subject: [PATCH] Add note --- spacy/util.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spacy/util.py b/spacy/util.py index 16e38917d..7a9a5e7ee 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -1764,6 +1764,9 @@ def get_byte_arrays_for_search_chars( If *case_sensitive==False*, the lower- or uppercase counterparts of any characters that have case are added to the search byte arrays, and both the original character and its other-cased counterpart map to the lower-case version in the finding byte array. + + All encodings are little-endian regardless of architecture, as this is what is expected by the + murmurhash library used downstream. """ def encode(ch: str, width: int) -> bytes: