mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-02 11:20:19 +03:00
Fix case sensitivity
This commit is contained in:
parent
146d286da6
commit
5052ed8cad
|
@@ -983,7 +983,7 @@ def test_doc_spans_setdefault(en_tokenizer):
|
|||
def test_get_affixes_good_case(en_tokenizer, case_sensitive):
|
||||
doc = en_tokenizer("spaCy✨ and Prodigy")
|
||||
prefixes = doc.get_affixes(False, case_sensitive, 1, 5, "", 2, 3)
|
||||
suffixes = doc.get_affixes(True, case_sensitive, 2, 6, "xx✨rp", 2, 3)
|
||||
suffixes = doc.get_affixes(True, case_sensitive, 2, 6, "xx✨rP", 2, 3)
|
||||
assert prefixes[0][3, 3, 3] == suffixes[0][3, 3, 3]
|
||||
assert prefixes[0][3, 3, 2] == suffixes[0][3, 3, 4]
|
||||
assert (prefixes[0][0, :, 1:] == 0).all()
|
||||
|
@@ -994,10 +994,16 @@ def test_get_affixes_good_case(en_tokenizer, case_sensitive):
|
|||
assert not (suffixes[0][1, :, 2:] == 0).all()
|
||||
assert (suffixes[0][1, :, 3:] == 0).all()
|
||||
assert suffixes[1][0][1].tolist() == [10024, 0]
|
||||
if case_sensitive:
|
||||
assert suffixes[1][0][3].tolist() == [114, 80]
|
||||
else:
|
||||
assert suffixes[1][0][3].tolist() == [114, 112]
|
||||
suffixes = doc.get_affixes(True, case_sensitive, 2, 6, "xx✨rp", 2, 3)
|
||||
if case_sensitive:
|
||||
assert suffixes[1][0][3].tolist() == [114, 0]
|
||||
else:
|
||||
assert suffixes[1][0][3].tolist() == [114, 112]
|
||||
|
||||
|
||||
|
||||
def test_get_affixes_4_byte_normal_char(en_tokenizer):
|
||||
|
|
|
@@ -1755,6 +1755,7 @@ cdef class Doc:
|
|||
token_attrs = [t.orth_ for t in self]
|
||||
else:
|
||||
token_attrs = [t.lower_ for t in self]
|
||||
special_chars = special_chars.lower()
|
||||
cdef unsigned int sc_len = len(special_chars)
|
||||
cdef const unsigned char[:] sc_bytes = get_utf16_memoryview(special_chars, True)
|
||||
cdef np.ndarray[np.uint16_t, ndim=1] scs = numpy.ndarray((sc_len,), buffer=sc_bytes, dtype="uint16")
|
||||
|
|
Loading…
Reference in New Issue
Block a user