mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-06 06:03:11 +03:00
* Change span lemmas to use original whitespace (fix #8368). This is a redo of #8371, rebased on master. The new test required some changes to existing tests; I don't think the changes were significant, but I'd like someone to check them. * Remove mystery docstring. This sentence was left unfinished for years, and now we will never know how it ends.
This commit is contained in:
parent
2c105cdbce
commit
94e1346f44
|
@ -20,7 +20,17 @@ def doc(en_tokenizer):
|
|||
"O", "O", "O", "O", "O"]
|
||||
# fmt: on
|
||||
tokens = en_tokenizer(text)
|
||||
return Doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=deps, ents=ents)
|
||||
lemmas = [t.text for t in tokens] # this is not correct, just a placeholder
|
||||
spaces = [bool(t.whitespace_) for t in tokens]
|
||||
return Doc(
|
||||
tokens.vocab,
|
||||
words=[t.text for t in tokens],
|
||||
spaces=spaces,
|
||||
heads=heads,
|
||||
deps=deps,
|
||||
ents=ents,
|
||||
lemmas=lemmas,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
@ -286,7 +296,6 @@ def test_span_attrs_writable(doc):
|
|||
|
||||
|
||||
def test_span_ents_property(doc):
|
||||
"""Test span.ents for the """
|
||||
doc.ents = [
|
||||
(doc.vocab.strings["PRODUCT"], 0, 1),
|
||||
(doc.vocab.strings["PRODUCT"], 7, 8),
|
||||
|
@ -361,6 +370,12 @@ def test_span_boundaries(doc):
|
|||
span[5]
|
||||
|
||||
|
||||
def test_span_lemma(doc):
|
||||
# span lemmas should have the same number of spaces as the span
|
||||
sp = doc[1:5]
|
||||
assert len(sp.text.split(" ")) == len(sp.lemma_.split(" "))
|
||||
|
||||
|
||||
def test_sent(en_tokenizer):
|
||||
doc = en_tokenizer("Check span.sent raises error if doc is not sentencized.")
|
||||
span = doc[1:3]
|
||||
|
|
|
@ -754,7 +754,7 @@ cdef class Span:
|
|||
@property
|
||||
def lemma_(self):
|
||||
"""RETURNS (str): The span's lemma."""
|
||||
return " ".join([t.lemma_ for t in self]).strip()
|
||||
return "".join([t.lemma_ + t.whitespace_ for t in self]).strip()
|
||||
|
||||
property label_:
|
||||
"""RETURNS (str): The span's label."""
|
||||
|
|
Loading…
Reference in New Issue
Block a user