Refactor Token morph setting (#6175)

* Refactor Token morph setting
* Remove `Token.morph_`
* Add `Token.set_morph()`
* `0` resets `token.c.morph` to unset
* Any other values are passed to `Morphology.add`
* Add token.morph setter to set from MorphAnalysis
This commit is contained in:
parent da30701cd1
commit 86c3ec9c2b
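For orientation (not part of the commit): a minimal sketch of the Token morph API after this change, assuming a blank English pipeline from `spacy.blank("en")`.

    import spacy

    nlp = spacy.blank("en")
    token = nlp("They read")[1]

    # `Token.morph_` is removed; `Token.set_morph()` accepts a FEATS string,
    # a dict of features, an existing StringStore hash, or 0 to unset
    token.set_morph("Number=Plur|Person=3")
    token.set_morph({"Tense": "Pres", "VerbForm": "Fin"})
    token.set_morph(0)  # resets token.c.morph to unset

    # reading the string value now goes through str(token.morph)
    token.set_morph("Tense=Pres")
    assert str(token.morph) == "Tense=Pres"

    # token.morph can be assigned a MorphAnalysis from the same vocab
    doc = nlp("a b")
    doc[0].morph = doc[1].morph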
@@ -710,6 +710,9 @@ class Errors:
              "options: {modes}")
     E1012 = ("Entity spans and blocked/missing/outside spans should be "
              "provided to doc.set_ents as lists of `Span` objects.")
+    E1013 = ("Invalid morph: the MorphAnalysis must have the same vocab as the "
+             "token itself. To set the morph from this MorphAnalysis, set from "
+             "the string value with: `token.set_morph(str(other_morph))`.")
 
 
 @add_codes
@@ -149,7 +149,7 @@ class Morphologizer(Tagger):
         for example in get_examples():
             for i, token in enumerate(example.reference):
                 pos = token.pos_
-                morph = token.morph_
+                morph = str(token.morph)
                 # create and add the combined morph+POS label
                 morph_dict = Morphology.feats_to_dict(morph)
                 if pos:
@@ -167,7 +167,7 @@ class Morphologizer(Tagger):
             gold_array = []
             for i, token in enumerate(example.reference):
                 pos = token.pos_
-                morph = token.morph_
+                morph = str(token.morph)
                 morph_dict = Morphology.feats_to_dict(morph)
                 if pos:
                     morph_dict[self.POS_FEAT] = pos
@@ -46,9 +46,9 @@ def test_doc_array_morph(en_vocab):
     words = ["Eat", "blue", "ham"]
     morph = ["Feat=V", "Feat=J", "Feat=N"]
     doc = Doc(en_vocab, words=words, morphs=morph)
-    assert morph[0] == doc[0].morph_
-    assert morph[1] == doc[1].morph_
-    assert morph[2] == doc[2].morph_
+    assert morph[0] == str(doc[0].morph)
+    assert morph[1] == str(doc[1].morph)
+    assert morph[2] == str(doc[2].morph)
 
     feats_array = doc.to_array((ORTH, MORPH))
     assert feats_array[0][1] == doc[0].morph.key
@@ -319,15 +319,13 @@ def test_doc_from_array_morph(en_vocab):
     words = ["I", "live", "in", "New", "York", "."]
     morphs = ["Feat1=A", "Feat1=B", "Feat1=C", "Feat1=A|Feat2=D", "Feat2=E", "Feat3=F"]
     # fmt: on
-    doc = Doc(en_vocab, words=words)
-    for i, morph in enumerate(morphs):
-        doc[i].morph_ = morph
+    doc = Doc(en_vocab, words=words, morphs=morphs)
     attrs = [MORPH]
     arr = doc.to_array(attrs)
     new_doc = Doc(en_vocab, words=words)
     new_doc.from_array(attrs, arr)
-    assert [t.morph_ for t in new_doc] == morphs
-    assert [t.morph_ for t in doc] == [t.morph_ for t in new_doc]
+    assert [str(t.morph) for t in new_doc] == morphs
+    assert [str(t.morph) for t in doc] == [str(t.morph) for t in new_doc]
 
 
 def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
@@ -423,7 +421,7 @@ def test_has_annotation(en_vocab):
 
     doc[0].tag_ = "A"
     doc[0].pos_ = "X"
-    doc[0].morph_ = "Feat=Val"
+    doc[0].set_morph("Feat=Val")
     doc[0].lemma_ = "a"
     doc[0].dep_ = "dep"
     doc[0].head = doc[1]
@@ -435,7 +433,7 @@ def test_has_annotation(en_vocab):
 
     doc[1].tag_ = "A"
     doc[1].pos_ = "X"
-    doc[1].morph_ = ""
+    doc[1].set_morph("")
     doc[1].lemma_ = "a"
     doc[1].dep_ = "dep"
     doc.ents = [Span(doc, 0, 2, label="HELLO")]
@@ -538,6 +536,32 @@ def test_doc_ents_setter():
     assert [e.label_ for e in doc.ents] == ["HELLO", "WORLD"]
 
 
+def test_doc_morph_setter(en_tokenizer, de_tokenizer):
+    doc1 = en_tokenizer("a b")
+    doc1b = en_tokenizer("c d")
+    doc2 = de_tokenizer("a b")
+
+    # unset values can be copied
+    doc1[0].morph = doc1[1].morph
+    assert doc1[0].morph.key == 0
+    assert doc1[1].morph.key == 0
+
+    # morph values from the same vocab can be copied
+    doc1[0].set_morph("Feat=Val")
+    doc1[1].morph = doc1[0].morph
+    assert doc1[0].morph == doc1[1].morph
+
+    # ... also across docs
+    doc1b[0].morph = doc1[0].morph
+    assert doc1[0].morph == doc1b[0].morph
+
+    doc2[0].set_morph("Feat2=Val2")
+
+    # the morph value must come from the same vocab
+    with pytest.raises(ValueError):
+        doc1[0].morph = doc2[0].morph
+
+
 def test_doc_init_iob():
     """Test ents validation/normalization in Doc.__init__"""
     words = ["a", "b", "c", "d", "e"]
@@ -4,13 +4,13 @@ import pytest
 @pytest.fixture
 def i_has(en_tokenizer):
     doc = en_tokenizer("I has")
-    doc[0].morph_ = {"PronType": "prs"}
-    doc[1].morph_ = {
+    doc[0].set_morph({"PronType": "prs"})
+    doc[1].set_morph({
         "VerbForm": "fin",
         "Tense": "pres",
         "Number": "sing",
         "Person": "three",
-    }
+    })
 
     return doc
 
@@ -47,20 +47,20 @@ def test_morph_get(i_has):
 def test_morph_set(i_has):
     assert i_has[0].morph.get("PronType") == ["prs"]
     # set by string
-    i_has[0].morph_ = "PronType=unk"
+    i_has[0].set_morph("PronType=unk")
     assert i_has[0].morph.get("PronType") == ["unk"]
     # set by string, fields are alphabetized
-    i_has[0].morph_ = "PronType=123|NounType=unk"
-    assert i_has[0].morph_ == "NounType=unk|PronType=123"
+    i_has[0].set_morph("PronType=123|NounType=unk")
+    assert str(i_has[0].morph) == "NounType=unk|PronType=123"
     # set by dict
-    i_has[0].morph_ = {"AType": "123", "BType": "unk"}
-    assert i_has[0].morph_ == "AType=123|BType=unk"
+    i_has[0].set_morph({"AType": "123", "BType": "unk"})
+    assert str(i_has[0].morph) == "AType=123|BType=unk"
     # set by string with multiple values, fields and values are alphabetized
-    i_has[0].morph_ = "BType=c|AType=b,a"
-    assert i_has[0].morph_ == "AType=a,b|BType=c"
+    i_has[0].set_morph("BType=c|AType=b,a")
+    assert str(i_has[0].morph) == "AType=a,b|BType=c"
     # set by dict with multiple values, fields and values are alphabetized
-    i_has[0].morph_ = {"AType": "b,a", "BType": "c"}
-    assert i_has[0].morph_ == "AType=a,b|BType=c"
+    i_has[0].set_morph({"AType": "b,a", "BType": "c"})
+    assert str(i_has[0].morph) == "AType=a,b|BType=c"
 
 
 def test_morph_str(i_has):
@@ -72,25 +72,25 @@ def test_morph_property(tokenizer):
     doc = tokenizer("a dog")
 
     # set through token.morph_
-    doc[0].morph_ = "PronType=prs"
-    assert doc[0].morph_ == "PronType=prs"
+    doc[0].set_morph("PronType=prs")
+    assert str(doc[0].morph) == "PronType=prs"
     assert doc.to_array(["MORPH"])[0] != 0
 
     # unset with token.morph
-    doc[0].morph = 0
+    doc[0].set_morph(0)
     assert doc.to_array(["MORPH"])[0] == 0
 
     # empty morph is equivalent to "_"
-    doc[0].morph_ = ""
-    assert doc[0].morph_ == ""
+    doc[0].set_morph("")
+    assert str(doc[0].morph) == ""
     assert doc.to_array(["MORPH"])[0] == tokenizer.vocab.strings["_"]
 
     # "_" morph is also equivalent to empty morph
-    doc[0].morph_ = "_"
-    assert doc[0].morph_ == ""
+    doc[0].set_morph("_")
+    assert str(doc[0].morph) == ""
     assert doc.to_array(["MORPH"])[0] == tokenizer.vocab.strings["_"]
 
     # set through existing hash with token.morph
     tokenizer.vocab.strings.add("Feat=Val")
-    doc[0].morph = tokenizer.vocab.strings.add("Feat=Val")
-    assert doc[0].morph_ == "Feat=Val"
+    doc[0].set_morph(tokenizer.vocab.strings.add("Feat=Val"))
+    assert str(doc[0].morph) == "Feat=Val"
@@ -21,11 +21,11 @@ def test_doc_retokenize_merge(en_tokenizer):
     assert doc[4].text == "the beach boys"
     assert doc[4].text_with_ws == "the beach boys "
     assert doc[4].tag_ == "NAMED"
-    assert doc[4].morph_ == "Number=Plur"
+    assert str(doc[4].morph) == "Number=Plur"
     assert doc[5].text == "all night"
     assert doc[5].text_with_ws == "all night"
     assert doc[5].tag_ == "NAMED"
-    assert doc[5].morph_ == "Number=Plur"
+    assert str(doc[5].morph) == "Number=Plur"
 
 
 def test_doc_retokenize_merge_children(en_tokenizer):
@@ -27,11 +27,11 @@ def test_doc_retokenize_split(en_vocab):
     assert doc[0].text == "Los"
     assert doc[0].head.text == "Angeles"
     assert doc[0].idx == 0
-    assert doc[0].morph_ == "Number=Sing"
+    assert str(doc[0].morph) == "Number=Sing"
     assert doc[1].idx == 3
     assert doc[1].text == "Angeles"
     assert doc[1].head.text == "start"
-    assert doc[1].morph_ == "Number=Sing"
+    assert str(doc[1].morph) == "Number=Sing"
     assert doc[2].text == "start"
     assert doc[2].head.text == "."
     assert doc[3].text == "."
@@ -236,13 +236,13 @@ def test_matcher_subset_value_operator(en_vocab):
     matcher.add("M", [pattern])
     doc = Doc(en_vocab, words=["a", "b", "c"])
     assert len(matcher(doc)) == 3
-    doc[0].morph_ = "Feat=Val"
+    doc[0].set_morph("Feat=Val")
     assert len(matcher(doc)) == 3
-    doc[0].morph_ = "Feat=Val|Feat2=Val2"
+    doc[0].set_morph("Feat=Val|Feat2=Val2")
     assert len(matcher(doc)) == 3
-    doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3"
+    doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3")
     assert len(matcher(doc)) == 2
-    doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4"
+    doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4")
     assert len(matcher(doc)) == 2
 
     # IS_SUBSET acts like "IN" for attrs other than MORPH
@@ -268,11 +268,11 @@ def test_matcher_superset_value_operator(en_vocab):
     matcher.add("M", [pattern])
     doc = Doc(en_vocab, words=["a", "b", "c"])
     assert len(matcher(doc)) == 0
-    doc[0].morph_ = "Feat=Val|Feat2=Val2"
+    doc[0].set_morph("Feat=Val|Feat2=Val2")
     assert len(matcher(doc)) == 0
-    doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3"
+    doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3")
     assert len(matcher(doc)) == 1
-    doc[0].morph_ = "Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4"
+    doc[0].set_morph("Feat=Val|Feat2=Val2|Feat3=Val3|Feat4=Val4")
     assert len(matcher(doc)) == 1
 
     # IS_SUPERSET with more than one value only matches for MORPH
@@ -310,9 +310,9 @@ def test_matcher_morph_handling(en_vocab):
     doc = Doc(en_vocab, words=["a", "b", "c"])
     assert len(matcher(doc)) == 0
 
-    doc[0].morph_ = "Feat2=Val2|Feat1=Val1"
+    doc[0].set_morph("Feat2=Val2|Feat1=Val1")
     assert len(matcher(doc)) == 2
-    doc[0].morph_ = "Feat1=Val1|Feat2=Val2"
+    doc[0].set_morph("Feat1=Val1|Feat2=Val2")
     assert len(matcher(doc)) == 2
 
     # multiple values are split
@@ -324,9 +324,9 @@ def test_matcher_morph_handling(en_vocab):
     doc = Doc(en_vocab, words=["a", "b", "c"])
     assert len(matcher(doc)) == 0
 
-    doc[0].morph_ = "Feat2=Val2,Val3|Feat1=Val1"
+    doc[0].set_morph("Feat2=Val2,Val3|Feat1=Val1")
     assert len(matcher(doc)) == 1
-    doc[0].morph_ = "Feat1=Val1,Val3|Feat2=Val2"
+    doc[0].set_morph("Feat1=Val1,Val3|Feat2=Val2")
     assert len(matcher(doc)) == 2
 
 
@@ -405,7 +405,7 @@ def test_attr_pipeline_checks(en_vocab):
     doc2 = Doc(en_vocab, words=["Test"])
     doc2[0].tag_ = "TAG"
     doc2[0].pos_ = "X"
-    doc2[0].morph_ = "Feat=Val"
+    doc2[0].set_morph("Feat=Val")
     doc2[0].lemma_ = "LEMMA"
     doc3 = Doc(en_vocab, words=["Test"])
     # DEP requires DEP
@@ -190,7 +190,7 @@ def test_phrase_matcher_validation(en_vocab):
     doc2 = Doc(en_vocab, words=["Test"])
     doc2[0].tag_ = "TAG"
     doc2[0].pos_ = "X"
-    doc2[0].morph_ = "Feat=Val"
+    doc2[0].set_morph("Feat=Val")
     doc3 = Doc(en_vocab, words=["Test"])
     matcher = PhraseMatcher(en_vocab, validate=True)
     with pytest.warns(UserWarning):
@@ -217,7 +217,7 @@ def test_attr_pipeline_checks(en_vocab):
     doc2 = Doc(en_vocab, words=["Test"])
     doc2[0].tag_ = "TAG"
     doc2[0].pos_ = "X"
-    doc2[0].morph_ = "Feat=Val"
+    doc2[0].set_morph("Feat=Val")
     doc2[0].lemma_ = "LEMMA"
     doc3 = Doc(en_vocab, words=["Test"])
     # DEP requires DEP
@@ -69,9 +69,9 @@ def test_attributeruler_init(nlp, pattern_dicts):
         a.add(**p)
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
-    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
     assert doc[3].lemma_ == "cat"
-    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+    assert str(doc[3].morph) == "Case=Nom|Number=Sing"
     assert doc.has_annotation("LEMMA")
     assert doc.has_annotation("MORPH")
 
@@ -81,9 +81,9 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
     nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
-    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
     assert doc[3].lemma_ == "cat"
-    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+    assert str(doc[3].morph) == "Case=Nom|Number=Sing"
     assert doc.has_annotation("LEMMA")
     assert doc.has_annotation("MORPH")
     nlp.remove_pipe("attribute_ruler")
@@ -94,9 +94,9 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
     )
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
-    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
     assert doc[3].lemma_ == "cat"
-    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+    assert str(doc[3].morph) == "Case=Nom|Number=Sing"
     assert doc.has_annotation("LEMMA")
     assert doc.has_annotation("MORPH")
 
@@ -106,9 +106,9 @@ def test_attributeruler_score(nlp, pattern_dicts):
     nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
     doc = nlp("This is a test.")
     assert doc[2].lemma_ == "the"
-    assert doc[2].morph_ == "Case=Nom|Number=Plur"
+    assert str(doc[2].morph) == "Case=Nom|Number=Plur"
     assert doc[3].lemma_ == "cat"
-    assert doc[3].morph_ == "Case=Nom|Number=Sing"
+    assert str(doc[3].morph) == "Case=Nom|Number=Sing"
 
     dev_examples = [
         Example.from_dict(
@@ -150,10 +150,10 @@ def test_attributeruler_tag_map(nlp, tag_map):
     for i in range(len(doc)):
         if i == 4:
             assert doc[i].pos_ == "PUNCT"
-            assert doc[i].morph_ == "PunctType=peri"
+            assert str(doc[i].morph) == "PunctType=peri"
         else:
             assert doc[i].pos_ == ""
-            assert doc[i].morph_ == ""
+            assert str(doc[i].morph) == ""
 
 
 def test_attributeruler_morph_rules(nlp, morph_rules):
@@ -168,11 +168,11 @@ def test_attributeruler_morph_rules(nlp, morph_rules):
     for i in range(len(doc)):
         if i != 2:
             assert doc[i].pos_ == ""
-            assert doc[i].morph_ == ""
+            assert str(doc[i].morph) == ""
         else:
             assert doc[2].pos_ == "DET"
             assert doc[2].lemma_ == "a"
-            assert doc[2].morph_ == "Case=Nom"
+            assert str(doc[2].morph) == "Case=Nom"
 
 
 def test_attributeruler_indices(nlp):
@@ -194,14 +194,14 @@ def test_attributeruler_indices(nlp):
     for i in range(len(doc)):
         if i == 1:
             assert doc[i].lemma_ == "was"
-            assert doc[i].morph_ == "Case=Nom|Number=Sing"
+            assert str(doc[i].morph) == "Case=Nom|Number=Sing"
         elif i == 2:
             assert doc[i].lemma_ == "the"
-            assert doc[i].morph_ == "Case=Nom|Number=Plur"
+            assert str(doc[i].morph) == "Case=Nom|Number=Plur"
         elif i == 3:
             assert doc[i].lemma_ == "cat"
         else:
-            assert doc[i].morph_ == ""
+            assert str(doc[i].morph) == ""
     # raises an error when trying to modify a token outside of the match
     a.add([[{"ORTH": "a"}, {"ORTH": "test"}]], {"LEMMA": "cat"}, index=2)
     with pytest.raises(ValueError):
@@ -91,7 +91,7 @@ def test_overfitting_IO():
     doc = nlp(test_text)
     gold_morphs = ["Feat=N", "Feat=V", "", ""]
     gold_pos_tags = ["NOUN", "VERB", "ADJ", ""]
-    assert [t.morph_ for t in doc] == gold_morphs
+    assert [str(t.morph) for t in doc] == gold_morphs
     assert [t.pos_ for t in doc] == gold_pos_tags
 
     # Also test the results are still the same after IO
@@ -99,5 +99,5 @@ def test_overfitting_IO():
         nlp.to_disk(tmp_dir)
         nlp2 = util.load_model_from_path(tmp_dir)
         doc2 = nlp2(test_text)
-        assert [t.morph_ for t in doc2] == gold_morphs
+        assert [str(t.morph) for t in doc2] == gold_morphs
         assert [t.pos_ for t in doc2] == gold_pos_tags
@@ -76,7 +76,7 @@ def tagged_doc():
     for i in range(len(tags)):
         doc[i].tag_ = tags[i]
         doc[i].pos_ = pos[i]
-        doc[i].morph_ = morphs[i]
+        doc[i].set_morph(morphs[i])
         if i > 0:
             doc[i].is_sent_start = False
     return doc
@@ -242,7 +242,7 @@ def test_tag_score(tagged_doc):
     gold = {
         "tags": [t.tag_ for t in tagged_doc],
         "pos": [t.pos_ for t in tagged_doc],
-        "morphs": [t.morph_ for t in tagged_doc],
+        "morphs": [str(t.morph) for t in tagged_doc],
         "sent_starts": [1 if t.is_sent_start else -1 for t in tagged_doc],
     }
     example = Example.from_dict(tagged_doc, gold)
@@ -259,7 +259,7 @@ def test_tag_score(tagged_doc):
     tags[0] = "NN"
     pos = [t.pos_ for t in tagged_doc]
     pos[1] = "X"
-    morphs = [t.morph_ for t in tagged_doc]
+    morphs = [str(t.morph) for t in tagged_doc]
     morphs[1] = "Number=sing"
     morphs[2] = "Number=plur"
     gold = {
@@ -113,7 +113,7 @@ def test_Example_from_dict_with_morphology(annots):
     predicted = Doc(vocab, words=annots["words"])
     example = Example.from_dict(predicted, annots)
     for i, token in enumerate(example.reference):
-        assert token.morph_ == annots["morphs"][i]
+        assert str(token.morph) == annots["morphs"][i]
 
 
 @pytest.mark.parametrize(
@@ -460,7 +460,7 @@ def test_roundtrip_docs_to_docbin(doc):
     idx = [t.idx for t in doc]
     tags = [t.tag_ for t in doc]
     pos = [t.pos_ for t in doc]
-    morphs = [t.morph_ for t in doc]
+    morphs = [str(t.morph) for t in doc]
     lemmas = [t.lemma_ for t in doc]
     deps = [t.dep_ for t in doc]
     heads = [t.head.i for t in doc]
@@ -482,7 +482,7 @@ def test_roundtrip_docs_to_docbin(doc):
     assert idx == [t.idx for t in reloaded_example.reference]
     assert tags == [t.tag_ for t in reloaded_example.reference]
     assert pos == [t.pos_ for t in reloaded_example.reference]
-    assert morphs == [t.morph_ for t in reloaded_example.reference]
+    assert morphs == [str(t.morph) for t in reloaded_example.reference]
     assert lemmas == [t.lemma_ for t in reloaded_example.reference]
     assert deps == [t.dep_ for t in reloaded_example.reference]
     assert heads == [t.head.i for t in reloaded_example.reference]
@@ -101,7 +101,7 @@ class DocBin:
             self.strings.add(token.text)
             self.strings.add(token.tag_)
             self.strings.add(token.lemma_)
-            self.strings.add(token.morph_)
+            self.strings.add(str(token.morph))
             self.strings.add(token.dep_)
             self.strings.add(token.ent_type_)
             self.strings.add(token.ent_kb_id_)
@@ -1248,7 +1248,7 @@ cdef class Doc:
         for token in self:
             strings.add(token.tag_)
             strings.add(token.lemma_)
-            strings.add(token.morph_)
+            strings.add(str(token.morph))
             strings.add(token.dep_)
             strings.add(token.ent_type_)
             strings.add(token.ent_kb_id_)
@@ -215,20 +215,20 @@ cdef class Token:
         def __get__(self):
             return MorphAnalysis.from_id(self.vocab, self.c.morph)
 
-        def __set__(self, attr_t morph):
-            if morph == 0:
-                self.c.morph = morph
-            elif morph in self.vocab.strings:
-                self.morph_ = self.vocab.strings[morph]
-            else:
-                raise ValueError(Errors.E1009.format(val=morph))
+        def __set__(self, MorphAnalysis morph):
+            # Check that the morph has the same vocab
+            if self.vocab != morph.vocab:
+                raise ValueError(Errors.E1013)
+            self.c.morph = morph.c.key
 
-    property morph_:
-        def __get__(self):
-            return str(MorphAnalysis.from_id(self.vocab, self.c.morph))
-
-        def __set__(self, features):
-            cdef hash_t key = self.vocab.morphology.add(features)
+    def set_morph(self, features):
+        cdef hash_t key
+        if features is 0:
+            self.c.morph = 0
+        else:
+            if isinstance(features, int):
+                features = self.vocab.strings[features]
+            key = self.vocab.morphology.add(features)
             self.c.morph = key
 
     @property
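A short illustration of the new vocab check above (not part of the commit): assigning a MorphAnalysis from a different vocab raises ValueError (E1013), and per that error message the value is copied through its FEATS string instead. This sketch assumes blank English and German pipelines.

    import spacy

    en_doc = spacy.blank("en")("a b")
    de_doc = spacy.blank("de")("a b")
    de_doc[0].set_morph("Case=Nom")

    # direct assignment across vocabs raises ValueError (E1013):
    #     en_doc[0].morph = de_doc[0].morph
    # so copy via the string value, as the error message suggests:
    en_doc[0].set_morph(str(de_doc[0].morph))
    assert str(en_doc[0].morph) == "Case=Nom"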
@@ -226,7 +226,7 @@ cdef class Example:
                 "TAG": [t.tag_ for t in self.reference],
                 "LEMMA": [t.lemma_ for t in self.reference],
                 "POS": [t.pos_ for t in self.reference],
-                "MORPH": [t.morph_ for t in self.reference],
+                "MORPH": [str(t.morph) for t in self.reference],
                 "HEAD": [t.head.i for t in self.reference],
                 "DEP": [t.dep_ for t in self.reference],
                 "SENT_START": [int(bool(t.is_sent_start)) for t in self.reference]
@@ -44,7 +44,7 @@ def docs_to_json(docs, doc_id=0, ner_missing_tag="O"):
                 if include_annotation["POS"]:
                     json_token["pos"] = token.pos_
                 if include_annotation["MORPH"]:
-                    json_token["morph"] = token.morph_
+                    json_token["morph"] = str(token.morph)
                 if include_annotation["LEMMA"]:
                     json_token["lemma"] = token.lemma_
                 if include_annotation["DEP"]: