mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-15 11:00:34 +03:00
67 lines
2.0 KiB
Python
67 lines
2.0 KiB
Python
import pytest
|
|
from typing import List
|
|
from spacy.tokens import Doc
|
|
|
|
import en_core_web_sm
|
|
|
|
|
|
@pytest.fixture
|
|
def nlp():
|
|
return en_core_web_sm.load()
|
|
|
|
|
|
def _my_custom_splitting_rule(doc: Doc) -> List[str]:
|
|
split_phrases = []
|
|
for token in doc:
|
|
if token.text == "read":
|
|
split_phrases.append("test1")
|
|
split_phrases.append("test2")
|
|
return split_phrases
|
|
|
|
|
|
def test_coordinationruler(nlp):
|
|
doc = nlp("I read and write books")
|
|
assert len(doc) == 5
|
|
assert [d.text for d in doc] == ["I", "read", "and", "write", "books"]
|
|
coord_splitter = nlp.add_pipe("coordination_splitter")
|
|
assert len(coord_splitter.rules) == 3
|
|
assert coord_splitter.name == "coordination_splitter"
|
|
doc_split = coord_splitter(doc)
|
|
assert len(doc_split) == 2
|
|
assert [t.text for t in doc_split] == ["I read books", "I write books"]
|
|
|
|
|
|
def test_coordinationruler_clear_rules(nlp):
|
|
coord_splitter = nlp.add_pipe("coordination_splitter")
|
|
assert len(coord_splitter.rules) == 3
|
|
coord_splitter.clear_rules()
|
|
assert len(coord_splitter.rules) == 0
|
|
assert coord_splitter.rules == []
|
|
|
|
|
|
def test_coordinationruler_add_rule(nlp):
|
|
coord_splitter = nlp.add_pipe("coordination_splitter")
|
|
assert len(coord_splitter.rules) == 3
|
|
coord_splitter.add_rule(_my_custom_splitting_rule)
|
|
assert len(coord_splitter.rules) == 4
|
|
|
|
|
|
def test_coordinationruler_add_rules(nlp):
|
|
doc = nlp("I read and write books")
|
|
coord_splitter = nlp.add_pipe("coordination_splitter")
|
|
coord_splitter.clear_rules()
|
|
coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule])
|
|
assert len(coord_splitter.rules) == 2
|
|
doc_split = coord_splitter(doc)
|
|
assert len(doc_split) == 2
|
|
|
|
assert [t.text for t in doc_split] == ["test1", "test2"]
|
|
|
|
|
|
def test_coordinationruler_add_default_rules(nlp):
|
|
coord_splitter = nlp.add_pipe("coordination_splitter")
|
|
coord_splitter.clear_rules()
|
|
assert len(coord_splitter.rules) == 0
|
|
coord_splitter.add_default_rules()
|
|
assert len(coord_splitter.rules) == 3
|