# spaCy/spacy/tests/pipeline/test_coordinationruler.py
# 2024-02-19 12:39:40 +00:00
#
# 67 lines
# 2.0 KiB
# Python

import pytest
from typing import List
from spacy.tokens import Doc
import en_core_web_sm
@pytest.fixture
def nlp():
    """Return a newly loaded ``en_core_web_sm`` pipeline.

    No ``scope`` is passed to ``pytest.fixture``, so the model is loaded
    again for every test that requests this fixture; each test therefore
    adds its own ``coordination_splitter`` component to a separate pipeline.
    """
    return en_core_web_sm.load()
def _my_custom_splitting_rule(doc: Doc) -> List[str]:
    """Dummy splitting rule used to exercise custom-rule registration.

    Iterates over the tokens of ``doc`` and, for every token whose text is
    exactly ``"read"``, appends the two placeholder phrases ``"test1"`` and
    ``"test2"`` to the result.

    Args:
        doc: The document whose tokens are inspected (only ``token.text``
            is read).

    Returns:
        The collected placeholder phrases; an empty list when no token
        text equals ``"read"``.
    """
    split_phrases = []
    for token in doc:
        if token.text == "read":
            split_phrases.append("test1")
            split_phrases.append("test2")
    return split_phrases
def test_coordinationruler(nlp):
    """Check the splitter's default setup and its output for one sentence.

    Verifies the tokenization of the input sentence, the number of rules
    and the name of a newly added ``coordination_splitter`` component, and
    the texts of the two items the component returns for the sentence.
    """
    # Sanity-check the input before the splitter is involved.
    doc = nlp("I read and write books")
    assert len(doc) == 5
    assert [d.text for d in doc] == ["I", "read", "and", "write", "books"]

    # A newly added component is expected to carry three rules.
    coord_splitter = nlp.add_pipe("coordination_splitter")
    assert len(coord_splitter.rules) == 3
    assert coord_splitter.name == "coordination_splitter"

    # The component is called directly on the already-processed doc.
    doc_split = coord_splitter(doc)
    assert len(doc_split) == 2
    assert [t.text for t in doc_split] == ["I read books", "I write books"]
def test_coordinationruler_clear_rules(nlp):
    """Check that ``clear_rules`` leaves the component with no rules."""
    coord_splitter = nlp.add_pipe("coordination_splitter")
    assert len(coord_splitter.rules) == 3

    coord_splitter.clear_rules()

    # Both the length and the value of ``rules`` are checked.
    assert len(coord_splitter.rules) == 0
    assert coord_splitter.rules == []
def test_coordinationruler_add_rule(nlp):
    """Check that ``add_rule`` grows the rule list from three to four."""
    coord_splitter = nlp.add_pipe("coordination_splitter")
    assert len(coord_splitter.rules) == 3

    coord_splitter.add_rule(_my_custom_splitting_rule)

    assert len(coord_splitter.rules) == 4
def test_coordinationruler_add_rules(nlp):
    """Check ``add_rules`` and the output when only custom rules are set.

    The existing rules are cleared first, then the same dummy rule is
    registered twice. The component is expected to hold two rules and to
    return two items with the texts ``"test1"`` and ``"test2"`` for the
    sentence.
    """
    doc = nlp("I read and write books")
    coord_splitter = nlp.add_pipe("coordination_splitter")

    # Replace whatever rules the component starts with by the dummy rule.
    coord_splitter.clear_rules()
    coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule])
    assert len(coord_splitter.rules) == 2

    doc_split = coord_splitter(doc)
    assert len(doc_split) == 2
    assert [t.text for t in doc_split] == ["test1", "test2"]
def test_coordinationruler_add_default_rules(nlp):
    """Check that ``add_default_rules`` yields three rules after a clear."""
    coord_splitter = nlp.add_pipe("coordination_splitter")

    coord_splitter.clear_rules()
    assert len(coord_splitter.rules) == 0

    coord_splitter.add_default_rules()
    assert len(coord_splitter.rules) == 3