mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-14 03:26:24 +03:00
63f5951f8b
Add the `AttributeRuler` to handle exceptions for token-level attributes. The `AttributeRuler` uses `Matcher` patterns to identify target spans and applies the specified attributes to the token at the provided index in the matched span. A negative index can be used to index from the end of the matched span. The retokenizer is used to "merge" the individual tokens and assign them the provided attributes. Helper functions can import existing tag maps and morph rules to the corresponding `Matcher` patterns. There is an additional minor bug fix for `MORPH` attributes in the retokenizer to correctly normalize the values and to handle `MORPH` alongside `_` in an attrs dict.
34 lines
891 B
Python
34 lines
891 B
Python
from .attributeruler import AttributeRuler
|
|
from .dep_parser import DependencyParser
|
|
from .entity_linker import EntityLinker
|
|
from .ner import EntityRecognizer
|
|
from .entityruler import EntityRuler
|
|
from .morphologizer import Morphologizer
|
|
from .pipe import Pipe
|
|
from .senter import SentenceRecognizer
|
|
from .sentencizer import Sentencizer
|
|
from .simple_ner import SimpleNER
|
|
from .tagger import Tagger
|
|
from .textcat import TextCategorizer
|
|
from .tok2vec import Tok2Vec
|
|
from .functions import merge_entities, merge_noun_chunks, merge_subtokens
|
|
|
|
# Public names exported by this package (what `from <package> import *` exposes).
__all__ = [
    # Classes imported from the individual component submodules.
    "AttributeRuler",
    "DependencyParser",
    "EntityLinker",
    "EntityRecognizer",
    "EntityRuler",
    "Morphologizer",
    "Pipe",
    "SentenceRecognizer",
    "Sentencizer",
    "SimpleNER",
    "Tagger",
    "TextCategorizer",
    "Tok2Vec",
    # Helpers imported from `.functions`.
    "merge_entities",
    "merge_noun_chunks",
    "merge_subtokens",
]
|