mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
b841d3fe75
* Add sent_starts to GoldParse

* Add SentTagger pipeline component

  Add `SentTagger` pipeline component as a subclass of `Tagger`.

  * Model reduces default parameters from `Tagger` to be small and fast
  * Hard-coded set of two labels:
    * S (1): token at beginning of sentence
    * I (0): all other sentence positions
  * Sets `token.sent_start` values

* Add sentence segmentation to Scorer

  Report `sent_p/r/f` for sentence boundaries, which may be provided by various pipeline components.

* Add sentence segmentation to CLI evaluate

* Add senttagger metrics/scoring to train CLI

* Rename SentTagger to SentenceRecognizer

* Add SentenceRecognizer to spacy.pipes imports

* Add SentenceRecognizer serialization test

* Shorten component name to sentrec

* Remove duplicates from train CLI output metrics
30 lines
793 B
Python
30 lines
793 B
Python
# coding: utf8
from __future__ import unicode_literals

# Re-export names from the sibling submodules so that they can be imported
# directly from this package.
from .pipes import Tagger, DependencyParser, EntityRecognizer, EntityLinker
from .pipes import TextCategorizer, Tensorizer, Pipe, Sentencizer
from .pipes import SentenceRecognizer
from .morphologizer import Morphologizer
from .entityruler import EntityRuler
from .hooks import SentenceSegmenter, SimilarityHook
from .functions import merge_entities, merge_noun_chunks, merge_subtokens

# Explicit public API: the names a star-import of this package provides.
# Every entry corresponds to one of the names imported above.
__all__ = [
    "Tagger",
    "DependencyParser",
    "EntityRecognizer",
    "EntityLinker",
    "TextCategorizer",
    "Tensorizer",
    "Pipe",
    "Morphologizer",
    "EntityRuler",
    "Sentencizer",
    "SentenceSegmenter",
    "SentenceRecognizer",
    "SimilarityHook",
    "merge_entities",
    "merge_noun_chunks",
    "merge_subtokens",
]