mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 08:14:15 +03:00
add spacy prefix to ngram_suggester.v1 (#8623)
This commit is contained in:
parent
733e8ceea9
commit
64fac754fe
|
@ -44,7 +44,7 @@ depth = 4
|
|||
DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"]
|
||||
|
||||
|
||||
@registry.misc("ngram_suggester.v1")
|
||||
@registry.misc("spacy.ngram_suggester.v1")
|
||||
def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
||||
"""Suggest all spans of the given lengths. Spans are returned as a ragged
|
||||
array of integers. The array has two columns, indicating the start and end
|
||||
|
@ -86,7 +86,7 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
|||
"spans_key": "sc",
|
||||
"max_positive": None,
|
||||
"model": DEFAULT_SPANCAT_MODEL,
|
||||
"suggester": {"@misc": "ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||
},
|
||||
default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
|
||||
)
|
||||
|
|
|
@ -92,7 +92,7 @@ def test_simple_train():
|
|||
def test_ngram_suggester(en_tokenizer):
|
||||
# test different n-gram lengths
|
||||
for size in [1, 2, 3]:
|
||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[size])
|
||||
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[size])
|
||||
docs = [
|
||||
en_tokenizer(text)
|
||||
for text in [
|
||||
|
@ -124,7 +124,7 @@ def test_ngram_suggester(en_tokenizer):
|
|||
assert_equal(ngrams.lengths, [max(0, len(doc) - (size - 1)) for doc in docs])
|
||||
|
||||
# test 1-3-gram suggestions
|
||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1, 2, 3])
|
||||
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
|
||||
docs = [
|
||||
en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
|
||||
]
|
||||
|
@ -173,13 +173,13 @@ def test_ngram_suggester(en_tokenizer):
|
|||
)
|
||||
|
||||
# test some empty docs
|
||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1])
|
||||
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1])
|
||||
docs = [en_tokenizer(text) for text in ["", "a", ""]]
|
||||
ngrams = ngram_suggester(docs)
|
||||
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
||||
|
||||
# test all empty docs
|
||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1])
|
||||
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1])
|
||||
docs = [en_tokenizer(text) for text in ["", "", ""]]
|
||||
ngrams = ngram_suggester(docs)
|
||||
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
||||
|
|
|
@ -31,7 +31,7 @@ architectures and their arguments and hyperparameters.
|
|||
> "spans_key": "labeled_spans",
|
||||
> "max_positive": None,
|
||||
> "model": DEFAULT_SPANCAT_MODEL,
|
||||
> "suggester": {"@misc": "ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||
> "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||
> }
|
||||
> nlp.add_pipe("spancat", config=config)
|
||||
> ```
|
||||
|
|
Loading…
Reference in New Issue
Block a user