mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
add spacy prefix to ngram_suggester.v1 (#8623)
This commit is contained in:
parent
733e8ceea9
commit
64fac754fe
|
@@ -44,7 +44,7 @@ depth = 4
|
||||||
DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"]
|
DEFAULT_SPANCAT_MODEL = Config().from_str(spancat_default_config)["model"]
|
||||||
|
|
||||||
|
|
||||||
@registry.misc("ngram_suggester.v1")
|
@registry.misc("spacy.ngram_suggester.v1")
|
||||||
def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
||||||
"""Suggest all spans of the given lengths. Spans are returned as a ragged
|
"""Suggest all spans of the given lengths. Spans are returned as a ragged
|
||||||
array of integers. The array has two columns, indicating the start and end
|
array of integers. The array has two columns, indicating the start and end
|
||||||
|
@@ -86,7 +86,7 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
||||||
"spans_key": "sc",
|
"spans_key": "sc",
|
||||||
"max_positive": None,
|
"max_positive": None,
|
||||||
"model": DEFAULT_SPANCAT_MODEL,
|
"model": DEFAULT_SPANCAT_MODEL,
|
||||||
"suggester": {"@misc": "ngram_suggester.v1", "sizes": [1, 2, 3]},
|
"suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||||
},
|
},
|
||||||
default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
|
default_score_weights={"spans_sc_f": 1.0, "spans_sc_p": 0.0, "spans_sc_r": 0.0},
|
||||||
)
|
)
|
||||||
|
|
|
@@ -92,7 +92,7 @@ def test_simple_train():
|
||||||
def test_ngram_suggester(en_tokenizer):
|
def test_ngram_suggester(en_tokenizer):
|
||||||
# test different n-gram lengths
|
# test different n-gram lengths
|
||||||
for size in [1, 2, 3]:
|
for size in [1, 2, 3]:
|
||||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[size])
|
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[size])
|
||||||
docs = [
|
docs = [
|
||||||
en_tokenizer(text)
|
en_tokenizer(text)
|
||||||
for text in [
|
for text in [
|
||||||
|
@@ -124,7 +124,7 @@ def test_ngram_suggester(en_tokenizer):
|
||||||
assert_equal(ngrams.lengths, [max(0, len(doc) - (size - 1)) for doc in docs])
|
assert_equal(ngrams.lengths, [max(0, len(doc) - (size - 1)) for doc in docs])
|
||||||
|
|
||||||
# test 1-3-gram suggestions
|
# test 1-3-gram suggestions
|
||||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1, 2, 3])
|
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1, 2, 3])
|
||||||
docs = [
|
docs = [
|
||||||
en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
|
en_tokenizer(text) for text in ["a", "a b", "a b c", "a b c d", "a b c d e"]
|
||||||
]
|
]
|
||||||
|
@@ -173,13 +173,13 @@ def test_ngram_suggester(en_tokenizer):
|
||||||
)
|
)
|
||||||
|
|
||||||
# test some empty docs
|
# test some empty docs
|
||||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1])
|
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1])
|
||||||
docs = [en_tokenizer(text) for text in ["", "a", ""]]
|
docs = [en_tokenizer(text) for text in ["", "a", ""]]
|
||||||
ngrams = ngram_suggester(docs)
|
ngrams = ngram_suggester(docs)
|
||||||
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
||||||
|
|
||||||
# test all empty docs
|
# test all empty docs
|
||||||
ngram_suggester = registry.misc.get("ngram_suggester.v1")(sizes=[1])
|
ngram_suggester = registry.misc.get("spacy.ngram_suggester.v1")(sizes=[1])
|
||||||
docs = [en_tokenizer(text) for text in ["", "", ""]]
|
docs = [en_tokenizer(text) for text in ["", "", ""]]
|
||||||
ngrams = ngram_suggester(docs)
|
ngrams = ngram_suggester(docs)
|
||||||
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
||||||
|
|
|
@@ -31,7 +31,7 @@ architectures and their arguments and hyperparameters.
|
||||||
> "spans_key": "labeled_spans",
|
> "spans_key": "labeled_spans",
|
||||||
> "max_positive": None,
|
> "max_positive": None,
|
||||||
> "model": DEFAULT_SPANCAT_MODEL,
|
> "model": DEFAULT_SPANCAT_MODEL,
|
||||||
> "suggester": {"@misc": "ngram_suggester.v1", "sizes": [1, 2, 3]},
|
> "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
|
||||||
> }
|
> }
|
||||||
> nlp.add_pipe("spancat", config=config)
|
> nlp.add_pipe("spancat", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
Loading…
Reference in New Issue
Block a user