mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
spacy.ngram_range_suggester.v1 (#8699)
This commit is contained in:
parent
e117573822
commit
77859beb99
|
@ -78,6 +78,15 @@ def build_ngram_suggester(sizes: List[int]) -> Callable[[List[Doc]], Ragged]:
|
||||||
return ngram_suggester
|
return ngram_suggester
|
||||||
|
|
||||||
|
|
||||||
|
@registry.misc("spacy.ngram_range_suggester.v1")
def build_ngram_range_suggester(
    min_size: int, max_size: int
) -> Callable[[List[Doc]], Ragged]:
    """Suggest all spans of the given lengths between a given min and max
    value - both inclusive. Spans are returned as a ragged array of integers.
    The array has two columns, indicating the start and end position.

    min_size (int): The shortest span length to suggest (inclusive).
    max_size (int): The longest span length to suggest (inclusive).
    RETURNS (Callable[[List[Doc]], Ragged]): The suggester function produced
        by build_ngram_suggester for every length in that range.
    """
    # build_ngram_suggester is declared as taking a List[int], so hand it a
    # concrete list rather than a lazy range object. The +1 makes max_size
    # inclusive, since range() excludes its upper bound.
    sizes = list(range(min_size, max_size + 1))
    return build_ngram_suggester(sizes)
|
||||||
|
|
||||||
|
|
||||||
@Language.factory(
|
@Language.factory(
|
||||||
"spancat",
|
"spancat",
|
||||||
assigns=["doc.spans"],
|
assigns=["doc.spans"],
|
||||||
|
|
|
@ -183,3 +183,22 @@ def test_ngram_suggester(en_tokenizer):
|
||||||
docs = [en_tokenizer(text) for text in ["", "", ""]]
|
docs = [en_tokenizer(text) for text in ["", "", ""]]
|
||||||
ngrams = ngram_suggester(docs)
|
ngrams = ngram_suggester(docs)
|
||||||
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
assert_equal(ngrams.lengths, [len(doc) for doc in docs])
|
||||||
|
|
||||||
|
|
||||||
|
def test_ngram_sizes(en_tokenizer):
    """Check the range suggester against the explicit-sizes suggester."""
    make_size_suggester = registry.misc.get("spacy.ngram_suggester.v1")
    make_range_suggester = registry.misc.get("spacy.ngram_range_suggester.v1")

    # Documents of one to five tokens.
    texts = ["a", "a b", "a b c", "a b c d", "a b c d e"]
    docs = [en_tokenizer(text) for text in texts]

    # An inclusive 1..3 range must behave exactly like sizes=[1, 2, 3].
    by_sizes = make_size_suggester(sizes=[1, 2, 3])(docs)
    by_range = make_range_suggester(min_size=1, max_size=3)(docs)
    assert_equal(by_sizes.lengths, [1, 3, 6, 9, 12])
    assert_equal(by_sizes.lengths, by_range.lengths)
    assert_equal(by_sizes.data, by_range.data)

    # A second range that does not start at 1: the one-token doc gets no spans.
    shifted = make_range_suggester(min_size=2, max_size=4)(docs)
    assert_equal(shifted.lengths, [0, 1, 3, 6, 9])
|
||||||
|
|
|
@ -451,3 +451,24 @@ integers. The array has two columns, indicating the start and end position.
|
||||||
| ----------- | -------------------------------------------------------------------------------------------------------------------- |
|
| ----------- | -------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `sizes` | The phrase lengths to suggest. For example, `[1, 2]` will suggest phrases consisting of 1 or 2 tokens. ~~List[int]~~ |
|
| `sizes` | The phrase lengths to suggest. For example, `[1, 2]` will suggest phrases consisting of 1 or 2 tokens. ~~List[int]~~ |
|
||||||
| **CREATES** | The suggester function. ~~Callable[[List[Doc]], Ragged]~~ |
|
| **CREATES** | The suggester function. ~~Callable[[List[Doc]], Ragged]~~ |
|
||||||
|
|
||||||
|
### spacy.ngram_range_suggester.v1 {#ngram_range_suggester}
|
||||||
|
|
||||||
|
> #### Example Config
|
||||||
|
>
|
||||||
|
> ```ini
|
||||||
|
> [components.spancat.suggester]
|
||||||
|
> @misc = "spacy.ngram_range_suggester.v1"
|
||||||
|
> min_size = 2
|
||||||
|
> max_size = 4
|
||||||
|
> ```
|
||||||
|
|
||||||
|
Suggest all spans of at least length `min_size` and at most length `max_size`
|
||||||
|
(both inclusive). Spans are returned as a ragged array of integers. The array
|
||||||
|
has two columns, indicating the start and end position.
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| ----------- | ------------------------------------------------------------ |
|
||||||
|
| `min_size`  | The minimum phrase length to suggest (inclusive). ~~int~~    |
|
||||||
|
| `max_size`  | The maximum phrase length to suggest (inclusive). ~~int~~    |
|
||||||
|
| **CREATES** | The suggester function. ~~Callable[[List[Doc]], Ragged]~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user