mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Merge pull request #9951 from explosion/master
Update develop branch with master
This commit is contained in:
commit
b8106e0f95
|
@ -1,11 +1,8 @@
|
||||||
recursive-include include *.h
|
|
||||||
recursive-include spacy *.pyi *.pyx *.pxd *.txt *.cfg *.jinja *.toml
|
recursive-include spacy *.pyi *.pyx *.pxd *.txt *.cfg *.jinja *.toml
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include README.md
|
include README.md
|
||||||
include pyproject.toml
|
include pyproject.toml
|
||||||
include spacy/py.typed
|
include spacy/py.typed
|
||||||
recursive-exclude spacy/lang *.json
|
recursive-include spacy/cli *.yml
|
||||||
recursive-include spacy/lang *.json.gz
|
|
||||||
recursive-include spacy/cli *.json *.yml
|
|
||||||
recursive-include licenses *
|
recursive-include licenses *
|
||||||
recursive-exclude spacy *.cpp
|
recursive-exclude spacy *.cpp
|
||||||
|
|
28
setup.cfg
28
setup.cfg
|
@ -77,31 +77,33 @@ transformers =
|
||||||
ray =
|
ray =
|
||||||
spacy_ray>=0.1.0,<1.0.0
|
spacy_ray>=0.1.0,<1.0.0
|
||||||
cuda =
|
cuda =
|
||||||
cupy>=5.0.0b4,<10.0.0
|
cupy>=5.0.0b4,<11.0.0
|
||||||
cuda80 =
|
cuda80 =
|
||||||
cupy-cuda80>=5.0.0b4,<10.0.0
|
cupy-cuda80>=5.0.0b4,<11.0.0
|
||||||
cuda90 =
|
cuda90 =
|
||||||
cupy-cuda90>=5.0.0b4,<10.0.0
|
cupy-cuda90>=5.0.0b4,<11.0.0
|
||||||
cuda91 =
|
cuda91 =
|
||||||
cupy-cuda91>=5.0.0b4,<10.0.0
|
cupy-cuda91>=5.0.0b4,<11.0.0
|
||||||
cuda92 =
|
cuda92 =
|
||||||
cupy-cuda92>=5.0.0b4,<10.0.0
|
cupy-cuda92>=5.0.0b4,<11.0.0
|
||||||
cuda100 =
|
cuda100 =
|
||||||
cupy-cuda100>=5.0.0b4,<10.0.0
|
cupy-cuda100>=5.0.0b4,<11.0.0
|
||||||
cuda101 =
|
cuda101 =
|
||||||
cupy-cuda101>=5.0.0b4,<10.0.0
|
cupy-cuda101>=5.0.0b4,<11.0.0
|
||||||
cuda102 =
|
cuda102 =
|
||||||
cupy-cuda102>=5.0.0b4,<10.0.0
|
cupy-cuda102>=5.0.0b4,<11.0.0
|
||||||
cuda110 =
|
cuda110 =
|
||||||
cupy-cuda110>=5.0.0b4,<10.0.0
|
cupy-cuda110>=5.0.0b4,<11.0.0
|
||||||
cuda111 =
|
cuda111 =
|
||||||
cupy-cuda111>=5.0.0b4,<10.0.0
|
cupy-cuda111>=5.0.0b4,<11.0.0
|
||||||
cuda112 =
|
cuda112 =
|
||||||
cupy-cuda112>=5.0.0b4,<10.0.0
|
cupy-cuda112>=5.0.0b4,<11.0.0
|
||||||
cuda113 =
|
cuda113 =
|
||||||
cupy-cuda113>=5.0.0b4,<10.0.0
|
cupy-cuda113>=5.0.0b4,<11.0.0
|
||||||
cuda114 =
|
cuda114 =
|
||||||
cupy-cuda114>=5.0.0b4,<10.0.0
|
cupy-cuda114>=5.0.0b4,<11.0.0
|
||||||
|
cuda115 =
|
||||||
|
cupy-cuda115>=5.0.0b4,<11.0.0
|
||||||
apple =
|
apple =
|
||||||
thinc-apple-ops>=0.0.4,<1.0.0
|
thinc-apple-ops>=0.0.4,<1.0.0
|
||||||
# Language tokenizers with external dependencies
|
# Language tokenizers with external dependencies
|
||||||
|
|
|
@ -123,7 +123,7 @@ def MultiHashEmbed(
|
||||||
attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
|
attributes are NORM, PREFIX, SUFFIX and SHAPE. This lets the model take into
|
||||||
account some subword information, without constructing a fully character-based
|
account some subword information, without constructing a fully character-based
|
||||||
representation. If pretrained vectors are available, they can be included in
|
representation. If pretrained vectors are available, they can be included in
|
||||||
the representation as well, with the vectors table will be kept static
|
the representation as well, with the vectors table kept static
|
||||||
(i.e. it's not updated).
|
(i.e. it's not updated).
|
||||||
|
|
||||||
The `width` parameter specifies the output width of the layer and the widths
|
The `width` parameter specifies the output width of the layer and the widths
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t, uint64_t
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
from libcpp.set cimport set as cppset
|
from libcpp.set cimport set as cppset
|
||||||
from murmurhash.mrmr cimport hash128_x64
|
from murmurhash.mrmr cimport hash128_x64
|
||||||
|
@ -353,12 +353,18 @@ cdef class Vectors:
|
||||||
key (str): The string key.
|
key (str): The string key.
|
||||||
RETURNS: A list of the integer hashes.
|
RETURNS: A list of the integer hashes.
|
||||||
"""
|
"""
|
||||||
cdef uint32_t[4] out
|
# MurmurHash3_x64_128 returns an array of 2 uint64_t values.
|
||||||
|
cdef uint64_t[2] out
|
||||||
chars = s.encode("utf8")
|
chars = s.encode("utf8")
|
||||||
cdef char* utf8_string = chars
|
cdef char* utf8_string = chars
|
||||||
hash128_x64(utf8_string, len(chars), self.hash_seed, &out)
|
hash128_x64(utf8_string, len(chars), self.hash_seed, &out)
|
||||||
rows = [out[i] for i in range(min(self.hash_count, 4))]
|
rows = [
|
||||||
return rows
|
out[0] & 0xffffffffu,
|
||||||
|
out[0] >> 32,
|
||||||
|
out[1] & 0xffffffffu,
|
||||||
|
out[1] >> 32,
|
||||||
|
]
|
||||||
|
return rows[:min(self.hash_count, 4)]
|
||||||
|
|
||||||
def _get_ngrams(self, unicode key):
|
def _get_ngrams(self, unicode key):
|
||||||
"""Get all padded ngram strings using the ngram settings.
|
"""Get all padded ngram strings using the ngram settings.
|
||||||
|
|
|
@ -44,7 +44,7 @@ markup is correct.
|
||||||
"id": "unique-project-id",
|
"id": "unique-project-id",
|
||||||
"title": "Project title",
|
"title": "Project title",
|
||||||
"slogan": "A short summary",
|
"slogan": "A short summary",
|
||||||
"description": "A longer description – *Mardown allowed!*",
|
"description": "A longer description – *Markdown allowed!*",
|
||||||
"github": "user/repo",
|
"github": "user/repo",
|
||||||
"pip": "package-name",
|
"pip": "package-name",
|
||||||
"code_example": [
|
"code_example": [
|
||||||
|
|
|
@ -158,7 +158,7 @@ be configured with the `attrs` argument. The suggested attributes are `NORM`,
|
||||||
`PREFIX`, `SUFFIX` and `SHAPE`. This lets the model take into account some
|
`PREFIX`, `SUFFIX` and `SHAPE`. This lets the model take into account some
|
||||||
subword information, without constructing a fully character-based
|
subword information, without constructing a fully character-based
|
||||||
representation. If pretrained vectors are available, they can be included in the
|
representation. If pretrained vectors are available, they can be included in the
|
||||||
representation as well, with the vectors table will be kept static (i.e. it's
|
representation as well, with the vectors table kept static (i.e. it's
|
||||||
not updated).
|
not updated).
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
|
@ -296,7 +296,7 @@ learned linear projection to control the dimensionality. Unknown tokens are
|
||||||
mapped to a zero vector. See the documentation on
|
mapped to a zero vector. See the documentation on
|
||||||
[static vectors](/usage/embeddings-transformers#static-vectors) for details.
|
[static vectors](/usage/embeddings-transformers#static-vectors) for details.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ----------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `nO` | The output width of the layer, after the linear projection. ~~Optional[int]~~ |
|
| `nO` | The output width of the layer, after the linear projection. ~~Optional[int]~~ |
|
||||||
| `nM` | The width of the static vectors. ~~Optional[int]~~ |
|
| `nM` | The width of the static vectors. ~~Optional[int]~~ |
|
||||||
|
@ -318,7 +318,7 @@ mapped to a zero vector. See the documentation on
|
||||||
Extract arrays of input features from [`Doc`](/api/doc) objects. Expects a list
|
Extract arrays of input features from [`Doc`](/api/doc) objects. Expects a list
|
||||||
of feature names to extract, which should refer to token attributes.
|
of feature names to extract, which should refer to token attributes.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | ------------------------------------------------------------------------ |
|
| ----------- | ------------------------------------------------------------------------ |
|
||||||
| `columns` | The token attributes to extract. ~~List[Union[int, str]]~~ |
|
| `columns` | The token attributes to extract. ~~List[Union[int, str]]~~ |
|
||||||
| **CREATES** | The created feature extraction layer. ~~Model[List[Doc], List[Ints2d]]~~ |
|
| **CREATES** | The created feature extraction layer. ~~Model[List[Doc], List[Ints2d]]~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user