mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-24 23:50:45 +03:00
Merge pull request #12979 from adrianeboyd/feature/cython-profile-312
Redesigned cython profiling and other minor updates for python 3.12
This commit is contained in:
commit
4ec41e98f6
3
.github/workflows/tests.yml
vendored
3
.github/workflows/tests.yml
vendored
|
@ -58,7 +58,7 @@ jobs:
|
||||||
fail-fast: true
|
fail-fast: true
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||||
python_version: ["3.11"]
|
python_version: ["3.11", "3.12.0-rc.2"]
|
||||||
include:
|
include:
|
||||||
- os: windows-latest
|
- os: windows-latest
|
||||||
python_version: "3.7"
|
python_version: "3.7"
|
||||||
|
@ -93,6 +93,7 @@ jobs:
|
||||||
- name: Run mypy
|
- name: Run mypy
|
||||||
run: |
|
run: |
|
||||||
python -m mypy spacy
|
python -m mypy spacy
|
||||||
|
if: matrix.python_version != '3.7'
|
||||||
|
|
||||||
- name: Delete source directory and .egg-info
|
- name: Delete source directory and .egg-info
|
||||||
run: |
|
run: |
|
||||||
|
|
|
@ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0
|
||||||
mock>=2.0.0,<3.0.0
|
mock>=2.0.0,<3.0.0
|
||||||
flake8>=3.8.0,<6.0.0
|
flake8>=3.8.0,<6.0.0
|
||||||
hypothesis>=3.27.0,<7.0.0
|
hypothesis>=3.27.0,<7.0.0
|
||||||
mypy>=0.990,<1.1.0; platform_machine != "aarch64"
|
mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
|
||||||
types-mock>=0.1.1
|
types-mock>=0.1.1
|
||||||
types-setuptools>=57.0.0
|
types-setuptools>=57.0.0
|
||||||
types-requests
|
types-requests
|
||||||
|
|
1
setup.py
1
setup.py
|
@ -78,6 +78,7 @@ COMPILER_DIRECTIVES = {
|
||||||
"language_level": -3,
|
"language_level": -3,
|
||||||
"embedsignature": True,
|
"embedsignature": True,
|
||||||
"annotation_typing": False,
|
"annotation_typing": False,
|
||||||
|
"profile": sys.version_info < (3, 12),
|
||||||
}
|
}
|
||||||
# Files to copy into the package that are otherwise not included
|
# Files to copy into the package that are otherwise not included
|
||||||
COPY_FILES = {
|
COPY_FILES = {
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
from .errors import Errors
|
from .errors import Errors
|
||||||
|
|
||||||
IOB_STRINGS = ("", "I", "O", "B")
|
IOB_STRINGS = ("", "I", "O", "B")
|
||||||
|
|
|
@ -133,7 +133,9 @@ def apply(
|
||||||
if len(text_files) > 0:
|
if len(text_files) > 0:
|
||||||
streams.append(_stream_texts(text_files))
|
streams.append(_stream_texts(text_files))
|
||||||
datagen = cast(DocOrStrStream, chain(*streams))
|
datagen = cast(DocOrStrStream, chain(*streams))
|
||||||
for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)):
|
for doc in tqdm.tqdm(
|
||||||
|
nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None
|
||||||
|
):
|
||||||
docbin.add(doc)
|
docbin.add(doc)
|
||||||
if output_file.suffix == "":
|
if output_file.suffix == "":
|
||||||
output_file = output_file.with_suffix(".spacy")
|
output_file = output_file.with_suffix(".spacy")
|
||||||
|
|
|
@ -89,7 +89,7 @@ class Quartiles:
|
||||||
def annotate(
|
def annotate(
|
||||||
nlp: Language, docs: List[Doc], batch_size: Optional[int]
|
nlp: Language, docs: List[Doc], batch_size: Optional[int]
|
||||||
) -> numpy.ndarray:
|
) -> numpy.ndarray:
|
||||||
docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size)
|
docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size)
|
||||||
wps = []
|
wps = []
|
||||||
while True:
|
while True:
|
||||||
with time_context() as elapsed:
|
with time_context() as elapsed:
|
||||||
|
|
|
@ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) ->
|
||||||
|
|
||||||
|
|
||||||
def parse_texts(nlp: Language, texts: Sequence[str]) -> None:
|
def parse_texts(nlp: Language, texts: Sequence[str]) -> None:
|
||||||
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16):
|
for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True
|
# cython: infer_types=True
|
||||||
|
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True
|
# cython: infer_types=True
|
||||||
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, Tuple, Union
|
from typing import Iterable, Tuple, Union
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True
|
# cython: infer_types=True
|
||||||
from typing import Any, Callable, Dict, Iterable
|
from typing import Any, Callable, Dict, Iterable
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: embedsignature=True
|
# cython: embedsignature=True
|
||||||
|
# cython: profile=False
|
||||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True
|
# cython: infer_types=True
|
||||||
import warnings
|
import warnings
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from itertools import product
|
from itertools import product
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: profile=True, binding=True, infer_types=True
|
# cython: binding=True, infer_types=True
|
||||||
from cpython.object cimport PyObject
|
from cpython.object cimport PyObject
|
||||||
from libc.stdint cimport int64_t
|
from libc.stdint cimport int64_t
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: binding=True, infer_types=True, profile=True
|
# cython: binding=True, infer_types=True
|
||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True
|
# cython: infer_types=True
|
||||||
from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
|
from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
|
||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True, cdivision=True, boundscheck=False
|
# cython: infer_types=True, cdivision=True, boundscheck=False
|
||||||
|
# cython: profile=False
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from libc.math cimport exp
|
from libc.math cimport exp
|
||||||
from libc.stdlib cimport calloc, free, realloc
|
from libc.stdlib cimport calloc, free, realloc
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types
|
# cython: infer_types
|
||||||
|
# cython: profile=False
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
IDS = {
|
IDS = {
|
||||||
"": NO_TAG,
|
"": NO_TAG,
|
||||||
"ADJ": ADJ,
|
"ADJ": ADJ,
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
|
# cython: profile=False
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
from libc.stdint cimport UINT32_MAX, uint32_t
|
from libc.stdint cimport UINT32_MAX, uint32_t
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
# cython: profile=True
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
||||||
from thinc.extra.search cimport Beam
|
from thinc.extra.search cimport Beam
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
# cython: profile=False
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: profile=True, cdivision=True, infer_types=True
|
# cython: cdivision=True, infer_types=True
|
||||||
from cymem.cymem cimport Address, Pool
|
from cymem.cymem cimport Address, Pool
|
||||||
from libc.stdint cimport int32_t
|
from libc.stdint cimport int32_t
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from libc.stdint cimport int32_t
|
from libc.stdint cimport int32_t
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: profile=True, infer_types=True
|
# cython: infer_types=True
|
||||||
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
|
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
|
||||||
for doing pseudo-projective parsing implementation uses the HEAD decoration
|
for doing pseudo-projective parsing implementation uses the HEAD decoration
|
||||||
scheme.
|
scheme.
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
|
# cython: profile=False
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
|
||||||
from ...tokens.doc cimport Doc
|
from ...tokens.doc cimport Doc
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
|
# cython: profile=False
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from typing import Callable, Dict, Optional, Union
|
from typing import Callable, Dict, Optional, Union
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
import warnings
|
import warnings
|
||||||
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
|
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from typing import Callable, List, Optional
|
from typing import Callable, List, Optional
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from itertools import islice
|
from itertools import islice
|
||||||
from typing import Callable, Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
|
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
|
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
|
||||||
|
# cython: profile=False
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
|
# cython: profile=False
|
||||||
cimport cython
|
cimport cython
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t
|
||||||
from libc.string cimport memcpy
|
from libc.string cimport memcpy
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: optimize.unpack_method_calls=False
|
# cython: optimize.unpack_method_calls=False
|
||||||
|
# cython: profile=False
|
||||||
IDS = {
|
IDS = {
|
||||||
"": NIL,
|
"": NIL,
|
||||||
"IS_ALPHA": IS_ALPHA,
|
"IS_ALPHA": IS_ALPHA,
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: embedsignature=True, profile=True, binding=True
|
# cython: embedsignature=True, binding=True
|
||||||
cimport cython
|
cimport cython
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, bounds_check=False, profile=True
|
# cython: infer_types=True, bounds_check=False
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, bounds_check=False, profile=True
|
# cython: infer_types=True, bounds_check=False
|
||||||
from typing import Set
|
from typing import Set
|
||||||
|
|
||||||
cimport cython
|
cimport cython
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
|
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
|
||||||
|
# cython: profile=False
|
||||||
from typing import Generator, List, Tuple
|
from typing import Generator, List, Tuple
|
||||||
|
|
||||||
cimport cython
|
cimport cython
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
import struct
|
import struct
|
||||||
import weakref
|
import weakref
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
|
# cython: profile=False
|
||||||
# Compiler crashes on memory view coercion without this. Should report bug.
|
# Compiler crashes on memory view coercion without this. Should report bug.
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
import re
|
import re
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
from collections.abc import Iterable as IterableInstance
|
from collections.abc import Iterable as IterableInstance
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=False
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
|
|
|
@ -302,7 +302,7 @@ def read_vectors(
|
||||||
shape = (truncate_vectors, shape[1])
|
shape = (truncate_vectors, shape[1])
|
||||||
vectors_data = numpy.zeros(shape=shape, dtype="f")
|
vectors_data = numpy.zeros(shape=shape, dtype="f")
|
||||||
vectors_keys = []
|
vectors_keys = []
|
||||||
for i, line in enumerate(tqdm.tqdm(f)):
|
for i, line in enumerate(tqdm.tqdm(f, disable=None)):
|
||||||
line = line.rstrip()
|
line = line.rstrip()
|
||||||
pieces = line.rsplit(" ", vectors_data.shape[1])
|
pieces = line.rsplit(" ", vectors_data.shape[1])
|
||||||
word = pieces.pop(0)
|
word = pieces.pop(0)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
# cython: profile=False
|
|
@ -1068,7 +1068,10 @@ def make_tempdir() -> Generator[Path, None, None]:
|
||||||
rmfunc(path)
|
rmfunc(path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(str(d), onerror=force_remove)
|
if sys.version_info >= (3, 12):
|
||||||
|
shutil.rmtree(str(d), onexc=force_remove)
|
||||||
|
else:
|
||||||
|
shutil.rmtree(str(d), onerror=force_remove)
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
warnings.warn(Warnings.W091.format(dir=d, msg=e))
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# cython: infer_types=True, profile=True, binding=True
|
# cython: infer_types=True, binding=True
|
||||||
from typing import Callable
|
from typing import Callable
|
||||||
|
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
# cython: profile=True
|
|
||||||
import functools
|
import functools
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
Loading…
Reference in New Issue
Block a user