mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Merge pull request #12979 from adrianeboyd/feature/cython-profile-312
Redesigned cython profiling and other minor updates for python 3.12
This commit is contained in:
		
						commit
						4ec41e98f6
					
				
							
								
								
									
										3
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							|  | @ -58,7 +58,7 @@ jobs: | ||||||
|       fail-fast: true |       fail-fast: true | ||||||
|       matrix: |       matrix: | ||||||
|         os: [ubuntu-latest, windows-latest, macos-latest] |         os: [ubuntu-latest, windows-latest, macos-latest] | ||||||
|         python_version: ["3.11"] |         python_version: ["3.11", "3.12.0-rc.2"] | ||||||
|         include: |         include: | ||||||
|           - os: windows-latest |           - os: windows-latest | ||||||
|             python_version: "3.7" |             python_version: "3.7" | ||||||
|  | @ -93,6 +93,7 @@ jobs: | ||||||
|       - name: Run mypy |       - name: Run mypy | ||||||
|         run: | |         run: | | ||||||
|           python -m mypy spacy |           python -m mypy spacy | ||||||
|  |         if: matrix.python_version != '3.7' | ||||||
| 
 | 
 | ||||||
|       - name: Delete source directory and .egg-info |       - name: Delete source directory and .egg-info | ||||||
|         run: | |         run: | | ||||||
|  |  | ||||||
|  | @ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0 | ||||||
| mock>=2.0.0,<3.0.0 | mock>=2.0.0,<3.0.0 | ||||||
| flake8>=3.8.0,<6.0.0 | flake8>=3.8.0,<6.0.0 | ||||||
| hypothesis>=3.27.0,<7.0.0 | hypothesis>=3.27.0,<7.0.0 | ||||||
| mypy>=0.990,<1.1.0; platform_machine != "aarch64" | mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8" | ||||||
| types-mock>=0.1.1 | types-mock>=0.1.1 | ||||||
| types-setuptools>=57.0.0 | types-setuptools>=57.0.0 | ||||||
| types-requests | types-requests | ||||||
|  |  | ||||||
							
								
								
									
										1
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								setup.py
									
									
									
									
									
								
							|  | @ -78,6 +78,7 @@ COMPILER_DIRECTIVES = { | ||||||
|     "language_level": -3, |     "language_level": -3, | ||||||
|     "embedsignature": True, |     "embedsignature": True, | ||||||
|     "annotation_typing": False, |     "annotation_typing": False, | ||||||
|  |     "profile": sys.version_info < (3, 12), | ||||||
| } | } | ||||||
| # Files to copy into the package that are otherwise not included | # Files to copy into the package that are otherwise not included | ||||||
| COPY_FILES = { | COPY_FILES = { | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| from .errors import Errors | from .errors import Errors | ||||||
| 
 | 
 | ||||||
| IOB_STRINGS = ("", "I", "O", "B") | IOB_STRINGS = ("", "I", "O", "B") | ||||||
|  |  | ||||||
|  | @ -133,7 +133,9 @@ def apply( | ||||||
|     if len(text_files) > 0: |     if len(text_files) > 0: | ||||||
|         streams.append(_stream_texts(text_files)) |         streams.append(_stream_texts(text_files)) | ||||||
|     datagen = cast(DocOrStrStream, chain(*streams)) |     datagen = cast(DocOrStrStream, chain(*streams)) | ||||||
|     for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)): |     for doc in tqdm.tqdm( | ||||||
|  |         nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None | ||||||
|  |     ): | ||||||
|         docbin.add(doc) |         docbin.add(doc) | ||||||
|     if output_file.suffix == "": |     if output_file.suffix == "": | ||||||
|         output_file = output_file.with_suffix(".spacy") |         output_file = output_file.with_suffix(".spacy") | ||||||
|  |  | ||||||
|  | @ -89,7 +89,7 @@ class Quartiles: | ||||||
| def annotate( | def annotate( | ||||||
|     nlp: Language, docs: List[Doc], batch_size: Optional[int] |     nlp: Language, docs: List[Doc], batch_size: Optional[int] | ||||||
| ) -> numpy.ndarray: | ) -> numpy.ndarray: | ||||||
|     docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size) |     docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size) | ||||||
|     wps = [] |     wps = [] | ||||||
|     while True: |     while True: | ||||||
|         with time_context() as elapsed: |         with time_context() as elapsed: | ||||||
|  |  | ||||||
|  | @ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def parse_texts(nlp: Language, texts: Sequence[str]) -> None: | def parse_texts(nlp: Language, texts: Sequence[str]) -> None: | ||||||
|     for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16): |     for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16): | ||||||
|         pass |         pass | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True | # cython: infer_types=True | ||||||
| 
 | 
 | ||||||
| from typing import Iterable | from typing import Iterable | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True | # cython: infer_types=True | ||||||
| 
 | 
 | ||||||
| from pathlib import Path | from pathlib import Path | ||||||
| from typing import Iterable, Tuple, Union | from typing import Iterable, Tuple, Union | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True | # cython: infer_types=True | ||||||
| from typing import Any, Callable, Dict, Iterable | from typing import Any, Callable, Dict, Iterable | ||||||
| 
 | 
 | ||||||
| import srsly | import srsly | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: embedsignature=True | # cython: embedsignature=True | ||||||
|  | # cython: profile=False | ||||||
| # Compiler crashes on memory view coercion without this. Should report bug. | # Compiler crashes on memory view coercion without this. Should report bug. | ||||||
| cimport numpy as np | cimport numpy as np | ||||||
| from libc.string cimport memset | from libc.string cimport memset | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True | # cython: infer_types=True | ||||||
| import warnings | import warnings | ||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
| from itertools import product | from itertools import product | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: profile=True, binding=True, infer_types=True | # cython: binding=True, infer_types=True | ||||||
| from cpython.object cimport PyObject | from cpython.object cimport PyObject | ||||||
| from libc.stdint cimport int64_t | from libc.stdint cimport int64_t | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: binding=True, infer_types=True, profile=True | # cython: binding=True, infer_types=True | ||||||
| from typing import Iterable, List | from typing import Iterable, List | ||||||
| 
 | 
 | ||||||
| from cymem.cymem cimport Pool | from cymem.cymem cimport Pool | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True | # cython: infer_types=True | ||||||
| from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set | from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set | ||||||
| 
 | 
 | ||||||
| import warnings | import warnings | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True, cdivision=True, boundscheck=False | # cython: infer_types=True, cdivision=True, boundscheck=False | ||||||
|  | # cython: profile=False | ||||||
| cimport numpy as np | cimport numpy as np | ||||||
| from libc.math cimport exp | from libc.math cimport exp | ||||||
| from libc.stdlib cimport calloc, free, realloc | from libc.stdlib cimport calloc, free, realloc | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types | # cython: infer_types | ||||||
|  | # cython: profile=False | ||||||
| import warnings | import warnings | ||||||
| 
 | 
 | ||||||
| import numpy | import numpy | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| 
 | # cython: profile=False | ||||||
| IDS = { | IDS = { | ||||||
|     "": NO_TAG, |     "": NO_TAG, | ||||||
|     "ADJ": ADJ, |     "ADJ": ADJ, | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True, binding=True | # cython: infer_types=True, binding=True | ||||||
|  | # cython: profile=False | ||||||
| from cython.operator cimport dereference as deref | from cython.operator cimport dereference as deref | ||||||
| from libc.stdint cimport UINT32_MAX, uint32_t | from libc.stdint cimport UINT32_MAX, uint32_t | ||||||
| from libc.string cimport memset | from libc.string cimport memset | ||||||
|  |  | ||||||
|  | @ -1,5 +1,4 @@ | ||||||
| # cython: infer_types=True | # cython: infer_types=True | ||||||
| # cython: profile=True |  | ||||||
| import numpy | import numpy | ||||||
| 
 | 
 | ||||||
| from thinc.extra.search cimport Beam | from thinc.extra.search cimport Beam | ||||||
|  |  | ||||||
|  | @ -0,0 +1 @@ | ||||||
|  | # cython: profile=False | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: profile=True, cdivision=True, infer_types=True | # cython: cdivision=True, infer_types=True | ||||||
| from cymem.cymem cimport Address, Pool | from cymem.cymem cimport Address, Pool | ||||||
| from libc.stdint cimport int32_t | from libc.stdint cimport int32_t | ||||||
| from libcpp.vector cimport vector | from libcpp.vector cimport vector | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| from cymem.cymem cimport Pool | from cymem.cymem cimport Pool | ||||||
| from libc.stdint cimport int32_t | from libc.stdint cimport int32_t | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: profile=True, infer_types=True | # cython: infer_types=True | ||||||
| """Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 | """Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 | ||||||
| for doing pseudo-projective parsing implementation uses the HEAD decoration | for doing pseudo-projective parsing implementation uses the HEAD decoration | ||||||
| scheme. | scheme. | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True | # cython: infer_types=True | ||||||
|  | # cython: profile=False | ||||||
| from libcpp.vector cimport vector | from libcpp.vector cimport vector | ||||||
| 
 | 
 | ||||||
| from ...tokens.doc cimport Doc | from ...tokens.doc cimport Doc | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True | # cython: infer_types=True | ||||||
|  | # cython: profile=False | ||||||
| from __future__ import print_function | from __future__ import print_function | ||||||
| 
 | 
 | ||||||
| from cymem.cymem cimport Pool | from cymem.cymem cimport Pool | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
| from typing import Callable, Optional | from typing import Callable, Optional | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from itertools import islice | from itertools import islice | ||||||
| from typing import Callable, Dict, Optional, Union | from typing import Callable, Dict, Optional, Union | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from typing import Optional | from typing import Optional | ||||||
| 
 | 
 | ||||||
| import numpy | import numpy | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from collections import defaultdict | from collections import defaultdict | ||||||
| from typing import Callable, Optional | from typing import Callable, Optional | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| import warnings | import warnings | ||||||
| from typing import Callable, Dict, Iterable, Iterator, Tuple, Union | from typing import Callable, Dict, Iterable, Iterator, Tuple, Union | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from typing import Callable, List, Optional | from typing import Callable, List, Optional | ||||||
| 
 | 
 | ||||||
| import srsly | import srsly | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from itertools import islice | from itertools import islice | ||||||
| from typing import Callable, Optional | from typing import Callable, Optional | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from itertools import islice | from itertools import islice | ||||||
| from typing import Callable, Optional | from typing import Callable, Optional | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple | from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple | ||||||
| 
 | 
 | ||||||
| import srsly | import srsly | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True | # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True | ||||||
|  | # cython: profile=False | ||||||
| from __future__ import print_function | from __future__ import print_function | ||||||
| 
 | 
 | ||||||
| cimport numpy as np | cimport numpy as np | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True | # cython: infer_types=True | ||||||
|  | # cython: profile=False | ||||||
| cimport cython | cimport cython | ||||||
| from libc.stdint cimport uint32_t | from libc.stdint cimport uint32_t | ||||||
| from libc.string cimport memcpy | from libc.string cimport memcpy | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: optimize.unpack_method_calls=False | # cython: optimize.unpack_method_calls=False | ||||||
|  | # cython: profile=False | ||||||
| IDS = { | IDS = { | ||||||
|     "": NIL, |     "": NIL, | ||||||
|     "IS_ALPHA": IS_ALPHA, |     "IS_ALPHA": IS_ALPHA, | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: embedsignature=True, profile=True, binding=True | # cython: embedsignature=True, binding=True | ||||||
| cimport cython | cimport cython | ||||||
| from cymem.cymem cimport Pool | from cymem.cymem cimport Pool | ||||||
| from cython.operator cimport dereference as deref | from cython.operator cimport dereference as deref | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, bounds_check=False, profile=True | # cython: infer_types=True, bounds_check=False | ||||||
| from cymem.cymem cimport Pool | from cymem.cymem cimport Pool | ||||||
| from libc.string cimport memset | from libc.string cimport memset | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, bounds_check=False, profile=True | # cython: infer_types=True, bounds_check=False | ||||||
| from typing import Set | from typing import Set | ||||||
| 
 | 
 | ||||||
| cimport cython | cimport cython | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True | # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True | ||||||
|  | # cython: profile=False | ||||||
| from typing import Generator, List, Tuple | from typing import Generator, List, Tuple | ||||||
| 
 | 
 | ||||||
| cimport cython | cimport cython | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| cimport numpy as np | cimport numpy as np | ||||||
| from libc.string cimport memset | from libc.string cimport memset | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| cimport numpy as np | cimport numpy as np | ||||||
| 
 | 
 | ||||||
| import copy | import copy | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| import struct | import struct | ||||||
| import weakref | import weakref | ||||||
| from copy import deepcopy | from copy import deepcopy | ||||||
|  |  | ||||||
|  | @ -1,4 +1,5 @@ | ||||||
| # cython: infer_types=True | # cython: infer_types=True | ||||||
|  | # cython: profile=False | ||||||
| # Compiler crashes on memory view coercion without this. Should report bug. | # Compiler crashes on memory view coercion without this. Should report bug. | ||||||
| cimport numpy as np | cimport numpy as np | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| import re | import re | ||||||
| from itertools import chain | from itertools import chain | ||||||
| from typing import List, Tuple | from typing import List, Tuple | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| from typing import List | from typing import List | ||||||
| 
 | 
 | ||||||
| import numpy | import numpy | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| from collections.abc import Iterable as IterableInstance | from collections.abc import Iterable as IterableInstance | ||||||
| 
 | 
 | ||||||
| import numpy | import numpy | ||||||
|  |  | ||||||
|  | @ -1,3 +1,4 @@ | ||||||
|  | # cython: profile=False | ||||||
| import warnings | import warnings | ||||||
| 
 | 
 | ||||||
| import srsly | import srsly | ||||||
|  |  | ||||||
|  | @ -302,7 +302,7 @@ def read_vectors( | ||||||
|             shape = (truncate_vectors, shape[1]) |             shape = (truncate_vectors, shape[1]) | ||||||
|     vectors_data = numpy.zeros(shape=shape, dtype="f") |     vectors_data = numpy.zeros(shape=shape, dtype="f") | ||||||
|     vectors_keys = [] |     vectors_keys = [] | ||||||
|     for i, line in enumerate(tqdm.tqdm(f)): |     for i, line in enumerate(tqdm.tqdm(f, disable=None)): | ||||||
|         line = line.rstrip() |         line = line.rstrip() | ||||||
|         pieces = line.rsplit(" ", vectors_data.shape[1]) |         pieces = line.rsplit(" ", vectors_data.shape[1]) | ||||||
|         word = pieces.pop(0) |         word = pieces.pop(0) | ||||||
|  |  | ||||||
|  | @ -0,0 +1 @@ | ||||||
|  | # cython: profile=False | ||||||
|  | @ -1068,6 +1068,9 @@ def make_tempdir() -> Generator[Path, None, None]: | ||||||
|         rmfunc(path) |         rmfunc(path) | ||||||
| 
 | 
 | ||||||
|     try: |     try: | ||||||
|  |         if sys.version_info >= (3, 12): | ||||||
|  |             shutil.rmtree(str(d), onexc=force_remove) | ||||||
|  |         else: | ||||||
|             shutil.rmtree(str(d), onerror=force_remove) |             shutil.rmtree(str(d), onerror=force_remove) | ||||||
|     except PermissionError as e: |     except PermissionError as e: | ||||||
|         warnings.warn(Warnings.W091.format(dir=d, msg=e)) |         warnings.warn(Warnings.W091.format(dir=d, msg=e)) | ||||||
|  |  | ||||||
|  | @ -1,4 +1,4 @@ | ||||||
| # cython: infer_types=True, profile=True, binding=True | # cython: infer_types=True, binding=True | ||||||
| from typing import Callable | from typing import Callable | ||||||
| 
 | 
 | ||||||
| from cython.operator cimport dereference as deref | from cython.operator cimport dereference as deref | ||||||
|  |  | ||||||
|  | @ -1,4 +1,3 @@ | ||||||
| # cython: profile=True |  | ||||||
| import functools | import functools | ||||||
| 
 | 
 | ||||||
| import numpy | import numpy | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user