mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Merge pull request #12979 from adrianeboyd/feature/cython-profile-312
Redesigned cython profiling and other minor updates for python 3.12
This commit is contained in:
		
						commit
						4ec41e98f6
					
				
							
								
								
									
										3
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/workflows/tests.yml
									
									
									
									
										vendored
									
									
								
							|  | @ -58,7 +58,7 @@ jobs: | |||
|       fail-fast: true | ||||
|       matrix: | ||||
|         os: [ubuntu-latest, windows-latest, macos-latest] | ||||
|         python_version: ["3.11"] | ||||
|         python_version: ["3.11", "3.12.0-rc.2"] | ||||
|         include: | ||||
|           - os: windows-latest | ||||
|             python_version: "3.7" | ||||
|  | @ -93,6 +93,7 @@ jobs: | |||
|       - name: Run mypy | ||||
|         run: | | ||||
|           python -m mypy spacy | ||||
|         if: matrix.python_version != '3.7' | ||||
| 
 | ||||
|       - name: Delete source directory and .egg-info | ||||
|         run: | | ||||
|  |  | |||
|  | @ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0 | |||
| mock>=2.0.0,<3.0.0 | ||||
| flake8>=3.8.0,<6.0.0 | ||||
| hypothesis>=3.27.0,<7.0.0 | ||||
| mypy>=0.990,<1.1.0; platform_machine != "aarch64" | ||||
| mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8" | ||||
| types-mock>=0.1.1 | ||||
| types-setuptools>=57.0.0 | ||||
| types-requests | ||||
|  |  | |||
							
								
								
									
										1
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								setup.py
									
									
									
									
									
								
							|  | @ -78,6 +78,7 @@ COMPILER_DIRECTIVES = { | |||
|     "language_level": -3, | ||||
|     "embedsignature": True, | ||||
|     "annotation_typing": False, | ||||
|     "profile": sys.version_info < (3, 12), | ||||
| } | ||||
| # Files to copy into the package that are otherwise not included | ||||
| COPY_FILES = { | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| from .errors import Errors | ||||
| 
 | ||||
| IOB_STRINGS = ("", "I", "O", "B") | ||||
|  |  | |||
|  | @ -133,7 +133,9 @@ def apply( | |||
|     if len(text_files) > 0: | ||||
|         streams.append(_stream_texts(text_files)) | ||||
|     datagen = cast(DocOrStrStream, chain(*streams)) | ||||
|     for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)): | ||||
|     for doc in tqdm.tqdm( | ||||
|         nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None | ||||
|     ): | ||||
|         docbin.add(doc) | ||||
|     if output_file.suffix == "": | ||||
|         output_file = output_file.with_suffix(".spacy") | ||||
|  |  | |||
|  | @ -89,7 +89,7 @@ class Quartiles: | |||
| def annotate( | ||||
|     nlp: Language, docs: List[Doc], batch_size: Optional[int] | ||||
| ) -> numpy.ndarray: | ||||
|     docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size) | ||||
|     docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size) | ||||
|     wps = [] | ||||
|     while True: | ||||
|         with time_context() as elapsed: | ||||
|  |  | |||
|  | @ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> | |||
| 
 | ||||
| 
 | ||||
| def parse_texts(nlp: Language, texts: Sequence[str]) -> None: | ||||
|     for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16): | ||||
|     for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16): | ||||
|         pass | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True | ||||
| # cython: infer_types=True | ||||
| 
 | ||||
| from typing import Iterable | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True | ||||
| # cython: infer_types=True | ||||
| 
 | ||||
| from pathlib import Path | ||||
| from typing import Iterable, Tuple, Union | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True | ||||
| # cython: infer_types=True | ||||
| from typing import Any, Callable, Dict, Iterable | ||||
| 
 | ||||
| import srsly | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: embedsignature=True | ||||
| # cython: profile=False | ||||
| # Compiler crashes on memory view coercion without this. Should report bug. | ||||
| cimport numpy as np | ||||
| from libc.string cimport memset | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True | ||||
| # cython: infer_types=True | ||||
| import warnings | ||||
| from collections import defaultdict | ||||
| from itertools import product | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: profile=True, binding=True, infer_types=True | ||||
| # cython: binding=True, infer_types=True | ||||
| from cpython.object cimport PyObject | ||||
| from libc.stdint cimport int64_t | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: binding=True, infer_types=True, profile=True | ||||
| # cython: binding=True, infer_types=True | ||||
| from typing import Iterable, List | ||||
| 
 | ||||
| from cymem.cymem cimport Pool | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True | ||||
| # cython: infer_types=True | ||||
| from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set | ||||
| 
 | ||||
| import warnings | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True, cdivision=True, boundscheck=False | ||||
| # cython: profile=False | ||||
| cimport numpy as np | ||||
| from libc.math cimport exp | ||||
| from libc.stdlib cimport calloc, free, realloc | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types | ||||
| # cython: profile=False | ||||
| import warnings | ||||
| 
 | ||||
| import numpy | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| 
 | ||||
| # cython: profile=False | ||||
| IDS = { | ||||
|     "": NO_TAG, | ||||
|     "ADJ": ADJ, | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True, binding=True | ||||
| # cython: profile=False | ||||
| from cython.operator cimport dereference as deref | ||||
| from libc.stdint cimport UINT32_MAX, uint32_t | ||||
| from libc.string cimport memset | ||||
|  |  | |||
|  | @ -1,5 +1,4 @@ | |||
| # cython: infer_types=True | ||||
| # cython: profile=True | ||||
| import numpy | ||||
| 
 | ||||
| from thinc.extra.search cimport Beam | ||||
|  |  | |||
|  | @ -0,0 +1 @@ | |||
| # cython: profile=False | ||||
|  | @ -1,4 +1,4 @@ | |||
| # cython: profile=True, cdivision=True, infer_types=True | ||||
| # cython: cdivision=True, infer_types=True | ||||
| from cymem.cymem cimport Address, Pool | ||||
| from libc.stdint cimport int32_t | ||||
| from libcpp.vector cimport vector | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| from cymem.cymem cimport Pool | ||||
| from libc.stdint cimport int32_t | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: profile=True, infer_types=True | ||||
| # cython: infer_types=True | ||||
| """Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 | ||||
| for doing pseudo-projective parsing implementation uses the HEAD decoration | ||||
| scheme. | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True | ||||
| # cython: profile=False | ||||
| from libcpp.vector cimport vector | ||||
| 
 | ||||
| from ...tokens.doc cimport Doc | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True | ||||
| # cython: profile=False | ||||
| from __future__ import print_function | ||||
| 
 | ||||
| from cymem.cymem cimport Pool | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from collections import defaultdict | ||||
| from typing import Callable, Optional | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from itertools import islice | ||||
| from typing import Callable, Dict, Optional, Union | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from typing import Optional | ||||
| 
 | ||||
| import numpy | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from collections import defaultdict | ||||
| from typing import Callable, Optional | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| import warnings | ||||
| from typing import Callable, Dict, Iterable, Iterator, Tuple, Union | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from typing import Callable, List, Optional | ||||
| 
 | ||||
| import srsly | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from itertools import islice | ||||
| from typing import Callable, Optional | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from itertools import islice | ||||
| from typing import Callable, Optional | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple | ||||
| 
 | ||||
| import srsly | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True | ||||
| # cython: profile=False | ||||
| from __future__ import print_function | ||||
| 
 | ||||
| cimport numpy as np | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True | ||||
| # cython: profile=False | ||||
| cimport cython | ||||
| from libc.stdint cimport uint32_t | ||||
| from libc.string cimport memcpy | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: optimize.unpack_method_calls=False | ||||
| # cython: profile=False | ||||
| IDS = { | ||||
|     "": NIL, | ||||
|     "IS_ALPHA": IS_ALPHA, | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: embedsignature=True, profile=True, binding=True | ||||
| # cython: embedsignature=True, binding=True | ||||
| cimport cython | ||||
| from cymem.cymem cimport Pool | ||||
| from cython.operator cimport dereference as deref | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, bounds_check=False, profile=True | ||||
| # cython: infer_types=True, bounds_check=False | ||||
| from cymem.cymem cimport Pool | ||||
| from libc.string cimport memset | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, bounds_check=False, profile=True | ||||
| # cython: infer_types=True, bounds_check=False | ||||
| from typing import Set | ||||
| 
 | ||||
| cimport cython | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True | ||||
| # cython: profile=False | ||||
| from typing import Generator, List, Tuple | ||||
| 
 | ||||
| cimport cython | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| cimport numpy as np | ||||
| from libc.string cimport memset | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| cimport numpy as np | ||||
| 
 | ||||
| import copy | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| import struct | ||||
| import weakref | ||||
| from copy import deepcopy | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: infer_types=True | ||||
| # cython: profile=False | ||||
| # Compiler crashes on memory view coercion without this. Should report bug. | ||||
| cimport numpy as np | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| import re | ||||
| from itertools import chain | ||||
| from typing import List, Tuple | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| from typing import List | ||||
| 
 | ||||
| import numpy | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| from collections.abc import Iterable as IterableInstance | ||||
| 
 | ||||
| import numpy | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| # cython: profile=False | ||||
| import warnings | ||||
| 
 | ||||
| import srsly | ||||
|  |  | |||
|  | @ -302,7 +302,7 @@ def read_vectors( | |||
|             shape = (truncate_vectors, shape[1]) | ||||
|     vectors_data = numpy.zeros(shape=shape, dtype="f") | ||||
|     vectors_keys = [] | ||||
|     for i, line in enumerate(tqdm.tqdm(f)): | ||||
|     for i, line in enumerate(tqdm.tqdm(f, disable=None)): | ||||
|         line = line.rstrip() | ||||
|         pieces = line.rsplit(" ", vectors_data.shape[1]) | ||||
|         word = pieces.pop(0) | ||||
|  |  | |||
|  | @ -0,0 +1 @@ | |||
| # cython: profile=False | ||||
|  | @ -1068,7 +1068,10 @@ def make_tempdir() -> Generator[Path, None, None]: | |||
|         rmfunc(path) | ||||
| 
 | ||||
|     try: | ||||
|         shutil.rmtree(str(d), onerror=force_remove) | ||||
|         if sys.version_info >= (3, 12): | ||||
|             shutil.rmtree(str(d), onexc=force_remove) | ||||
|         else: | ||||
|             shutil.rmtree(str(d), onerror=force_remove) | ||||
|     except PermissionError as e: | ||||
|         warnings.warn(Warnings.W091.format(dir=d, msg=e)) | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,4 +1,4 @@ | |||
| # cython: infer_types=True, profile=True, binding=True | ||||
| # cython: infer_types=True, binding=True | ||||
| from typing import Callable | ||||
| 
 | ||||
| from cython.operator cimport dereference as deref | ||||
|  |  | |||
|  | @ -1,4 +1,3 @@ | |||
| # cython: profile=True | ||||
| import functools | ||||
| 
 | ||||
| import numpy | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user