Merge pull request #12979 from adrianeboyd/feature/cython-profile-312

Redesigned Cython profiling and other minor updates for Python 3.12
This commit is contained in:
Adriane Boyd 2023-09-29 08:23:38 +02:00 committed by GitHub
commit 4ec41e98f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
55 changed files with 59 additions and 32 deletions

View File

@ -58,7 +58,7 @@ jobs:
fail-fast: true fail-fast: true
matrix: matrix:
os: [ubuntu-latest, windows-latest, macos-latest] os: [ubuntu-latest, windows-latest, macos-latest]
python_version: ["3.11"] python_version: ["3.11", "3.12.0-rc.2"]
include: include:
- os: windows-latest - os: windows-latest
python_version: "3.7" python_version: "3.7"
@ -93,6 +93,7 @@ jobs:
- name: Run mypy - name: Run mypy
run: | run: |
python -m mypy spacy python -m mypy spacy
if: matrix.python_version != '3.7'
- name: Delete source directory and .egg-info - name: Delete source directory and .egg-info
run: | run: |

View File

@ -33,7 +33,7 @@ pytest-timeout>=1.3.0,<2.0.0
mock>=2.0.0,<3.0.0 mock>=2.0.0,<3.0.0
flake8>=3.8.0,<6.0.0 flake8>=3.8.0,<6.0.0
hypothesis>=3.27.0,<7.0.0 hypothesis>=3.27.0,<7.0.0
mypy>=0.990,<1.1.0; platform_machine != "aarch64" mypy>=1.5.0,<1.6.0; platform_machine != "aarch64" and python_version >= "3.8"
types-mock>=0.1.1 types-mock>=0.1.1
types-setuptools>=57.0.0 types-setuptools>=57.0.0
types-requests types-requests

View File

@ -78,6 +78,7 @@ COMPILER_DIRECTIVES = {
"language_level": -3, "language_level": -3,
"embedsignature": True, "embedsignature": True,
"annotation_typing": False, "annotation_typing": False,
"profile": sys.version_info < (3, 12),
} }
# Files to copy into the package that are otherwise not included # Files to copy into the package that are otherwise not included
COPY_FILES = { COPY_FILES = {

View File

@ -1,3 +1,4 @@
# cython: profile=False
from .errors import Errors from .errors import Errors
IOB_STRINGS = ("", "I", "O", "B") IOB_STRINGS = ("", "I", "O", "B")

View File

@ -133,7 +133,9 @@ def apply(
if len(text_files) > 0: if len(text_files) > 0:
streams.append(_stream_texts(text_files)) streams.append(_stream_texts(text_files))
datagen = cast(DocOrStrStream, chain(*streams)) datagen = cast(DocOrStrStream, chain(*streams))
for doc in tqdm.tqdm(nlp.pipe(datagen, batch_size=batch_size, n_process=n_process)): for doc in tqdm.tqdm(
nlp.pipe(datagen, batch_size=batch_size, n_process=n_process), disable=None
):
docbin.add(doc) docbin.add(doc)
if output_file.suffix == "": if output_file.suffix == "":
output_file = output_file.with_suffix(".spacy") output_file = output_file.with_suffix(".spacy")

View File

@ -89,7 +89,7 @@ class Quartiles:
def annotate( def annotate(
nlp: Language, docs: List[Doc], batch_size: Optional[int] nlp: Language, docs: List[Doc], batch_size: Optional[int]
) -> numpy.ndarray: ) -> numpy.ndarray:
docs = nlp.pipe(tqdm(docs, unit="doc"), batch_size=batch_size) docs = nlp.pipe(tqdm(docs, unit="doc", disable=None), batch_size=batch_size)
wps = [] wps = []
while True: while True:
with time_context() as elapsed: with time_context() as elapsed:

View File

@ -71,7 +71,7 @@ def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) ->
def parse_texts(nlp: Language, texts: Sequence[str]) -> None: def parse_texts(nlp: Language, texts: Sequence[str]) -> None:
for doc in nlp.pipe(tqdm.tqdm(texts), batch_size=16): for doc in nlp.pipe(tqdm.tqdm(texts, disable=None), batch_size=16):
pass pass

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True # cython: infer_types=True
from typing import Iterable from typing import Iterable

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True # cython: infer_types=True
from pathlib import Path from pathlib import Path
from typing import Iterable, Tuple, Union from typing import Iterable, Tuple, Union

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True # cython: infer_types=True
from typing import Any, Callable, Dict, Iterable from typing import Any, Callable, Dict, Iterable
import srsly import srsly

View File

@ -1,4 +1,5 @@
# cython: embedsignature=True # cython: embedsignature=True
# cython: profile=False
# Compiler crashes on memory view coercion without this. Should report bug. # Compiler crashes on memory view coercion without this. Should report bug.
cimport numpy as np cimport numpy as np
from libc.string cimport memset from libc.string cimport memset

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True # cython: infer_types=True
import warnings import warnings
from collections import defaultdict from collections import defaultdict
from itertools import product from itertools import product

View File

@ -1,4 +1,4 @@
# cython: profile=True, binding=True, infer_types=True # cython: binding=True, infer_types=True
from cpython.object cimport PyObject from cpython.object cimport PyObject
from libc.stdint cimport int64_t from libc.stdint cimport int64_t

View File

@ -1,4 +1,4 @@
# cython: binding=True, infer_types=True, profile=True # cython: binding=True, infer_types=True
from typing import Iterable, List from typing import Iterable, List
from cymem.cymem cimport Pool from cymem.cymem cimport Pool

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True # cython: infer_types=True
from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set from preshed.maps cimport map_clear, map_get, map_init, map_iter, map_set
import warnings import warnings

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False # cython: infer_types=True, cdivision=True, boundscheck=False
# cython: profile=False
cimport numpy as np cimport numpy as np
from libc.math cimport exp from libc.math cimport exp
from libc.stdlib cimport calloc, free, realloc from libc.stdlib cimport calloc, free, realloc

View File

@ -1,4 +1,5 @@
# cython: infer_types # cython: infer_types
# cython: profile=False
import warnings import warnings
import numpy import numpy

View File

@ -1,4 +1,4 @@
# cython: profile=False
IDS = { IDS = {
"": NO_TAG, "": NO_TAG,
"ADJ": ADJ, "ADJ": ADJ,

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, binding=True # cython: infer_types=True, binding=True
# cython: profile=False
from cython.operator cimport dereference as deref from cython.operator cimport dereference as deref
from libc.stdint cimport UINT32_MAX, uint32_t from libc.stdint cimport UINT32_MAX, uint32_t
from libc.string cimport memset from libc.string cimport memset

View File

@ -1,5 +1,4 @@
# cython: infer_types=True # cython: infer_types=True
# cython: profile=True
import numpy import numpy
from thinc.extra.search cimport Beam from thinc.extra.search cimport Beam

View File

@ -0,0 +1 @@
# cython: profile=False

View File

@ -1,4 +1,4 @@
# cython: profile=True, cdivision=True, infer_types=True # cython: cdivision=True, infer_types=True
from cymem.cymem cimport Address, Pool from cymem.cymem cimport Address, Pool
from libc.stdint cimport int32_t from libc.stdint cimport int32_t
from libcpp.vector cimport vector from libcpp.vector cimport vector

View File

@ -1,3 +1,4 @@
# cython: profile=False
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from libc.stdint cimport int32_t from libc.stdint cimport int32_t

View File

@ -1,4 +1,4 @@
# cython: profile=True, infer_types=True # cython: infer_types=True
"""Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005 """Implements the projectivize/deprojectivize mechanism in Nivre & Nilsson 2005
for doing pseudo-projective parsing implementation uses the HEAD decoration for doing pseudo-projective parsing implementation uses the HEAD decoration
scheme. scheme.

View File

@ -1,4 +1,5 @@
# cython: infer_types=True # cython: infer_types=True
# cython: profile=False
from libcpp.vector cimport vector from libcpp.vector cimport vector
from ...tokens.doc cimport Doc from ...tokens.doc cimport Doc

View File

@ -1,4 +1,5 @@
# cython: infer_types=True # cython: infer_types=True
# cython: profile=False
from __future__ import print_function from __future__ import print_function
from cymem.cymem cimport Pool from cymem.cymem cimport Pool

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from collections import defaultdict from collections import defaultdict
from typing import Callable, Optional from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from itertools import islice from itertools import islice
from typing import Callable, Dict, Optional, Union from typing import Callable, Dict, Optional, Union

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from typing import Optional from typing import Optional
import numpy import numpy

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from collections import defaultdict from collections import defaultdict
from typing import Callable, Optional from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
import warnings import warnings
from typing import Callable, Dict, Iterable, Iterator, Tuple, Union from typing import Callable, Dict, Iterable, Iterator, Tuple, Union

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from typing import Callable, List, Optional from typing import Callable, List, Optional
import srsly import srsly

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from itertools import islice from itertools import islice
from typing import Callable, Optional from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from itertools import islice from itertools import islice
from typing import Callable, Optional from typing import Callable, Optional

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple from typing import Callable, Dict, Iterable, Iterator, Optional, Tuple
import srsly import srsly

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
# cython: profile=False
from __future__ import print_function from __future__ import print_function
cimport numpy as np cimport numpy as np

View File

@ -1,4 +1,5 @@
# cython: infer_types=True # cython: infer_types=True
# cython: profile=False
cimport cython cimport cython
from libc.stdint cimport uint32_t from libc.stdint cimport uint32_t
from libc.string cimport memcpy from libc.string cimport memcpy

View File

@ -1,4 +1,5 @@
# cython: optimize.unpack_method_calls=False # cython: optimize.unpack_method_calls=False
# cython: profile=False
IDS = { IDS = {
"": NIL, "": NIL,
"IS_ALPHA": IS_ALPHA, "IS_ALPHA": IS_ALPHA,

View File

@ -1,4 +1,4 @@
# cython: embedsignature=True, profile=True, binding=True # cython: embedsignature=True, binding=True
cimport cython cimport cython
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from cython.operator cimport dereference as deref from cython.operator cimport dereference as deref

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, bounds_check=False, profile=True # cython: infer_types=True, bounds_check=False
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from libc.string cimport memset from libc.string cimport memset

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, bounds_check=False, profile=True # cython: infer_types=True, bounds_check=False
from typing import Set from typing import Set
cimport cython cimport cython

View File

@ -1,4 +1,5 @@
# cython: infer_types=True, cdivision=True, boundscheck=False, binding=True # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
# cython: profile=False
from typing import Generator, List, Tuple from typing import Generator, List, Tuple
cimport cython cimport cython

View File

@ -1,3 +1,4 @@
# cython: profile=False
cimport numpy as np cimport numpy as np
from libc.string cimport memset from libc.string cimport memset

View File

@ -1,3 +1,4 @@
# cython: profile=False
cimport numpy as np cimport numpy as np
import copy import copy

View File

@ -1,3 +1,4 @@
# cython: profile=False
import struct import struct
import weakref import weakref
from copy import deepcopy from copy import deepcopy

View File

@ -1,4 +1,5 @@
# cython: infer_types=True # cython: infer_types=True
# cython: profile=False
# Compiler crashes on memory view coercion without this. Should report bug. # Compiler crashes on memory view coercion without this. Should report bug.
cimport numpy as np cimport numpy as np

View File

@ -1,3 +1,4 @@
# cython: profile=False
import re import re
from itertools import chain from itertools import chain
from typing import List, Tuple from typing import List, Tuple

View File

@ -1,3 +1,4 @@
# cython: profile=False
from typing import List from typing import List
import numpy import numpy

View File

@ -1,3 +1,4 @@
# cython: profile=False
from collections.abc import Iterable as IterableInstance from collections.abc import Iterable as IterableInstance
import numpy import numpy

View File

@ -1,3 +1,4 @@
# cython: profile=False
import warnings import warnings
import srsly import srsly

View File

@ -302,7 +302,7 @@ def read_vectors(
shape = (truncate_vectors, shape[1]) shape = (truncate_vectors, shape[1])
vectors_data = numpy.zeros(shape=shape, dtype="f") vectors_data = numpy.zeros(shape=shape, dtype="f")
vectors_keys = [] vectors_keys = []
for i, line in enumerate(tqdm.tqdm(f)): for i, line in enumerate(tqdm.tqdm(f, disable=None)):
line = line.rstrip() line = line.rstrip()
pieces = line.rsplit(" ", vectors_data.shape[1]) pieces = line.rsplit(" ", vectors_data.shape[1])
word = pieces.pop(0) word = pieces.pop(0)

View File

@ -0,0 +1 @@
# cython: profile=False

View File

@ -1068,7 +1068,10 @@ def make_tempdir() -> Generator[Path, None, None]:
rmfunc(path) rmfunc(path)
try: try:
shutil.rmtree(str(d), onerror=force_remove) if sys.version_info >= (3, 12):
shutil.rmtree(str(d), onexc=force_remove)
else:
shutil.rmtree(str(d), onerror=force_remove)
except PermissionError as e: except PermissionError as e:
warnings.warn(Warnings.W091.format(dir=d, msg=e)) warnings.warn(Warnings.W091.format(dir=d, msg=e))

View File

@ -1,4 +1,4 @@
# cython: infer_types=True, profile=True, binding=True # cython: infer_types=True, binding=True
from typing import Callable from typing import Callable
from cython.operator cimport dereference as deref from cython.operator cimport dereference as deref

View File

@ -1,4 +1,3 @@
# cython: profile=True
import functools import functools
import numpy import numpy