mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 12:50:20 +03:00
Merge branch 'master' into fix/el-sentence-crossing-ents
This commit is contained in:
commit
66f4d67dfa
24
.github/azure-steps.yml
vendored
24
.github/azure-steps.yml
vendored
|
@ -57,51 +57,51 @@ steps:
|
|||
python -m spacy download ca_core_news_md
|
||||
python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
|
||||
displayName: 'Test download CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -W error -m spacy info ca_core_news_sm | grep -q download_url
|
||||
displayName: 'Test download_url in info CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
|
||||
displayName: 'Test no warnings on load (#11713)'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
|
||||
displayName: 'Test convert CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -m spacy init config -p ner -l ca ner.cfg
|
||||
python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
|
||||
displayName: 'Test debug config CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
# will have errors due to sparse data, check for summary in output
|
||||
python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
|
||||
displayName: 'Test debug data CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
|
||||
displayName: 'Test train CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
|
||||
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
|
||||
displayName: 'Test assemble CLI'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
|
||||
python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
|
||||
displayName: 'Test assemble CLI vectors warning'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
condition: eq(variables['python_version'], '3.9')
|
||||
|
||||
- script: |
|
||||
python -m pip install -U -r requirements.txt
|
||||
|
@ -116,9 +116,3 @@ steps:
|
|||
python -m pytest --pyargs spacy
|
||||
displayName: "Run CPU tests with thinc-apple-ops"
|
||||
condition: and(startsWith(variables['imageName'], 'macos'), eq(variables['python.version'], '3.11'))
|
||||
|
||||
- script: |
|
||||
python .github/validate_universe_json.py website/meta/universe.json
|
||||
displayName: 'Test website/meta/universe.json'
|
||||
condition: eq(variables['python_version'], '3.8')
|
||||
|
||||
|
|
193
.github/workflows/tests.yml
vendored
Normal file
193
.github/workflows/tests.yml
vendored
Normal file
|
@ -0,0 +1,193 @@
|
|||
name: tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches-ignore:
|
||||
- "spacy.io"
|
||||
- "nightly.spacy.io"
|
||||
- "v2.spacy.io"
|
||||
paths-ignore:
|
||||
- "*.md"
|
||||
- "*.mdx"
|
||||
- "website/docs/**"
|
||||
- "website/src/**"
|
||||
- "website/meta/*.tsx"
|
||||
- "website/meta/*.mjs"
|
||||
- "website/meta/languages.json"
|
||||
- "website/meta/site.json"
|
||||
- "website/meta/sidebars.json"
|
||||
- "website/meta/type-annotations.json"
|
||||
- "website/pages/**"
|
||||
- ".github/workflows/**"
|
||||
pull_request:
|
||||
types: [opened, synchronize, reopened, edited]
|
||||
paths:
|
||||
- "!*.md"
|
||||
- "!*.mdx"
|
||||
- "!website/docs/**"
|
||||
- "!website/src/**"
|
||||
- "!website/meta/*.tsx"
|
||||
- "!website/meta/*.mjs"
|
||||
- "!website/meta/languages.json"
|
||||
- "!website/meta/site.json"
|
||||
- "!website/meta/sidebars.json"
|
||||
- "!website/meta/type-annotations.json"
|
||||
- "!website/pages/**"
|
||||
- "!.github/workflows/**"
|
||||
- ".github/workflows/tests.yml"
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
name: Validate
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Check out repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Configure Python version
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: "3.7"
|
||||
architecture: x64
|
||||
|
||||
- name: black
|
||||
run: |
|
||||
python -m pip install black -c requirements.txt
|
||||
python -m black spacy --check
|
||||
- name: flake8
|
||||
run: |
|
||||
python -m pip install flake8==5.0.4
|
||||
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
|
||||
- name: Validate website/meta/universe.json
|
||||
run: |
|
||||
python .github/validate_universe_json.py website/meta/universe.json
|
||||
tests:
|
||||
name: Test
|
||||
needs: Validate
|
||||
strategy:
|
||||
fail-fast: true
|
||||
matrix:
|
||||
os: [ubuntu-latest, windows-latest, macos-latest]
|
||||
python_version: ["3.11"]
|
||||
include:
|
||||
- os: ubuntu-20.04
|
||||
python_version: "3.6"
|
||||
- os: windows-latest
|
||||
python_version: "3.7"
|
||||
- os: macos-latest
|
||||
python_version: "3.8"
|
||||
- os: ubuntu-latest
|
||||
python_version: "3.9"
|
||||
- os: windows-latest
|
||||
python_version: "3.10"
|
||||
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- name: Check out repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Configure Python version
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python_version }}
|
||||
architecture: x64
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
python -m pip install -U build pip setuptools
|
||||
python -m pip install -U -r requirements.txt
|
||||
|
||||
- name: Build sdist
|
||||
run: |
|
||||
python -m build --sdist
|
||||
|
||||
- name: Run mypy
|
||||
run: |
|
||||
python -m mypy spacy
|
||||
if: matrix.python_version != '3.6'
|
||||
|
||||
- name: Delete source directory and .egg-info
|
||||
run: |
|
||||
rm -rf spacy *.egg-info
|
||||
shell: bash
|
||||
|
||||
- name: Uninstall all packages
|
||||
run: |
|
||||
python -m pip freeze
|
||||
python -m pip freeze --exclude pywin32 > installed.txt
|
||||
python -m pip uninstall -y -r installed.txt
|
||||
|
||||
- name: Install from sdist
|
||||
run: |
|
||||
SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1)
|
||||
SPACY_NUM_BUILD_JOBS=2 python -m pip install dist/$SDIST
|
||||
shell: bash
|
||||
|
||||
- name: Test import
|
||||
run: python -W error -c "import spacy"
|
||||
|
||||
- name: "Test download CLI"
|
||||
run: |
|
||||
python -m spacy download ca_core_news_sm
|
||||
python -m spacy download ca_core_news_md
|
||||
python -c "import spacy; nlp=spacy.load('ca_core_news_sm'); doc=nlp('test')"
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test download_url in info CLI"
|
||||
run: |
|
||||
python -W error -m spacy info ca_core_news_sm | grep -q download_url
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test no warnings on load (#11713)"
|
||||
run: |
|
||||
python -W error -c "import ca_core_news_sm; nlp = ca_core_news_sm.load(); doc=nlp('test')"
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test convert CLI"
|
||||
run: |
|
||||
python -m spacy convert extra/example_data/ner_example_data/ner-token-per-line-conll2003.json .
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test debug config CLI"
|
||||
run: |
|
||||
python -m spacy init config -p ner -l ca ner.cfg
|
||||
python -m spacy debug config ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test debug data CLI"
|
||||
run: |
|
||||
# will have errors due to sparse data, check for summary in output
|
||||
python -m spacy debug data ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy | grep -q Summary
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test train CLI"
|
||||
run: |
|
||||
python -m spacy train ner.cfg --paths.train ner-token-per-line-conll2003.spacy --paths.dev ner-token-per-line-conll2003.spacy --training.max_steps 10 --gpu-id -1
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test assemble CLI"
|
||||
run: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_sm'}; config.to_disk('ner_source_sm.cfg')"
|
||||
PYTHONWARNINGS="error,ignore::DeprecationWarning" python -m spacy assemble ner_source_sm.cfg output_dir
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Test assemble CLI vectors warning"
|
||||
run: |
|
||||
python -c "import spacy; config = spacy.util.load_config('ner.cfg'); config['components']['ner'] = {'source': 'ca_core_news_md'}; config.to_disk('ner_source_md.cfg')"
|
||||
python -m spacy assemble ner_source_md.cfg output_dir 2>&1 | grep -q W113
|
||||
if: matrix.python_version == '3.9'
|
||||
|
||||
- name: "Install test requirements"
|
||||
run: |
|
||||
python -m pip install -U -r requirements.txt
|
||||
|
||||
- name: "Run CPU tests"
|
||||
run: |
|
||||
python -m pytest --pyargs spacy -W error
|
||||
|
||||
- name: "Run CPU tests with thinc-apple-ops"
|
||||
run: |
|
||||
python -m pip install 'spacy[apple]'
|
||||
python -m pytest --pyargs spacy
|
||||
if: startsWith(matrix.os, 'macos') && matrix.python_version == '3.11'
|
|
@ -48,6 +48,9 @@ jobs:
|
|||
pip install flake8==5.0.4
|
||||
python -m flake8 spacy --count --select=E901,E999,F821,F822,F823,W605 --show-source --statistics
|
||||
displayName: "flake8"
|
||||
- script: |
|
||||
python .github/validate_universe_json.py website/meta/universe.json
|
||||
displayName: 'Validate website/meta/universe.json'
|
||||
|
||||
- job: "Test"
|
||||
dependsOn: "Validate"
|
||||
|
|
|
@ -700,3 +700,19 @@ def test_span_group_copy(doc):
|
|||
assert len(doc.spans["test"]) == 3
|
||||
# check that the copy spans were not modified and this is an isolated doc
|
||||
assert len(doc_copy.spans["test"]) == 2
|
||||
|
||||
|
||||
def test_for_partial_ent_sents():
|
||||
"""Spans may be associated with multiple sentences. These .sents should always be complete, not partial, sentences,
|
||||
which this tests for.
|
||||
"""
|
||||
doc = Doc(
|
||||
English().vocab,
|
||||
words=["Mahler's", "Symphony", "No.", "8", "was", "beautiful."],
|
||||
sent_starts=[1, 0, 0, 1, 0, 0],
|
||||
)
|
||||
doc.set_ents([Span(doc, 1, 4, "WORK")])
|
||||
# The specified entity is associated with both sentences in this doc, so we expect all sentences in the doc to be
|
||||
# equal to the sentences referenced in ent.sents.
|
||||
for doc_sent, ent_sent in zip(doc.sents, doc.ents[0].sents):
|
||||
assert doc_sent == ent_sent
|
||||
|
|
|
@ -460,9 +460,8 @@ cdef class Span:
|
|||
start = i
|
||||
if start >= self.end:
|
||||
break
|
||||
if start < self.end:
|
||||
yield Span(self.doc, start, self.end)
|
||||
|
||||
elif i == self.doc.length - 1:
|
||||
yield Span(self.doc, start, self.doc.length)
|
||||
|
||||
@property
|
||||
def ents(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user