Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-28 06:31:12 +03:00)
Tidy up merge conflict leftovers

commit ae880ef912
parent 61d09c481b
@@ -1,11 +1,11 @@
 # coding: utf8
 from __future__ import unicode_literals
 
+import re
+
 from ...gold import iob_to_biluo
 from ...util import minibatch
 
-import re
-
 
 def iob2json(input_data, n_sents=10, *args, **kwargs):
     """
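Context for this hunk: `iob_to_biluo` converts IOB tag sequences into the BILUO scheme spaCy uses internally, and `minibatch` is used to group converted sentences per the `n_sents` parameter. A minimal usage sketch of the converter, assuming the spaCy 2.x `spacy.gold` module these relative imports resolve to; the sample tags and expected output are illustrative:

    from spacy.gold import iob_to_biluo

    tags = ["O", "B-PER", "I-PER", "O"]
    # the last token of a multi-token entity becomes L-*; single tokens become U-*
    print(iob_to_biluo(tags))  # ["O", "B-PER", "L-PER", "O"]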
@@ -1,7 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-# stop words from HAZM package
 
 # Stop words from HAZM package
 STOP_WORDS = set(
@@ -1,10 +1,3 @@
-"""
-Slang and abbreviations
-
-Daftar kosakata yang sering salah dieja
-https://id.wikipedia.org/wiki/Wikipedia:Daftar_kosakata_bahasa_Indonesia_yang_sering_salah_dieja
-
-"""
 # coding: utf8
 from __future__ import unicode_literals
 
@@ -1,6 +1,3 @@
-"""
-List of stop words in Bahasa Indonesia.
-"""
 # coding: utf8
 from __future__ import unicode_literals
 
@@ -1,7 +1,3 @@
-"""
-Daftar singkatan dan Akronim dari:
-https://id.wiktionary.org/wiki/Wiktionary:Daftar_singkatan_dan_akronim_bahasa_Indonesia#A
-"""
 # coding: utf8
 from __future__ import unicode_literals
 
@@ -291,8 +291,6 @@ cdef char get_quantifier(PatternStateC state) nogil:
 
 DEF PADDING = 5
 
-DEF PADDING = 5
-
 
 cdef TokenPatternC* init_pattern(Pool mem, attr_t entity_id,
                                  object token_specs) except NULL:
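The duplicate removed here is a Cython `DEF` directive: it binds a compile-time constant that the compiler substitutes wherever the name appears, so the merge had left two identical, redundant definitions. A minimal sketch of the mechanism, with an illustrative function name not taken from the spaCy source:

    # in a .pyx file: DEF constants are substituted at compile time, like a C macro
    DEF PADDING = 5

    cdef int padded_length(int n):
        # compiles as `return n + 5`
        return n + PADDING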
@@ -53,24 +53,7 @@ def test_spans_merge_heads(en_tokenizer):
 
 def test_spans_merge_non_disjoint(en_tokenizer):
     text = "Los Angeles start."
-    tokens = en_tokenizer(text)
-    doc = get_doc(tokens.vocab, [t.text for t in tokens])
-    with pytest.raises(ValueError):
-        with doc.retokenize() as retokenizer:
-            retokenizer.merge(
-                doc[0:2],
-                attrs={"tag": "NNP", "lemma": "Los Angeles", "ent_type": "GPE"},
-            )
-            retokenizer.merge(
-                doc[0:1],
-                attrs={"tag": "NNP", "lemma": "Los Angeles", "ent_type": "GPE"},
-            )
-
-
-def test_spans_merge_non_disjoint(en_tokenizer):
-    text = "Los Angeles start."
-    tokens = en_tokenizer(text)
-    doc = get_doc(tokens.vocab, [t.text for t in tokens])
+    doc = en_tokenizer(text)
     with pytest.raises(ValueError):
         with doc.retokenize() as retokenizer:
             retokenizer.merge(
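After this hunk a single copy of the test remains. Pieced together from the surviving context lines and the removed duplicate, the deduplicated test plausibly reads as follows (a reconstruction, since the diff view is truncated at the first `retokenizer.merge(` of the kept copy):

    def test_spans_merge_non_disjoint(en_tokenizer):
        text = "Los Angeles start."
        doc = en_tokenizer(text)
        with pytest.raises(ValueError):
            with doc.retokenize() as retokenizer:
                # the two spans overlap, so the retokenizer raises
                retokenizer.merge(
                    doc[0:2],
                    attrs={"tag": "NNP", "lemma": "Los Angeles", "ent_type": "GPE"},
                )
                retokenizer.merge(
                    doc[0:1],
                    attrs={"tag": "NNP", "lemma": "Los Angeles", "ent_type": "GPE"},
                )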
@@ -1,36 +0,0 @@
-'''Test issue that arises when too many labels are added to NER model.'''
-from __future__ import unicode_literals
-
-import random
-from ...lang.en import English
-
-def train_model(train_data, entity_types):
-    nlp = English(pipeline=[])
-
-    ner = nlp.create_pipe("ner")
-    nlp.add_pipe(ner)
-
-    for entity_type in list(entity_types):
-        ner.add_label(entity_type)
-
-    optimizer = nlp.begin_training()
-
-    # Start training
-    for i in range(20):
-        losses = {}
-        index = 0
-        random.shuffle(train_data)
-
-        for statement, entities in train_data:
-            nlp.update([statement], [entities], sgd=optimizer, losses=losses, drop=0.5)
-    return nlp
-
-
-def test_train_with_many_entity_types():
-    train_data = []
-    train_data.extend([("One sentence", {"entities": []})])
-    entity_types = [str(i) for i in range(1000)]
-
-    model = train_model(train_data, entity_types)
-
-
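The file deleted above was a regression test that added roughly 1000 labels to a fresh NER model and trained briefly. For reference, a self-contained sketch of the same check against the spaCy 2.x API; dropping the unused `index` variable and the `pipeline=[]` argument are my cleanups, not the original code:

    import random
    from spacy.lang.en import English

    def test_train_with_many_entity_types():
        train_data = [("One sentence", {"entities": []})]
        nlp = English()
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner)
        for i in range(1000):
            ner.add_label(str(i))
        optimizer = nlp.begin_training()
        for _ in range(20):
            losses = {}
            random.shuffle(train_data)
            for statement, entities in train_data:
                nlp.update([statement], [entities], sgd=optimizer,
                           losses=losses, drop=0.5)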
@@ -1,40 +0,0 @@
-# coding: utf-8
-from __future__ import unicode_literals
-
-import pytest
-import os
-from pathlib import Path
-
-from ..compat import symlink_to, symlink_remove, path2str
-
-
-def target_local_path():
-    return "./foo-target"
-
-
-def link_local_path():
-    return "./foo-symlink"
-
-
-@pytest.fixture(scope="function")
-def setup_target(request):
-    target = Path(target_local_path())
-    if not target.exists():
-        os.mkdir(path2str(target))
-
-    # yield -- need to cleanup even if assertion fails
-    # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
-    def cleanup():
-        symlink_remove(Path(link_local_path()))
-        os.rmdir(target_local_path())
-
-    request.addfinalizer(cleanup)
-
-
-def test_create_symlink_windows(setup_target):
-    target = Path(target_local_path())
-    link = Path(link_local_path())
-    assert target.exists()
-
-    symlink_to(link, target)
-    assert link.exists()
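This second deleted file tested the `symlink_to`/`symlink_remove` helpers from `spacy.compat`, which exist because symlink creation is platform-specific: on Windows it may require elevated privileges or Developer Mode. A rough standard-library sketch of the pattern such helpers follow; this illustrates the idea and is not the actual spaCy implementation:

    import os
    from pathlib import Path

    def make_symlink(link, target):
        # creates `link` pointing at `target`
        Path(link).symlink_to(Path(target), target_is_directory=Path(target).is_dir())

    def remove_symlink(link):
        # unlink removes the symlink itself, never its target
        os.unlink(str(link))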