Fix circular import

2025-10-18 01:34:16 +03:00 · 2017-10-03 09:33:21 -05:00 · 2017-10-03 09:33:21 -05:00 · 1289187279
commit 1289187279
parent a44c4c3a5b e49cd7aeaf
4 changed files with 25 additions and 11 deletions
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@ -3,12 +3,15 @@ from __future__ import unicode_literals

 from .cli.info import info as cli_info
 from .glossary import explain
+<<<<<<< HEAD
 from .deprecated import resolve_load_name
 #from .about import __version__
+from .about import __version__
 from . import util


 def load(name, **overrides):
+    from .deprecated import resolve_load_name
    name = resolve_load_name(name, **overrides)
    return util.load_model(name, **overrides)

--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@ -14,7 +14,7 @@ from ..util import prints
 CONVERTERS = {
    '.conllu': conllu2json,
    '.conll': conllu2json,
-    '.iob': iob2json
+    '.iob': iob2json,
 }


--- a/spacy/cli/converters/iob2json.py
+++ b/spacy/cli/converters/iob2json.py
@ -1,5 +1,6 @@
 # coding: utf8
 from __future__ import unicode_literals
+from cytoolz import partition_all, concat

 from ...compat import json_dumps, path2str
 from ...util import prints
@ -10,11 +11,9 @@ def iob2json(input_path, output_path, n_sents=10, *a, **k):
    """
    Convert IOB files into JSON format for use with train cli.
    """
-    # TODO: This isn't complete yet -- need to map from IOB to
-    # BILUO
    with input_path.open('r', encoding='utf8') as file_:
-        docs = read_iob(file_)
-
+        sentences = read_iob(file_)
+    docs = merge_sentences(sentences, n_sents)
    output_filename = input_path.parts[-1].replace(".iob", ".json")
    output_file = output_path / output_filename
    with output_file.open('w', encoding='utf-8') as f:
@ -23,9 +22,9 @@ def iob2json(input_path, output_path, n_sents=10, *a, **k):
           title="Generated output file %s" % path2str(output_file))


-def read_iob(file_):
+def read_iob(raw_sents):
    sentences = []
-    for line in file_:
+    for line in raw_sents:
        if not line.strip():
            continue
        tokens = [t.split('|') for t in line.split()]
@ -43,3 +42,15 @@ def read_iob(file_):
    paragraphs = [{'sentences': [sent]} for sent in sentences]
    docs = [{'id': 0, 'paragraphs': [para]} for para in paragraphs]
    return docs
+
+def merge_sentences(docs, n_sents):
+    """Merge each run of up to `n_sents` docs into the run's first doc."""
+    merged = []
+    for group in partition_all(n_sents, docs):
+        first, rest = group[0], group[1:]
+        to_extend = first['paragraphs'][0]['sentences']
+        # Every doc after `first` contributes its sentences; skip none.
+        for sent in rest:
+            to_extend.extend(sent['paragraphs'][0]['sentences'])
+        merged.append(first)
+    return merged
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@ -69,10 +69,10 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
        prints("Expected dict but got: {}".format(type(meta)),
               title="Not a valid meta.json format", exits=1)

-    pipeline = ['tags', 'dependencies', 'entities']
-    if no_tagger and 'tags' in pipeline: pipeline.remove('tags')
-    if no_parser and 'dependencies' in pipeline: pipeline.remove('dependencies')
-    if no_entities and 'entities' in pipeline: pipeline.remove('entities')
+    pipeline = ['tagger', 'parser', 'ner']
+    if no_tagger and 'tagger' in pipeline: pipeline.remove('tagger')
+    if no_parser and 'parser' in pipeline: pipeline.remove('parser')
+    if no_entities and 'ner' in pipeline: pipeline.remove('ner')

    # Take dropout and batch size as generators of values -- dropout
    # starts high and decays sharply, to force the optimizer to explore.