mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Avoid shipping the spacy.munge package
This commit is contained in:
parent
7d265a9c62
commit
00a0dfcb59
2
setup.py
2
setup.py
|
@ -103,7 +103,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
|
|||
def run_setup(exts):
|
||||
setup(
|
||||
name='spacy',
|
||||
packages=['spacy', 'spacy.en', 'spacy.syntax', "spacy.munge"],
|
||||
packages=['spacy', 'spacy.en', 'spacy.syntax'],
|
||||
description="Industrial-strength NLP",
|
||||
author='Matthew Honnibal',
|
||||
author_email='honnibal@gmail.com',
|
||||
|
|
|
@ -7,10 +7,36 @@ import re
|
|||
import os
|
||||
from os import path
|
||||
|
||||
from spacy.munge.read_ner import tags_to_entities
|
||||
from libc.string cimport memset
|
||||
|
||||
|
||||
def tags_to_entities(tags):
    """Collect entity spans from a sequence of per-token tag strings.

    Tags are dispatched on their first character:

    * ``O...`` -- outside any entity; any pending span start is discarded.
    * ``-``    -- skipped entirely (does not affect a pending span).
    * ``B...`` -- records the current index as the start of a span.
    * ``I...`` -- inside a span; requires that a span start is pending.
    * ``L...`` -- closes the pending span and emits it.
    * ``U...`` -- emits a single-token span.

    The label of an emitted span is everything after the first two
    characters of the tag (``tag[2:]``).

    Args:
        tags: Sequence of tag strings, one per token.

    Returns:
        A list of ``(label, start, end)`` tuples in the order the spans are
        closed, where ``start`` and ``end`` are inclusive token indices.

    Raises:
        AssertionError: An ``I`` tag was seen with no pending span start.
            The argument is the list of tags preceding the offending one.
        ValueError: A tag did not match any of the recognised prefixes.
            The argument is the offending tag.
    """
    entities = []
    start = None
    for i, tag in enumerate(tags):
        if tag.startswith('O'):
            # TODO: We shouldn't be getting these malformed inputs. Fix this.
            # An 'O' while a span is open silently drops the unfinished span.
            start = None
            continue
        elif tag == '-':
            continue
        elif tag.startswith('I'):
            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped when Python runs with -O.
            if start is None:
                raise AssertionError(tags[:i])
            continue
        if tag.startswith('U'):
            entities.append((tag[2:], i, i))
        elif tag.startswith('B'):
            start = i
        elif tag.startswith('L'):
            # NOTE(review): an 'L' without a preceding 'B' emits a span whose
            # start is None; kept as-is to preserve existing behaviour.
            entities.append((tag[2:], start, i))
            start = None
        else:
            # ValueError is still caught by callers using ``except Exception``.
            raise ValueError(tag)
    return entities
|
||||
|
||||
|
||||
|
||||
def align(cand_words, gold_words):
|
||||
cost, edit_path = _min_edit_path(cand_words, gold_words)
|
||||
alignment = []
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
from __future__ import division
|
||||
|
||||
from spacy.munge.read_ner import tags_to_entities
|
||||
from .gold import tags_to_entities
|
||||
|
||||
|
||||
class PRFScore(object):
|
||||
|
|
Loading…
Reference in New Issue
Block a user