mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-11 00:32:40 +03:00
Whitespace
This commit is contained in:
parent
7769bc31e3
commit
3d4e389d23
|
@ -1,3 +1,4 @@
|
||||||
|
# cython: profile=True
|
||||||
from __future__ import unicode_literals, print_function
|
from __future__ import unicode_literals, print_function
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
@ -90,9 +91,9 @@ def _min_edit_path(cand_words, gold_words):
|
||||||
# TODO: Fix this --- just do it properly, make the full edit matrix and
|
# TODO: Fix this --- just do it properly, make the full edit matrix and
|
||||||
# then walk back over it...
|
# then walk back over it...
|
||||||
# Preprocess inputs
|
# Preprocess inputs
|
||||||
cand_words = [punct_re.sub('', w) for w in cand_words]
|
cand_words = [punct_re.sub('', w) for w in cand_words]
|
||||||
gold_words = [punct_re.sub('', w) for w in gold_words]
|
gold_words = [punct_re.sub('', w) for w in gold_words]
|
||||||
|
|
||||||
if cand_words == gold_words:
|
if cand_words == gold_words:
|
||||||
return 0, ''.join(['M' for _ in gold_words])
|
return 0, ''.join(['M' for _ in gold_words])
|
||||||
mem = Pool()
|
mem = Pool()
|
||||||
|
@ -132,7 +133,7 @@ def _min_edit_path(cand_words, gold_words):
|
||||||
else:
|
else:
|
||||||
best_cost = d_cost
|
best_cost = d_cost
|
||||||
best_hist = previous_row[j + 1] + 'D'
|
best_hist = previous_row[j + 1] + 'D'
|
||||||
|
|
||||||
current_row.append(best_hist)
|
current_row.append(best_hist)
|
||||||
curr_costs[j+1] = best_cost
|
curr_costs[j+1] = best_cost
|
||||||
previous_row = current_row
|
previous_row = current_row
|
||||||
|
@ -306,7 +307,7 @@ cdef class GoldParse:
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
"""Get the number of gold-standard tokens.
|
"""Get the number of gold-standard tokens.
|
||||||
|
|
||||||
Returns (int): The number of gold-standard tokens.
|
Returns (int): The number of gold-standard tokens.
|
||||||
"""
|
"""
|
||||||
return self.length
|
return self.length
|
||||||
|
@ -330,7 +331,7 @@ def biluo_tags_from_offsets(doc, entities):
|
||||||
entities (sequence):
|
entities (sequence):
|
||||||
A sequence of (start, end, label) triples. start and end should be
|
A sequence of (start, end, label) triples. start and end should be
|
||||||
character-offset integers denoting the slice into the original string.
|
character-offset integers denoting the slice into the original string.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tags (list):
|
tags (list):
|
||||||
A list of unicode strings, describing the tags. Each tag string will
|
A list of unicode strings, describing the tags. Each tag string will
|
||||||
|
@ -348,7 +349,7 @@ def biluo_tags_from_offsets(doc, entities):
|
||||||
doc = nlp.tokenizer(text)
|
doc = nlp.tokenizer(text)
|
||||||
|
|
||||||
tags = biluo_tags_from_offsets(doc, entities)
|
tags = biluo_tags_from_offsets(doc, entities)
|
||||||
|
|
||||||
assert tags == ['O', 'O', 'U-LOC', 'O']
|
assert tags == ['O', 'O', 'U-LOC', 'O']
|
||||||
'''
|
'''
|
||||||
starts = {token.idx: token.i for token in doc}
|
starts = {token.idx: token.i for token in doc}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user