mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
tracked the bug down to PreshCounter.inc - still unclear what goes wrong
This commit is contained in:
parent
a89fecce97
commit
e080412385
|
@@ -11,13 +11,17 @@ from spacy.lang.en import English
|
||||||
"sentence",
|
"sentence",
|
||||||
[
|
[
|
||||||
'The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.',
|
'The story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.',
|
||||||
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s #1.'
|
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s #1.',
|
||||||
|
'The next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale\'s number one',
|
||||||
|
'Indeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.',
|
||||||
|
"It was a missed assignment, but it shouldn't have resulted in a turnover ..."
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_issue3869(sentence):
|
def test_issue3869(sentence):
|
||||||
"""Test that the Doc's count_by function works consistently"""
|
"""Test that the Doc's count_by function works consistently"""
|
||||||
nlp = English()
|
nlp = English()
|
||||||
|
|
||||||
|
print()
|
||||||
doc = nlp(sentence)
|
doc = nlp(sentence)
|
||||||
|
|
||||||
count = 0
|
count = 0
|
||||||
|
|
|
@@ -8,6 +8,7 @@ from ..typedefs cimport attr_t
|
||||||
from ..attrs cimport attr_id_t
|
from ..attrs cimport attr_id_t
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil
|
cdef attr_t get_token_attr(const TokenC* token, attr_id_t feat_name) nogil
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -13,6 +13,7 @@ from libc.math cimport sqrt
|
||||||
import numpy
|
import numpy
|
||||||
import numpy.linalg
|
import numpy.linalg
|
||||||
import struct
|
import struct
|
||||||
|
from libc.stdint cimport int64_t
|
||||||
import srsly
|
import srsly
|
||||||
from thinc.neural.util import get_array_module, copy_array
|
from thinc.neural.util import get_array_module, copy_array
|
||||||
|
|
||||||
|
@@ -710,22 +711,52 @@ cdef class Doc:
|
||||||
cdef int i
|
cdef int i
|
||||||
cdef attr_t attr
|
cdef attr_t attr
|
||||||
cdef size_t count
|
cdef size_t count
|
||||||
|
cdef int64_t this_value
|
||||||
|
|
||||||
|
print("COUNTING")
|
||||||
|
|
||||||
if counts is None:
|
if counts is None:
|
||||||
counts = PreshCounter()
|
counts = PreshCounter()
|
||||||
output_dict = True
|
output_dict = True
|
||||||
|
print("counts None")
|
||||||
else:
|
else:
|
||||||
output_dict = False
|
output_dict = False
|
||||||
# Take this check out of the loop, for a bit of extra speed
|
# Take this check out of the loop, for a bit of extra speed
|
||||||
if exclude is None:
|
if exclude is None:
|
||||||
|
print("exclude None")
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
counts.inc(get_token_attr(&self.c[i], attr_id), 1)
|
print()
|
||||||
|
print("token", self[i])
|
||||||
|
this_value = get_token_attr(&self.c[i], attr_id)
|
||||||
|
print("token attr value", this_value)
|
||||||
|
print("type attr value", type(this_value))
|
||||||
|
|
||||||
|
print(i, "key this_value before", counts.c_map.cells[this_value].key)
|
||||||
|
print(i, "value this_value before", <int64_t>counts.c_map.cells[this_value].value)
|
||||||
|
counts.inc(this_value, 1)
|
||||||
|
print(i, "key this_value after", counts.c_map.cells[this_value].key)
|
||||||
|
print(i, "value this_value after", <int64_t>counts.c_map.cells[this_value].value)
|
||||||
|
|
||||||
|
print(i, "key 0", counts.c_map.cells[0].key)
|
||||||
|
print(i, "value 0", <int64_t>counts.c_map.cells[0].value)
|
||||||
|
print(i, "key 1", counts.c_map.cells[1].key)
|
||||||
|
print(i, "value 1", <int64_t>counts.c_map.cells[1].value)
|
||||||
else:
|
else:
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
if not exclude(self[i]):
|
if not exclude(self[i]):
|
||||||
attr = get_token_attr(&self.c[i], attr_id)
|
attr = get_token_attr(&self.c[i], attr_id)
|
||||||
counts.inc(attr, 1)
|
counts.inc(attr, 1)
|
||||||
if output_dict:
|
if output_dict:
|
||||||
|
print("output_dict")
|
||||||
|
print(counts.length)
|
||||||
|
print(counts.total)
|
||||||
|
print("key 0", counts.c_map.cells[0].key)
|
||||||
|
print("value 0", <int64_t>counts.c_map.cells[0].value)
|
||||||
|
print("key 1", counts.c_map.cells[1].key)
|
||||||
|
print("value 1", <int64_t>counts.c_map.cells[1].value)
|
||||||
|
print()
|
||||||
|
print(dict(counts))
|
||||||
|
print()
|
||||||
return dict(counts)
|
return dict(counts)
|
||||||
|
|
||||||
def _realloc(self, new_size):
|
def _realloc(self, new_size):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user