mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Use collections.Counter instead of PreshCounter in Doc.count_by
This commit is contained in:
parent
e080412385
commit
0f0f07318a
|
@ -5,7 +5,6 @@ import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from gensim.models import Word2Vec
|
from gensim.models import Word2Vec
|
||||||
from preshed.counter import PreshCounter
|
|
||||||
import plac
|
import plac
|
||||||
import spacy
|
import spacy
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from preshed.counter cimport PreshCounter
|
|
||||||
|
|
||||||
from ..vocab cimport Vocab
|
from ..vocab cimport Vocab
|
||||||
from ..structs cimport TokenC, LexemeC
|
from ..structs cimport TokenC, LexemeC
|
||||||
|
|
|
@ -9,6 +9,7 @@ cimport cython
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
from libc.string cimport memcpy, memset
|
from libc.string cimport memcpy, memset
|
||||||
from libc.math cimport sqrt
|
from libc.math cimport sqrt
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
import numpy.linalg
|
import numpy.linalg
|
||||||
|
@ -698,7 +699,7 @@ cdef class Doc:
|
||||||
# Handle 1d case
|
# Handle 1d case
|
||||||
return output if len(attr_ids) >= 2 else output.reshape((self.length,))
|
return output if len(attr_ids) >= 2 else output.reshape((self.length,))
|
||||||
|
|
||||||
def count_by(self, attr_id_t attr_id, exclude=None, PreshCounter counts=None):
|
def count_by(self, attr_id_t attr_id, exclude=None, object counts=None):
|
||||||
"""Count the frequencies of a given attribute. Produces a dict of
|
"""Count the frequencies of a given attribute. Produces a dict of
|
||||||
`{attribute (int): count (ints)}` frequencies, keyed by the values of
|
`{attribute (int): count (ints)}` frequencies, keyed by the values of
|
||||||
the given attribute ID.
|
the given attribute ID.
|
||||||
|
@ -713,50 +714,22 @@ cdef class Doc:
|
||||||
cdef size_t count
|
cdef size_t count
|
||||||
cdef int64_t this_value
|
cdef int64_t this_value
|
||||||
|
|
||||||
print("COUNTING")
|
|
||||||
|
|
||||||
if counts is None:
|
if counts is None:
|
||||||
counts = PreshCounter()
|
counts = Counter()
|
||||||
output_dict = True
|
output_dict = True
|
||||||
print("counts None")
|
|
||||||
else:
|
else:
|
||||||
output_dict = False
|
output_dict = False
|
||||||
# Take this check out of the loop, for a bit of extra speed
|
# Take this check out of the loop, for a bit of extra speed
|
||||||
if exclude is None:
|
if exclude is None:
|
||||||
print("exclude None")
|
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
print()
|
|
||||||
print("token", self[i])
|
|
||||||
this_value = get_token_attr(&self.c[i], attr_id)
|
this_value = get_token_attr(&self.c[i], attr_id)
|
||||||
print("token attr value", this_value)
|
counts[this_value] += 1
|
||||||
print("type attr value", type(this_value))
|
|
||||||
|
|
||||||
print(i, "key this_value before", counts.c_map.cells[this_value].key)
|
|
||||||
print(i, "value this_value before", <int64_t>counts.c_map.cells[this_value].value)
|
|
||||||
counts.inc(this_value, 1)
|
|
||||||
print(i, "key this_value after", counts.c_map.cells[this_value].key)
|
|
||||||
print(i, "value this_value after", <int64_t>counts.c_map.cells[this_value].value)
|
|
||||||
|
|
||||||
print(i, "key 0", counts.c_map.cells[0].key)
|
|
||||||
print(i, "value 0", <int64_t>counts.c_map.cells[0].value)
|
|
||||||
print(i, "key 1", counts.c_map.cells[1].key)
|
|
||||||
print(i, "value 1", <int64_t>counts.c_map.cells[1].value)
|
|
||||||
else:
|
else:
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
if not exclude(self[i]):
|
if not exclude(self[i]):
|
||||||
attr = get_token_attr(&self.c[i], attr_id)
|
attr = get_token_attr(&self.c[i], attr_id)
|
||||||
counts.inc(attr, 1)
|
counts[attr] += 1
|
||||||
if output_dict:
|
if output_dict:
|
||||||
print("output_dict")
|
|
||||||
print(counts.length)
|
|
||||||
print(counts.total)
|
|
||||||
print("key 0", counts.c_map.cells[0].key)
|
|
||||||
print("value 0", <int64_t>counts.c_map.cells[0].value)
|
|
||||||
print("key 1", counts.c_map.cells[1].key)
|
|
||||||
print("value 1", <int64_t>counts.c_map.cells[1].value)
|
|
||||||
print()
|
|
||||||
print(dict(counts))
|
|
||||||
print()
|
|
||||||
return dict(counts)
|
return dict(counts)
|
||||||
|
|
||||||
def _realloc(self, new_size):
|
def _realloc(self, new_size):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user