mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Switch to qsort
This commit is contained in:
parent
b2a162361f
commit
a98d71a942
|
@ -5,6 +5,7 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
from cython.operator cimport preincrement as preinc
|
from cython.operator cimport preincrement as preinc
|
||||||
|
from libc.stdlib cimport qsort
|
||||||
from libc.string cimport memcpy, memset
|
from libc.string cimport memcpy, memset
|
||||||
from libcpp.set cimport set as stdset
|
from libcpp.set cimport set as stdset
|
||||||
from libc.stdio cimport printf
|
from libc.stdio cimport printf
|
||||||
|
@ -324,7 +325,7 @@ cdef class Tokenizer:
|
||||||
cdef int seen_i
|
cdef int seen_i
|
||||||
cdef MatchStruct span
|
cdef MatchStruct span
|
||||||
cdef stdset[int] seen_tokens
|
cdef stdset[int] seen_tokens
|
||||||
stdsort(original.begin(), original.end(), len_start_cmp)
|
qsort(&original[0], original.size(), sizeof(MatchStruct), len_start_cmp)
|
||||||
cdef int orig_i = original.size() - 1
|
cdef int orig_i = original.size() - 1
|
||||||
while orig_i >= 0:
|
while orig_i >= 0:
|
||||||
span = original[orig_i]
|
span = original[orig_i]
|
||||||
|
@ -333,7 +334,8 @@ cdef class Tokenizer:
|
||||||
for seen_i in range(span.start, span.end):
|
for seen_i in range(span.start, span.end):
|
||||||
seen_tokens.insert(seen_i)
|
seen_tokens.insert(seen_i)
|
||||||
orig_i -= 1
|
orig_i -= 1
|
||||||
stdsort(filtered.begin(), filtered.end(), start_cmp)
|
qsort(&filtered[0], filtered.size(), sizeof(MatchStruct), start_cmp)
|
||||||
|
#stdsort(filtered.begin(), filtered.end(), start_cmp)
|
||||||
|
|
||||||
cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
|
cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
|
||||||
cached = <_Cached*>self._cache.get(key)
|
cached = <_Cached*>self._cache.get(key)
|
||||||
|
@ -689,17 +691,15 @@ def _get_regex_pattern(regex):
|
||||||
return None if regex is None else regex.__self__.pattern
|
return None if regex is None else regex.__self__.pattern
|
||||||
|
|
||||||
|
|
||||||
cdef extern from "<algorithm>" namespace "std" nogil:
|
cdef int len_start_cmp(const void *a_p, const void *b_p) nogil:
|
||||||
void stdsort "sort"(vector[MatchStruct].iterator,
|
cdef MatchStruct a = (<MatchStruct*>a_p)[0];
|
||||||
vector[MatchStruct].iterator,
|
cdef MatchStruct b = (<MatchStruct*>b_p)[0];
|
||||||
bint (*)(MatchStruct, MatchStruct))
|
|
||||||
|
|
||||||
|
|
||||||
cdef bint len_start_cmp(MatchStruct a, MatchStruct b) nogil:
|
|
||||||
if a.end - a.start == b.end - b.start:
|
if a.end - a.start == b.end - b.start:
|
||||||
return a.start < b.start
|
return a.start - b.start
|
||||||
return a.end - a.start < b.end - b.start
|
return (a.end - a.start) - (b.end - b.start)
|
||||||
|
|
||||||
|
|
||||||
cdef bint start_cmp(MatchStruct a, MatchStruct b) nogil:
|
cdef int start_cmp(const void *a_p, const void *b_p) nogil:
|
||||||
return a.start < b.start
|
cdef MatchStruct a = (<MatchStruct*>a_p)[0];
|
||||||
|
cdef MatchStruct b = (<MatchStruct*>b_p)[0];
|
||||||
|
return a.start - b.start
|
||||||
|
|
Loading…
Reference in New Issue
Block a user