Switch to qsort

This commit is contained in:
Adriane Boyd 2019-09-27 15:30:58 +02:00
parent b2a162361f
commit a98d71a942

View File

@ -5,6 +5,7 @@ from __future__ import unicode_literals
from cython.operator cimport dereference as deref from cython.operator cimport dereference as deref
from cython.operator cimport preincrement as preinc from cython.operator cimport preincrement as preinc
from libc.stdlib cimport qsort
from libc.string cimport memcpy, memset from libc.string cimport memcpy, memset
from libcpp.set cimport set as stdset from libcpp.set cimport set as stdset
from libc.stdio cimport printf from libc.stdio cimport printf
@ -324,7 +325,7 @@ cdef class Tokenizer:
cdef int seen_i cdef int seen_i
cdef MatchStruct span cdef MatchStruct span
cdef stdset[int] seen_tokens cdef stdset[int] seen_tokens
stdsort(original.begin(), original.end(), len_start_cmp) qsort(&original[0], original.size(), sizeof(MatchStruct), len_start_cmp)
cdef int orig_i = original.size() - 1 cdef int orig_i = original.size() - 1
while orig_i >= 0: while orig_i >= 0:
span = original[orig_i] span = original[orig_i]
@ -333,7 +334,8 @@ cdef class Tokenizer:
for seen_i in range(span.start, span.end): for seen_i in range(span.start, span.end):
seen_tokens.insert(seen_i) seen_tokens.insert(seen_i)
orig_i -= 1 orig_i -= 1
stdsort(filtered.begin(), filtered.end(), start_cmp) qsort(&filtered[0], filtered.size(), sizeof(MatchStruct), start_cmp)
#stdsort(filtered.begin(), filtered.end(), start_cmp)
cdef int _try_cache(self, hash_t key, Doc tokens) except -1: cdef int _try_cache(self, hash_t key, Doc tokens) except -1:
cached = <_Cached*>self._cache.get(key) cached = <_Cached*>self._cache.get(key)
@ -689,17 +691,15 @@ def _get_regex_pattern(regex):
return None if regex is None else regex.__self__.pattern return None if regex is None else regex.__self__.pattern
cdef extern from "<algorithm>" namespace "std" nogil: cdef int len_start_cmp(const void *a_p, const void *b_p) nogil:
void stdsort "sort"(vector[MatchStruct].iterator, cdef MatchStruct a = (<MatchStruct*>a_p)[0];
vector[MatchStruct].iterator, cdef MatchStruct b = (<MatchStruct*>b_p)[0];
bint (*)(MatchStruct, MatchStruct))
cdef bint len_start_cmp(MatchStruct a, MatchStruct b) nogil:
if a.end - a.start == b.end - b.start: if a.end - a.start == b.end - b.start:
return a.start < b.start return a.start - b.start
return a.end - a.start < b.end - b.start return (a.end - a.start) - (b.end - b.start)
cdef bint start_cmp(MatchStruct a, MatchStruct b) nogil: cdef int start_cmp(const void *a_p, const void *b_p) nogil:
return a.start < b.start cdef MatchStruct a = (<MatchStruct*>a_p)[0];
cdef MatchStruct b = (<MatchStruct*>b_p)[0];
return a.start - b.start