mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Fix r/l and distance features.
This commit is contained in:
		
							parent
							
								
									d51a86478e
								
							
						
					
					
						commit
						77d7e79c7e
					
				| 
						 | 
					@ -85,7 +85,7 @@ cdef int fill_context(atom_t* context, State* state) except -1:
 | 
				
			||||||
    fill_token(&context[E0w], get_e0(state))
 | 
					    fill_token(&context[E0w], get_e0(state))
 | 
				
			||||||
    fill_token(&context[E1w], get_e1(state))
 | 
					    fill_token(&context[E1w], get_e1(state))
 | 
				
			||||||
    if state.stack_len >= 1:
 | 
					    if state.stack_len >= 1:
 | 
				
			||||||
        context[dist] = min(state.stack[0] - state.i, 5)
 | 
					        context[dist] = min(state.i - state.stack[0], 5)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        context[dist] = 0
 | 
					        context[dist] = 0
 | 
				
			||||||
    context[N0lv] = min(count_left_kids(get_n0(state)), 5)
 | 
					    context[N0lv] = min(count_left_kids(get_n0(state)), 5)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -89,9 +89,9 @@ cdef inline TokenC* get_s2(const State *s) nogil:
 | 
				
			||||||
    # Rely on our padding to ensure we don't go out of bounds here
 | 
					    # Rely on our padding to ensure we don't go out of bounds here
 | 
				
			||||||
    return &s.sent[s.stack[-2]]
 | 
					    return &s.sent[s.stack[-2]]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil
 | 
					cdef const TokenC* get_right(const State* s, const TokenC* head, int idx) nogil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil
 | 
					cdef const TokenC* get_left(const State* s, const TokenC* head, int idx) nogil
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef inline bint at_eol(const State *s) nogil:
 | 
					cdef inline bint at_eol(const State *s) nogil:
 | 
				
			||||||
    return s.i >= s.sent_len
 | 
					    return s.i >= s.sent_len
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -16,10 +16,8 @@ cdef int add_dep(State *s, int head, int child, int label) except -1:
 | 
				
			||||||
    cdef int dist = head - child
 | 
					    cdef int dist = head - child
 | 
				
			||||||
    s.sent[child].head = dist
 | 
					    s.sent[child].head = dist
 | 
				
			||||||
    s.sent[child].dep = label
 | 
					    s.sent[child].dep = label
 | 
				
			||||||
    # Keep a bit-vector tracking child dependencies.  If a word has a child at
 | 
					 | 
				
			||||||
    # offset i from it, set that bit (tracking left and right separately)
 | 
					 | 
				
			||||||
    if child > head:
 | 
					    if child > head:
 | 
				
			||||||
        s.sent[head].r_kids |= 1 << (-dist)
 | 
					        s.sent[head].r_kids += 1
 | 
				
			||||||
        s.sent[head].r_edge = child - head
 | 
					        s.sent[head].r_edge = child - head
 | 
				
			||||||
        # Walk up the tree, setting right edge
 | 
					        # Walk up the tree, setting right edge
 | 
				
			||||||
        n_iter = 0
 | 
					        n_iter = 0
 | 
				
			||||||
| 
						 | 
					@ -34,7 +32,7 @@ cdef int add_dep(State *s, int head, int child, int label) except -1:
 | 
				
			||||||
                msg = msg % (start, child, tree)
 | 
					                msg = msg % (start, child, tree)
 | 
				
			||||||
                raise Exception(msg)
 | 
					                raise Exception(msg)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        s.sent[head].l_kids |= 1 << dist
 | 
					        s.sent[head].l_kids += 1
 | 
				
			||||||
        s.sent[head].l_edge = (child + s.sent[child].l_edge) - head
 | 
					        s.sent[head].l_edge = (child + s.sent[child].l_edge) - head
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -42,14 +40,14 @@ cdef int del_dep(State *s, int head, int child) except -1:
 | 
				
			||||||
    cdef const TokenC* next_child
 | 
					    cdef const TokenC* next_child
 | 
				
			||||||
    cdef int dist = head - child
 | 
					    cdef int dist = head - child
 | 
				
			||||||
    if child > head:
 | 
					    if child > head:
 | 
				
			||||||
        s.sent[head].r_kids &= ~(1 << (-dist))
 | 
					        s.sent[head].r_kids -= 1
 | 
				
			||||||
        next_child = get_right(s, &s.sent[head], 1)
 | 
					        next_child = get_right(s, &s.sent[head], 1)
 | 
				
			||||||
        if next_child == NULL:
 | 
					        if next_child == NULL:
 | 
				
			||||||
            s.sent[head].r_edge = 0
 | 
					            s.sent[head].r_edge = 0
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            s.sent[head].r_edge = next_child.r_edge
 | 
					            s.sent[head].r_edge = next_child.r_edge
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        s.sent[head].l_kids &= ~(1 << dist)
 | 
					        s.sent[head].l_kids -= 1
 | 
				
			||||||
        next_child = get_left(s, &s.sent[head], 1)
 | 
					        next_child = get_left(s, &s.sent[head], 1)
 | 
				
			||||||
        if next_child == NULL:
 | 
					        if next_child == NULL:
 | 
				
			||||||
            s.sent[head].l_edge = 0
 | 
					            s.sent[head].l_edge = 0
 | 
				
			||||||
| 
						 | 
					@ -113,36 +111,60 @@ cdef bint has_head(const TokenC* t) nogil:
 | 
				
			||||||
    return t.head != 0
 | 
					    return t.head != 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil:
 | 
					cdef const TokenC* get_left(const State* s, const TokenC* target, int idx) nogil:
 | 
				
			||||||
    cdef uint32_t kids = head.l_kids
 | 
					    if target.l_kids == 0:
 | 
				
			||||||
    if kids == 0:
 | 
					 | 
				
			||||||
        return NULL
 | 
					        return NULL
 | 
				
			||||||
    cdef int offset = _nth_significant_bit(kids, idx)
 | 
					    if idx > target.l_kids:
 | 
				
			||||||
    cdef const TokenC* child = head - offset
 | 
					 | 
				
			||||||
    if child >= s.sent:
 | 
					 | 
				
			||||||
        return child
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        return NULL
 | 
					        return NULL
 | 
				
			||||||
 | 
					    if idx < 1:
 | 
				
			||||||
 | 
					        return NULL
 | 
				
			||||||
 | 
					    cdef const TokenC* ptr = s.sent
 | 
				
			||||||
 | 
					    while ptr < target:
 | 
				
			||||||
 | 
					        # If this head is still to the right of us, we can skip to it
 | 
				
			||||||
 | 
					        # No token that's between this token and this head could be our
 | 
				
			||||||
 | 
					        # child.
 | 
				
			||||||
 | 
					        if (ptr.head >= 1) and (ptr + ptr.head) < target:
 | 
				
			||||||
 | 
					            ptr += ptr.head
 | 
				
			||||||
 | 
					        elif ptr + ptr.head == target:
 | 
				
			||||||
 | 
					            idx -= 1
 | 
				
			||||||
 | 
					            if idx == 0:
 | 
				
			||||||
 | 
					                return ptr
 | 
				
			||||||
 | 
					            ptr += 1
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            ptr += 1
 | 
				
			||||||
 | 
					    return NULL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil:
 | 
					cdef const TokenC* get_right(const State* s, const TokenC* target, int idx) nogil:
 | 
				
			||||||
    cdef uint32_t kids = head.r_kids
 | 
					    if target.r_kids == 0:
 | 
				
			||||||
    if kids == 0:
 | 
					 | 
				
			||||||
        return NULL
 | 
					        return NULL
 | 
				
			||||||
    cdef int offset = _nth_significant_bit(kids, idx)
 | 
					    if idx > target.r_kids:
 | 
				
			||||||
    cdef const TokenC* child = head + offset
 | 
					 | 
				
			||||||
    if child < (s.sent + s.sent_len):
 | 
					 | 
				
			||||||
        return child
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        return NULL
 | 
					        return NULL
 | 
				
			||||||
 | 
					    if idx < 1:
 | 
				
			||||||
 | 
					        return NULL
 | 
				
			||||||
 | 
					    cdef const TokenC* ptr = s.sent + (s.sent_len - 1)
 | 
				
			||||||
 | 
					    while ptr > target:
 | 
				
			||||||
 | 
					        # If this head is still to the right of us, we can skip to it
 | 
				
			||||||
 | 
					        # No token that's between this token and this head could be our
 | 
				
			||||||
 | 
					        # child.
 | 
				
			||||||
 | 
					        if (ptr.head < 0) and ((ptr + ptr.head) > target):
 | 
				
			||||||
 | 
					            ptr += ptr.head
 | 
				
			||||||
 | 
					        elif ptr + ptr.head == target:
 | 
				
			||||||
 | 
					            idx -= 1
 | 
				
			||||||
 | 
					        if idx == 0:
 | 
				
			||||||
 | 
					            return ptr
 | 
				
			||||||
 | 
					            ptr -= 1
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            ptr -= 1
 | 
				
			||||||
 | 
					    return NULL
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef int count_left_kids(const TokenC* head) nogil:
 | 
					cdef int count_left_kids(const TokenC* head) nogil:
 | 
				
			||||||
    return _popcount(head.l_kids)
 | 
					    return head.l_kids
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef int count_right_kids(const TokenC* head) nogil:
 | 
					cdef int count_right_kids(const TokenC* head) nogil:
 | 
				
			||||||
    return _popcount(head.r_kids)
 | 
					    return head.r_kids
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except NULL:
 | 
					cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except NULL:
 | 
				
			||||||
| 
						 | 
					@ -185,23 +207,3 @@ cdef int copy_state(State* dest, const State* src) except -1:
 | 
				
			||||||
    for i in range(src.ents_len):
 | 
					    for i in range(src.ents_len):
 | 
				
			||||||
        dest.ent[-i] = src.ent[-i]
 | 
					        dest.ent[-i] = src.ent[-i]
 | 
				
			||||||
    dest.ents_len = src.ents_len
 | 
					    dest.ents_len = src.ents_len
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# From https://en.wikipedia.org/wiki/Hamming_weight
 | 
					 | 
				
			||||||
cdef inline uint32_t _popcount(uint32_t x) nogil:
 | 
					 | 
				
			||||||
    """Find number of non-zero bits."""
 | 
					 | 
				
			||||||
    cdef int count = 0
 | 
					 | 
				
			||||||
    while x != 0:
 | 
					 | 
				
			||||||
        x &= x - 1
 | 
					 | 
				
			||||||
        count += 1
 | 
					 | 
				
			||||||
    return count
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef inline uint32_t _nth_significant_bit(uint32_t bits, int n) nogil:
 | 
					 | 
				
			||||||
    cdef int i
 | 
					 | 
				
			||||||
    for i in range(32):
 | 
					 | 
				
			||||||
        if bits & (1 << i):
 | 
					 | 
				
			||||||
            n -= 1
 | 
					 | 
				
			||||||
            if n < 1:
 | 
					 | 
				
			||||||
                return i
 | 
					 | 
				
			||||||
    return 0
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user