From e66d45bf663996eedf8dcc5a034da2c171c28b32 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 6 Feb 2016 13:37:41 +0100 Subject: [PATCH] * Restore previous patch to Span.root, as it seems it wasn't the cause of the problem. --- spacy/tests/website/test_api.py | 2 +- spacy/tokens/span.pyx | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/spacy/tests/website/test_api.py b/spacy/tests/website/test_api.py index fc5eb3c7a..6a7379d87 100644 --- a/spacy/tests/website/test_api.py +++ b/spacy/tests/website/test_api.py @@ -150,7 +150,7 @@ def test_example_i_like_new_york5(toks, autumn, dot): assert toks[autumn].head.orth_ == 'in' assert toks[dot].head.orth_ == 'like' autumn_dot = toks[autumn:] - assert autumn_dot.root.orth_ == '.' + assert autumn_dot.root.orth_ == 'Autumn' def test_navigating_the_parse_tree_lefts(doc): diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 82501dbc5..801c98523 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -241,6 +241,12 @@ cdef class Span: cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1: + # Don't allow spaces to be the root, if there are + # better candidates + if Lexeme.c_check_flag(token.lex, IS_SPACE) and token.l_kids == 0 and token.r_kids == 0: + return sent_length-1 + if Lexeme.c_check_flag(token.lex, IS_PUNCT) and token.l_kids == 0 and token.r_kids == 0: + return sent_length-1 cdef int n = 0 while token.head != 0: token += token.head