diff --git a/spacy/tests/website/test_api.py b/spacy/tests/website/test_api.py index fc5eb3c7a..6a7379d87 100644 --- a/spacy/tests/website/test_api.py +++ b/spacy/tests/website/test_api.py @@ -150,7 +150,7 @@ def test_example_i_like_new_york5(toks, autumn, dot): assert toks[autumn].head.orth_ == 'in' assert toks[dot].head.orth_ == 'like' autumn_dot = toks[autumn:] - assert autumn_dot.root.orth_ == '.' + assert autumn_dot.root.orth_ == 'Autumn' def test_navigating_the_parse_tree_lefts(doc): diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 82501dbc5..801c98523 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -241,6 +241,12 @@ cdef class Span: cdef int _count_words_to_root(const TokenC* token, int sent_length) except -1: + # Don't allow spaces to be the root, if there are + # better candidates + if Lexeme.c_check_flag(token.lex, IS_SPACE) and token.l_kids == 0 and token.r_kids == 0: + return sent_length-1 + if Lexeme.c_check_flag(token.lex, IS_PUNCT) and token.l_kids == 0 and token.r_kids == 0: + return sent_length-1 cdef int n = 0 while token.head != 0: token += token.head