From 11810be33e6836c5ed803efcf3158587df3e9bd9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 4 Feb 2016 13:04:16 +0100 Subject: [PATCH] * Add Python hooks for is_bracket/is_quote/is_left_punct/is_right_punct --- spacy/lexeme.pyx | 18 ++++++++++++++++++ spacy/tokens/token.pyx | 13 +++++++++++++ 2 files changed, 31 insertions(+) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 845b29314..9a2ffe9a1 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -18,6 +18,7 @@ import numpy from .attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE from .attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP +from .attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT from .attrs cimport IS_OOV @@ -183,6 +184,23 @@ cdef class Lexeme: def __get__(self): return Lexeme.c_check_flag(self.c, IS_SPACE) def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_SPACE, x) + property is_bracket: + def __get__(self): return Lexeme.c_check_flag(self.c, IS_BRACKET) + def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_BRACKET, x) + + property is_quote: + def __get__(self): return Lexeme.c_check_flag(self.c, IS_QUOTE) + def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_QUOTE, x) + + property is_left_punct: + def __get__(self): return Lexeme.c_check_flag(self.c, IS_LEFT_PUNCT) + def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_LEFT_PUNCT, x) + + property is_right_punct: + def __get__(self): return Lexeme.c_check_flag(self.c, IS_RIGHT_PUNCT) + def __set__(self, bint x): Lexeme.c_set_flag(self.c, IS_RIGHT_PUNCT, x) + + property like_url: def __get__(self): return Lexeme.c_check_flag(self.c, LIKE_URL) def __set__(self, bint x): Lexeme.c_set_flag(self.c, LIKE_URL, x) diff --git a/spacy/tokens/token.pyx b/spacy/tokens/token.pyx index 95515b9c3..9334fb466 100644 --- a/spacy/tokens/token.pyx +++ b/spacy/tokens/token.pyx @@ -18,6 +18,7 @@ from ..attrs cimport POS, LEMMA, TAG, DEP from ..parts_of_speech cimport CONJ, PUNCT from ..attrs cimport IS_ALPHA, IS_ASCII, IS_DIGIT, IS_LOWER, IS_PUNCT, IS_SPACE +from ..attrs cimport IS_BRACKET, IS_QUOTE, IS_LEFT_PUNCT, IS_RIGHT_PUNCT from ..attrs cimport IS_TITLE, IS_UPPER, LIKE_URL, LIKE_NUM, LIKE_EMAIL, IS_STOP from ..attrs cimport IS_OOV @@ -362,6 +363,18 @@ cdef class Token: property is_space: def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_SPACE) + + property is_bracket: + def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_BRACKET) + + property is_quote: + def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_QUOTE) + + property is_left_punct: + def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_LEFT_PUNCT) + + property is_right_punct: + def __get__(self): return Lexeme.c_check_flag(self.c.lex, IS_RIGHT_PUNCT) property like_url: def __get__(self): return Lexeme.c_check_flag(self.c.lex, LIKE_URL)