From cc4e395927692989d2da1cbac5d1de0e9d19cc1c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 17 Apr 2015 04:44:24 +0200
Subject: [PATCH] * Add some ad hoc regexes, for multi-word location prepositions

---
 spacy/en/regexes.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/spacy/en/regexes.py b/spacy/en/regexes.py
index 89f16d7f5..98e745239 100644
--- a/spacy/en/regexes.py
+++ b/spacy/en/regexes.py
@@ -17,6 +17,7 @@ _mw_prepositions = [
     'on our way',
     'on their way to',
     'on their way',
+    'along the route from'
 ]
 
 
@@ -29,6 +30,11 @@ TIME_RE = re.compile(
         one_two_digits=r'[0-2]?[0-9]',
         am_pm=r'[ap]\.?m\.?'))
 
+DATE_RE = re.compile(
+    '(?:this|last|next|the) (?:week|weekend|{days})'.format(
+        days='Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday'
+    ))
+
 
 MONEY_RE = re.compile('\$\d+(?:\.\d+)?|\d+ dollars(?: \d+ cents)?')
 
@@ -37,4 +43,5 @@ DAYS_RE = re.compile('Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday')
 
 
 REGEXES = [('IN', 'O', MW_PREPOSITIONS_RE), ('CD', 'TIME', TIME_RE),
+           ('NNP', 'DATE', DATE_RE),
            ('NNP', 'DATE', DAYS_RE), ('CD', 'MONEY', MONEY_RE)]