diff --git a/spacy/tests/lang/en/test_text.py b/spacy/tests/lang/en/test_text.py
index 1769f1262..2061a47e3 100644
--- a/spacy/tests/lang/en/test_text.py
+++ b/spacy/tests/lang/en/test_text.py
@@ -35,7 +35,6 @@ def test_tokenizer_handles_cnts(en_tokenizer, text, length):
     assert len(tokens) == length
 
 
-
 @pytest.mark.parametrize('text,match', [
     ('10', True), ('1', True), ('10,000', True), ('10,00', True),
     ('999.0', True), ('one', True), ('two', True), ('billion', True),
diff --git a/spacy/tests/regression/test_issue429.py b/spacy/tests/regression/test_issue429.py
index c5dc6989b..53d4dfc4d 100644
--- a/spacy/tests/regression/test_issue429.py
+++ b/spacy/tests/regression/test_issue429.py
@@ -17,9 +17,8 @@ def test_issue429(EN):
     doc = EN('a')
     matcher = Matcher(EN.vocab)
-    matcher.add('TEST', [{'ORTH': 'a'}], on_match=merge_phrases)
+    matcher.add('TEST', merge_phrases, [{'ORTH': 'a'}])
     doc = EN.make_doc('a b c')
-    EN.tagger(doc)
     matcher(doc)
     EN.entity(doc)
diff --git a/website/docs/api/lexeme.jade b/website/docs/api/lexeme.jade
index f23d37a94..dba6fdf59 100644
--- a/website/docs/api/lexeme.jade
+++ b/website/docs/api/lexeme.jade
@@ -4,7 +4,7 @@ include ../../_includes/_mixins
 
 p
     | An entry in the vocabulary. A #[code Lexeme] has no string context – it's
-    | a word-type, as opposed to a word token. It therefore has no
+    | a word type, as opposed to a word token. It therefore has no
     | part-of-speech tag, dependency parse, or lemma (if lemmatization depends
     | on the part-of-speech tag).
diff --git a/website/docs/api/matcher.jade b/website/docs/api/matcher.jade
index 5e15f852c..5d0e8af95 100644
--- a/website/docs/api/matcher.jade
+++ b/website/docs/api/matcher.jade
@@ -52,7 +52,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
 
     matcher = Matcher(nlp.vocab)
     pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
-    matcher.add("HelloWorld", on_match=None, pattern)
+    matcher.add("HelloWorld", None, pattern)
     doc = nlp(u'hello world!')
     matches = matcher(doc)
diff --git a/website/docs/api/vocab.jade b/website/docs/api/vocab.jade
index 1e77a5b41..bd18a17da 100644
--- a/website/docs/api/vocab.jade
+++ b/website/docs/api/vocab.jade
@@ -3,7 +3,7 @@ include ../../_includes/_mixins
 
 p
-    | A look-up table that allows you to access #[code Lexeme] objects. The
+    | A lookup table that allows you to access #[code Lexeme] objects. The
     | #[code Vocab] instance also provides access to the #[code StringStore],
     | and owns underlying C-data that is shared between #[code Doc] objects.
diff --git a/website/docs/usage/adding-languages.jade b/website/docs/usage/adding-languages.jade
index ed602f8fa..d1cb1887c 100644
--- a/website/docs/usage/adding-languages.jade
+++ b/website/docs/usage/adding-languages.jade
@@ -384,8 +384,6 @@ p
         "ababábites": "ababábite"
     }
 
-+aside("Where can I find lemmatizer data?")
-
 p
     | To add a lookup lemmatizer to your language, import the #[code LOOKUP]
     | table and #[code Lemmatizer], and create a new classmethod:
diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade
index ae9e4d086..a54b70b89 100644
--- a/website/docs/usage/rule-based-matching.jade
+++ b/website/docs/usage/rule-based-matching.jade
@@ -47,8 +47,8 @@ p
     nlp = spacy.load('en')
     matcher = Matcher(nlp.vocab)
     # add match ID "HelloWorld" with no callback and one pattern
-    matcher.add('HelloWorld', on_match=None,
-                [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}])
+    pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]
+    matcher.add('HelloWorld', None, pattern)
 
     doc = nlp(u'Hello, world! Hello world!')
     matches = matcher(doc)
@@ -61,7 +61,7 @@ p
     | without punctuation between "hello" and "world":
 
 +code.
-    matcher.add('HelloWorld', on_match=None,
+    matcher.add('HelloWorld', None,
                 [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
                 [{'LOWER': 'hello'}, {'LOWER': 'world'}])
@@ -104,7 +104,7 @@ p
         match_id, start, end = matches[i]
         doc.ents += ((EVENT, start, end),)
 
-    matcher.add('GoogleIO', on_match=add_event_ent,
+    matcher.add('GoogleIO', add_event_ent,
                 [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
                 [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
@@ -127,7 +127,7 @@ p
         span.merge(is_stop=True)  # merge (and mark it as a stop word, just in case)
         span.set_flag(BAD_HTML_FLAG, True)  # set BAD_HTML_FLAG
 
-    matcher.add('BAD_HTML', on_match=merge_and_flag,
+    matcher.add('BAD_HTML', merge_and_flag,
                 [{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
                 [{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])
@@ -283,7 +283,6 @@ p
     # set manual=True to make displaCy render straight from a dictionary
     displacy.serve(matched_sents, style='ent', manual=True)
 
-+h(3, "quantifiers-example2") Quantifiers example: Phone numbers
 
 p
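
Taken together, the matcher hunks above all converge on the same calling convention: matcher.add(match_id, on_match, *patterns), with the callback (or None) passed as the second positional argument instead of an on_match= keyword. Below is a minimal sketch of that convention, assembled only from the patterns shown in this patch; it assumes a spaCy version with this Matcher.add signature and an installed 'en' model, and the on_match callback here is illustrative rather than taken from the diff.

    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.load('en')
    matcher = Matcher(nlp.vocab)

    def on_match(matcher, doc, i, matches):
        # callback invoked once per match; i indexes into matches
        match_id, start, end = matches[i]
        print('Matched:', doc[start:end].text)

    # callback (or None) comes second; one or more patterns follow
    matcher.add('HelloWorld', on_match,
                [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
                [{'LOWER': 'hello'}, {'LOWER': 'world'}])

    doc = nlp(u'Hello, world! Hello world!')
    matches = matcher(doc)  # list of (match_id, start, end) tuples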