mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 02:36:32 +03:00
Resolve conflict on test
This commit is contained in:
commit
5418bcf5d7
|
@ -35,7 +35,6 @@ def test_tokenizer_handles_cnts(en_tokenizer, text, length):
|
||||||
assert len(tokens) == length
|
assert len(tokens) == length
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('text,match', [
|
@pytest.mark.parametrize('text,match', [
|
||||||
('10', True), ('1', True), ('10,000', True), ('10,00', True),
|
('10', True), ('1', True), ('10,000', True), ('10,00', True),
|
||||||
('999.0', True), ('one', True), ('two', True), ('billion', True),
|
('999.0', True), ('one', True), ('two', True), ('billion', True),
|
||||||
|
|
|
@ -17,9 +17,8 @@ def test_issue429(EN):
|
||||||
|
|
||||||
doc = EN('a')
|
doc = EN('a')
|
||||||
matcher = Matcher(EN.vocab)
|
matcher = Matcher(EN.vocab)
|
||||||
matcher.add('TEST', [{'ORTH': 'a'}], on_match=merge_phrases)
|
matcher.add('TEST', merge_phrases, [{'ORTH': 'a'}])
|
||||||
doc = EN.make_doc('a b c')
|
doc = EN.make_doc('a b c')
|
||||||
|
|
||||||
EN.tagger(doc)
|
EN.tagger(doc)
|
||||||
matcher(doc)
|
matcher(doc)
|
||||||
EN.entity(doc)
|
EN.entity(doc)
|
||||||
|
|
|
@ -4,7 +4,7 @@ include ../../_includes/_mixins
|
||||||
|
|
||||||
p
|
p
|
||||||
| An entry in the vocabulary. A #[code Lexeme] has no string context – it's
|
| An entry in the vocabulary. A #[code Lexeme] has no string context – it's
|
||||||
| a word-type, as opposed to a word token. It therefore has no
|
| a word type, as opposed to a word token. It therefore has no
|
||||||
| part-of-speech tag, dependency parse, or lemma (if lemmatization depends
|
| part-of-speech tag, dependency parse, or lemma (if lemmatization depends
|
||||||
| on the part-of-speech tag).
|
| on the part-of-speech tag).
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
||||||
|
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
|
pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
|
||||||
matcher.add("HelloWorld", on_match=None, pattern)
|
matcher.add("HelloWorld", None, pattern)
|
||||||
doc = nlp(u'hello world!')
|
doc = nlp(u'hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
include ../../_includes/_mixins
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
p
|
p
|
||||||
| A look-up table that allows you to access #[code Lexeme] objects. The
|
| A lookup table that allows you to access #[code Lexeme] objects. The
|
||||||
| #[code Vocab] instance also provides access to the #[code StringStore],
|
| #[code Vocab] instance also provides access to the #[code StringStore],
|
||||||
| and owns underlying C-data that is shared between #[code Doc] objects.
|
| and owns underlying C-data that is shared between #[code Doc] objects.
|
||||||
|
|
||||||
|
|
|
@ -384,8 +384,6 @@ p
|
||||||
"ababábites": "ababábite"
|
"ababábites": "ababábite"
|
||||||
}
|
}
|
||||||
|
|
||||||
+aside("Where can I find lemmatizer data?")
|
|
||||||
|
|
||||||
p
|
p
|
||||||
| To add a lookup lemmatizer to your language, import the #[code LOOKUP]
|
| To add a lookup lemmatizer to your language, import the #[code LOOKUP]
|
||||||
| table and #[code Lemmatizer], and create a new classmethod:
|
| table and #[code Lemmatizer], and create a new classmethod:
|
||||||
|
|
|
@ -47,8 +47,8 @@ p
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
# add match ID "HelloWorld" with no callback and one pattern
|
# add match ID "HelloWorld" with no callback and one pattern
|
||||||
matcher.add('HelloWorld', on_match=None,
|
pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]
|
||||||
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}])
|
matcher.add('HelloWorld', None, pattern)
|
||||||
|
|
||||||
doc = nlp(u'Hello, world! Hello world!')
|
doc = nlp(u'Hello, world! Hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
@ -61,7 +61,7 @@ p
|
||||||
| without punctuation between "hello" and "world":
|
| without punctuation between "hello" and "world":
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add('HelloWorld', on_match=None,
|
matcher.add('HelloWorld', None,
|
||||||
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
|
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
|
||||||
[{'LOWER': 'hello'}, {'LOWER': 'world'}])
|
[{'LOWER': 'hello'}, {'LOWER': 'world'}])
|
||||||
|
|
||||||
|
@ -104,7 +104,7 @@ p
|
||||||
match_id, start, end = matches[i]
|
match_id, start, end = matches[i]
|
||||||
doc.ents += ((EVENT, start, end),)
|
doc.ents += ((EVENT, start, end),)
|
||||||
|
|
||||||
matcher.add('GoogleIO', on_match=add_event_ent,
|
matcher.add('GoogleIO', add_event_ent,
|
||||||
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
|
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
|
||||||
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
|
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ p
|
||||||
span.merge(is_stop=True) # merge (and mark it as a stop word, just in case)
|
span.merge(is_stop=True) # merge (and mark it as a stop word, just in case)
|
||||||
span.set_flag(BAD_HTML_FLAG, True) # set BAD_HTML_FLAG
|
span.set_flag(BAD_HTML_FLAG, True) # set BAD_HTML_FLAG
|
||||||
|
|
||||||
matcher.add('BAD_HTML', on_match=merge_and_flag,
|
matcher.add('BAD_HTML', merge_and_flag,
|
||||||
[{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
|
[{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
|
||||||
[{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])
|
[{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])
|
||||||
|
|
||||||
|
@ -283,7 +283,6 @@ p
|
||||||
# set manual=True to make displaCy render straight from a dictionary
|
# set manual=True to make displaCy render straight from a dictionary
|
||||||
displacy.serve(matched_sents, style='ent', manual=True)
|
displacy.serve(matched_sents, style='ent', manual=True)
|
||||||
|
|
||||||
|
|
||||||
+h(3, "quantifiers-example2") Quantifiers example: Phone numbers
|
+h(3, "quantifiers-example2") Quantifiers example: Phone numbers
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
Loading…
Reference in New Issue
Block a user