commit 5418bcf5d7 (https://github.com/explosion/spaCy.git)

    Resolve conflict on test

@@ -35,7 +35,6 @@ def test_tokenizer_handles_cnts(en_tokenizer, text, length):
     assert len(tokens) == length
 
 
-
 @pytest.mark.parametrize('text,match', [
     ('10', True), ('1', True), ('10,000', True), ('10,00', True),
     ('999.0', True), ('one', True), ('two', True), ('billion', True),

@@ -17,9 +17,8 @@ def test_issue429(EN):
 
     doc = EN('a')
     matcher = Matcher(EN.vocab)
-    matcher.add('TEST', [{'ORTH': 'a'}], on_match=merge_phrases)
+    matcher.add('TEST', merge_phrases, [{'ORTH': 'a'}])
     doc = EN.make_doc('a b c')
-
     EN.tagger(doc)
     matcher(doc)
     EN.entity(doc)

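Note on the API change above: the test settles on the v2-era call convention Matcher.add(match_id, on_match, *patterns), with the callback passed positionally. A minimal sketch of that convention, assuming a loaded English model; the body of merge_phrases is an illustrative assumption based on the callback named in the test:

    # Sketch of the positional Matcher.add convention this commit resolves to.
    import spacy
    from spacy.matcher import Matcher

    nlp = spacy.load('en')
    matcher = Matcher(nlp.vocab)

    def merge_phrases(matcher, doc, i, matches):
        # on_match receives the matcher, the doc, the index of the current
        # match, and the full list of (match_id, start, end) tuples
        match_id, start, end = matches[i]
        doc[start:end].merge()  # merge the matched span into one token

    matcher.add('TEST', merge_phrases, [{'ORTH': 'a'}])
    doc = nlp(u'a b c')
    matches = matcher(doc)
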
@@ -4,7 +4,7 @@ include ../../_includes/_mixins
 
 p
     |  An entry in the vocabulary. A #[code Lexeme] has no string context – it's
-    |  a word-type, as opposed to a word token. It therefore has no
+    |  a word type, as opposed to a word token. It therefore has no
     |  part-of-speech tag, dependency parse, or lemma (if lemmatization depends
     |  on the part-of-speech tag).
 

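The Lexeme description touched above, as a short sketch (the 'en' shortcut and the example word are assumptions):

    # A Lexeme is one vocabulary entry per word type, shared by every token
    # with that orthography, so it only carries context-free attributes.
    import spacy

    nlp = spacy.load('en')
    apple = nlp.vocab[u'apple']         # looks up (or creates) the Lexeme
    print(apple.orth_, apple.is_alpha)  # lexical attributes: 'apple' True
    # contextual annotations like a tag or dependency arc live on Token only
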
@@ -52,7 +52,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
 
     matcher = Matcher(nlp.vocab)
     pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
-    matcher.add("HelloWorld", on_match=None, pattern)
+    matcher.add("HelloWorld", None, pattern)
     doc = nlp(u'hello world!')
     matches = matcher(doc)
 

@@ -3,7 +3,7 @@
 include ../../_includes/_mixins
 
 p
-    |  A look-up table that allows you to access #[code Lexeme] objects. The
+    |  A lookup table that allows you to access #[code Lexeme] objects. The
     |  #[code Vocab] instance also provides access to the #[code StringStore],
     |  and owns underlying C-data that is shared between #[code Doc] objects.
 

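A quick sketch of the two lookups that paragraph describes, the Lexeme table and the shared StringStore (the example string is an assumption):

    # Vocab maps strings to Lexemes; vocab.strings is the shared StringStore,
    # so one string resolves to one ID across every Doc from this pipeline.
    import spacy

    nlp = spacy.load('en')
    lexeme = nlp.vocab[u'hello']             # string -> Lexeme
    hello_id = nlp.vocab.strings[u'hello']   # string -> integer ID
    assert nlp.vocab.strings[hello_id] == u'hello'  # ID -> string round trip
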
@@ -384,8 +384,6 @@ p
         "ababábites": "ababábite"
     }
 
-+aside("Where can I find lemmatizer data?")
-
 p
     |  To add a lookup lemmatizer to your language, import the #[code LOOKUP]
     |  table and #[code Lemmatizer], and create a new classmethod:

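The classmethod that paragraph refers to, sketched against the v2-era language-package layout (the relative module paths and the Defaults class name here are assumptions):

    # sketch only: inside a hypothetical language package, e.g. spacy/lang/xx/
    from ...language import Language
    from ...lemmatizerlookup import Lemmatizer
    from .lemmatization import LOOKUP

    class HypotheticalDefaults(Language.Defaults):
        @classmethod
        def create_lemmatizer(cls, nlp=None):
            # hand the whole string-to-string lookup table to the Lemmatizer
            return Lemmatizer(LOOKUP)
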
@@ -47,8 +47,8 @@ p
     nlp = spacy.load('en')
     matcher = Matcher(nlp.vocab)
     # add match ID "HelloWorld" with no callback and one pattern
-    matcher.add('HelloWorld', on_match=None,
-                [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}])
+    pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]
+    matcher.add('HelloWorld', None, pattern)
 
     doc = nlp(u'Hello, world! Hello world!')
     matches = matcher(doc)

@@ -61,7 +61,7 @@ p
     |  without punctuation between "hello" and "world":
 
 +code.
-    matcher.add('HelloWorld', on_match=None,
+    matcher.add('HelloWorld', None,
                 [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
                 [{'LOWER': 'hello'}, {'LOWER': 'world'}])
 

@@ -104,7 +104,7 @@ p
         match_id, start, end = matches[i]
         doc.ents += ((EVENT, start, end),)
 
-    matcher.add('GoogleIO', on_match=add_event_ent,
+    matcher.add('GoogleIO', add_event_ent,
                 [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
                 [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
 

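For reference, the add_event_ent callback the hunk above wires up, sketched in full, continuing from the docs snippet's nlp object (resolving the EVENT label through the StringStore is an assumption based on the surrounding page):

    # on_match callbacks run for each match as soon as it is found
    EVENT = nlp.vocab.strings[u'EVENT']  # integer ID for the entity label

    def add_event_ent(matcher, doc, i, matches):
        # append the current match to doc.ents without overwriting it
        match_id, start, end = matches[i]
        doc.ents += ((EVENT, start, end),)
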
@@ -127,7 +127,7 @@ p
         span.merge(is_stop=True) # merge (and mark it as a stop word, just in case)
         span.set_flag(BAD_HTML_FLAG, True) # set BAD_HTML_FLAG
 
-    matcher.add('BAD_HTML', on_match=merge_and_flag,
+    matcher.add('BAD_HTML', merge_and_flag,
                 [{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
                 [{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])
 

@@ -283,7 +283,6 @@ p
     # set manual=True to make displaCy render straight from a dictionary
     displacy.serve(matched_sents, style='ent', manual=True)
 
-
 +h(3, "quantifiers-example2") Quantifiers example: Phone numbers
 
 p

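A sketch of the dictionary payload displacy.serve accepts in manual mode, as used in the hunk above (the sentence text and character offsets are illustrative assumptions):

    from spacy import displacy

    # with manual=True, displaCy renders plain dicts instead of Doc objects:
    # each dict carries the text plus character-offset entity spans
    matched_sents = [{
        'text': u'Call me on (123) 456 789!',
        'ents': [{'start': 11, 'end': 24, 'label': 'PHONE_NUMBER'}],
        'title': None
    }]
    displacy.serve(matched_sents, style='ent', manual=True)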