mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix Custom Tokenizer docs
- Fix mismatched quotations
- Make it clearer where the ORTH, LEMMA, and POS symbols come from
- Make strings consistent
- Fix lemma_ assertion: s/-PRON-/me/
This commit is contained in:
parent
dbe8dafb52
commit
7ec710af0e
|
@ -26,6 +26,9 @@ p
|
||||||
| #[+api("tokenizer") #[code Tokenizer]] instance:
|
| #[+api("tokenizer") #[code Tokenizer]] instance:
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
|
import spacy
|
||||||
|
from spacy.symbols import ORTH, LEMMA, POS
|
||||||
|
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
assert [w.text for w in nlp(u'gimme that')] == [u'gimme', u'that']
|
assert [w.text for w in nlp(u'gimme that')] == [u'gimme', u'that']
|
||||||
nlp.tokenizer.add_special_case(u'gimme',
|
nlp.tokenizer.add_special_case(u'gimme',
|
||||||
|
@ -37,7 +40,7 @@ p
|
||||||
{
|
{
|
||||||
ORTH: u'me'}])
|
ORTH: u'me'}])
|
||||||
assert [w.text for w in nlp(u'gimme that')] == [u'gim', u'me', u'that']
|
assert [w.text for w in nlp(u'gimme that')] == [u'gim', u'me', u'that']
|
||||||
assert [w.lemma_ for w in nlp(u'gimme that')] == [u'give', u'-PRON-', u'that']
|
assert [w.lemma_ for w in nlp(u'gimme that')] == [u'give', u'me', u'that']
|
||||||
|
|
||||||
p
|
p
|
||||||
| The special case doesn't have to match an entire whitespace-delimited
|
| The special case doesn't have to match an entire whitespace-delimited
|
||||||
|
@ -52,9 +55,9 @@ p
|
||||||
| The special case rules have precedence over the punctuation splitting:
|
| The special case rules have precedence over the punctuation splitting:
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
nlp.tokenizer.add_special_case(u"...gimme...?",
|
nlp.tokenizer.add_special_case(u'...gimme...?',
|
||||||
[{
|
[{
|
||||||
ORTH: u'...gimme...?", LEMMA: "give", TAG: "VB"}])
|
ORTH: u'...gimme...?', LEMMA: u'give', TAG: u'VB'}])
|
||||||
assert len(nlp(u'...gimme...?')) == 1
|
assert len(nlp(u'...gimme...?')) == 1
|
||||||
|
|
||||||
p
|
p
|
||||||
|
|
Loading…
Reference in New Issue
Block a user