mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Add note on tags matching tokenization (see #1613)
This commit is contained in:
parent
ac235c0baf
commit
ec08996000
|
@ -30,8 +30,11 @@ TAG_MAP = {
|
|||
'J': {'pos': 'ADJ'}
|
||||
}
|
||||
|
||||
# Usually you'll read this in, of course. Data formats vary.
|
||||
# Ensure your strings are unicode.
|
||||
# Usually you'll read this in, of course. Data formats vary. Ensure your
|
||||
# strings are unicode and that the number of tags assigned matches spaCy's
|
||||
# tokenization. If not, you can always add a 'words' key to the annotations
|
||||
# that specifies the gold-standard tokenization, e.g.:
|
||||
# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
|
||||
TRAIN_DATA = [
|
||||
("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
|
||||
("Eat blue ham", {'tags': ['V', 'J', 'N']})
|
||||
|
|
Loading…
Reference in New Issue
Block a user