mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-12 17:22:25 +03:00
Merge remote-tracking branch 'upstream/master' into web-doc-patches
This commit is contained in:
commit
76cee1a1c3
|
@ -89,7 +89,7 @@ p Match a stream of documents, yielding them in turn.
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
for doc in matcher.pipe(texts, batch_size=50, n_threads=4):
|
for doc in matcher.pipe(docs, batch_size=50, n_threads=4):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
|
|
@ -88,8 +88,8 @@ p
|
||||||
+cell The exact verbatim text of a token.
|
+cell The exact verbatim text of a token.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell.u-nowrap #[code LOWER], #[code UPPER]
|
+cell.u-nowrap #[code LOWER]
|
||||||
+cell The lowercase, uppercase form of the token text.
|
+cell The lowercase form of the token text.
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell.u-nowrap #[code IS_ALPHA], #[code IS_ASCII], #[code IS_DIGIT]
|
+cell.u-nowrap #[code IS_ALPHA], #[code IS_ASCII], #[code IS_DIGIT]
|
||||||
|
@ -161,7 +161,7 @@ p
|
||||||
|
|
||||||
p
|
p
|
||||||
| The #[code +] and #[code *] operators are usually interpreted
|
| The #[code +] and #[code *] operators are usually interpreted
|
||||||
| "greedily", i.e. longer matches are returned where possible.
|
| "greedily", i.e. longer matches are returned where possible.
|
||||||
|
|
||||||
+h(3, "adding-phrase-patterns") Adding phrase patterns
|
+h(3, "adding-phrase-patterns") Adding phrase patterns
|
||||||
|
|
||||||
|
@ -222,8 +222,8 @@ p
|
||||||
doc.ents += ((EVENT, start, end),)
|
doc.ents += ((EVENT, start, end),)
|
||||||
|
|
||||||
matcher.add('GoogleIO', add_event_ent,
|
matcher.add('GoogleIO', add_event_ent,
|
||||||
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
|
[{'ORTH': 'Google'}, {'ORTH': 'I'}, {'ORTH': '/'}, {'ORTH': 'O'}],
|
||||||
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
|
[{'ORTH': 'Google'}, {'ORTH': 'I'}, {'ORTH': '/'}, {'ORTH': 'O'}, {'IS_DIGIT': True}])
|
||||||
|
|
||||||
p
|
p
|
||||||
| In addition to mentions of "Google I/O", your data also contains some
|
| In addition to mentions of "Google I/O", your data also contains some
|
||||||
|
|
|
@ -231,7 +231,7 @@ p
|
||||||
def set_sentiment(matcher, doc, i, matches):
|
def set_sentiment(matcher, doc, i, matches):
|
||||||
doc.sentiment += 0.1
|
doc.sentiment += 0.1
|
||||||
|
|
||||||
pattern1 = [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}]
|
pattern1 = [{'ORTH': 'Google'}, {'ORTH': 'I'}, {'ORTH': '/'}, {'ORTH': 'O'}]
|
||||||
pattern2 = [[{'ORTH': emoji, 'OP': '+'}] for emoji in ['😀', '😂', '🤣', '😍']]
|
pattern2 = [[{'ORTH': emoji, 'OP': '+'}] for emoji in ['😀', '😂', '🤣', '😍']]
|
||||||
matcher.add('GoogleIO', None, pattern1) # match "Google I/O" or "Google i/o"
|
matcher.add('GoogleIO', None, pattern1) # match "Google I/O" (ORTH is the exact verbatim text, so case-sensitive)
|
||||||
matcher.add('HAPPY', set_sentiment, *pattern2) # match one or more happy emoji
|
matcher.add('HAPPY', set_sentiment, *pattern2) # match one or more happy emoji
|
||||||
|
|
|
@ -127,7 +127,7 @@ p
|
||||||
| #[+api("pipe") #[code Pipe]], fully trainable and serializable,
|
| #[+api("pipe") #[code Pipe]], fully trainable and serializable,
|
||||||
| and follow the same API. Instead of updating the model and telling
|
| and follow the same API. Instead of updating the model and telling
|
||||||
| spaCy when to #[em stop], you can now explicitly call
|
| spaCy when to #[em stop], you can now explicitly call
|
||||||
| #[+api("language#begin_training") #[code begin_taining]], which
|
| #[+api("language#begin_training") #[code begin_training]], which
|
||||||
| returns an optimizer you can pass into the
|
| returns an optimizer you can pass into the
|
||||||
| #[+api("language#update") #[code update]] function. While #[code update]
|
| #[+api("language#update") #[code update]] function. While #[code update]
|
||||||
| still accepts sequences of #[code Doc] and #[code GoldParse] objects,
|
| still accepts sequences of #[code Doc] and #[code GoldParse] objects,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user