mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Add details on syntax iterators
This commit is contained in:
parent
8a29308d0b
commit
e9816daa6a
|
@ -42,6 +42,7 @@ p
|
|||
+item #[+a("#tokenizer-exceptions") Tokenizer exceptions]
|
||||
+item #[+a("#norm-exceptions") Norm exceptions]
|
||||
+item #[+a("#lex-attrs") Lexical attributes]
|
||||
+item #[+a("#syntax-iterators") Syntax iterators]
|
||||
+item #[+a("#lemmatizer") Lemmatizer]
|
||||
+item #[+a("#tag-map") Tag map]
|
||||
+item #[+a("#morph-rules") Morph rules]
|
||||
|
@ -104,6 +105,13 @@ p
|
|||
+cell dict
|
||||
+cell Attribute ID mapped to function.
|
||||
|
||||
+row
|
||||
+cell #[code SYNTAX_ITERATORS]
|
||||
+cell dict
|
||||
+cell
|
||||
| Iterator ID mapped to function. Currently only supports
|
||||
| #[code 'noun_chunks'].
|
||||
|
||||
+row
|
||||
+cell #[code LOOKUP]
|
||||
+cell dict
|
||||
|
@ -449,6 +457,33 @@ p
|
|||
| #[code lex_attr_getters.update(LEX_ATTRS)], only the new custom functions
|
||||
| are overwritten.
|
||||
|
||||
+h(3, "syntax-iterators") Syntax iterators
|
||||
|
||||
p
|
||||
| Syntax iterators are functions that compute views of a #[code Doc]
|
||||
| object based on its syntax. At the moment, they are only used for
|
||||
| extracting
|
||||
| #[+a("/docs/usage/dependency-parse#noun-chunks") noun chunks], which
|
||||
| are available as the #[+api("doc#noun_chunks") #[code Doc.noun_chunks]]
|
||||
| property. Because base noun phrases work differently across languages,
|
||||
| the rules to compute them are part of the individual language's data. If
|
||||
| a language does not include a noun chunks iterator, the property won't
|
||||
| be available. For examples, see the existing syntax iterators:
|
||||
|
||||
+aside-code("Noun chunks example").
|
||||
doc = nlp(u'A phrase with another phrase occurs.')
|
||||
chunks = list(doc.noun_chunks)
|
||||
assert chunks[0].text == "A phrase"
|
||||
assert chunks[1].text == "another phrase"
|
||||
|
||||
+table(["Language", "Source"])
|
||||
for lang, lang_id in {en: "English", de: "German", es: "Spanish"}
|
||||
+row
|
||||
+cell=lang
|
||||
+cell
|
||||
+src(gh("spaCy", "spacy/lang/" + lang_id + "/syntax_iterators.py"))
|
||||
| lang/#{lang_id}/syntax_iterators.py
|
||||
|
||||
+h(3, "lemmatizer") Lemmatizer
|
||||
|
||||
p
|
||||
|
|
Loading…
Reference in New Issue
Block a user