From d7926de1e5a0f436c11cc03221afd8bf92d1a0ee Mon Sep 17 00:00:00 2001 From: Bram Vanroy Date: Wed, 5 Aug 2020 14:34:12 +0200 Subject: [PATCH] Update universe details spacy_conll (#5871) --- website/meta/universe.json | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/website/meta/universe.json b/website/meta/universe.json index a9a402a66..cf361435f 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1795,26 +1795,33 @@ { "id": "spacy-conll", "title": "spacy_conll", - "slogan": "Parse text with spaCy and gets its output in CoNLL-U format", - "description": "This module allows you to parse a text to CoNLL-U format. It contains a pipeline component for spaCy that adds CoNLL-U properties to a Doc and its sentences. It can also be used as a command-line tool.", + "slogan": "Parsing to CoNLL with spaCy, spacy-stanza, and spacy-udpipe", + "description": "This module allows you to parse text into CoNLL-U format. You can use it as a command line tool, or embed it in your own scripts by adding it as a custom pipeline component to a spaCy, spacy-stanfordnlp, spacy-stanza, or spacy-udpipe pipeline. It also provides an easy-to-use function to quickly initialize a parser. CoNLL-related properties are added to Doc elements, sentence Spans, and Tokens.", "code_example": [ - "import spacy", - "from spacy_conll import ConllFormatter", + "from spacy_conll import init_parser", "", - "nlp = spacy.load('en')", - "conllformatter = ConllFormatter(nlp)", - "nlp.add_pipe(conllformatter, after='parser')", - "doc = nlp('I like cookies. Do you?')", - "conll = doc._.conll", - "print(doc._.conll_str_headers)", - "print(doc._.conll_str)" + "", + "# Initialise English parser, already including the ConllFormatter as a pipeline component.", + "# Indicate that we want to get the CoNLL headers in the string output.", + "# `use_gpu` and `verbose` are specific to stanza (and stanfordnlp). 
These keyword arguments", + "# are passed on to their Pipeline() initialisation", + "nlp = init_parser(\"stanza\",", + " \"en\",", + " parser_opts={\"use_gpu\": True, \"verbose\": False},", + " include_headers=True)", + "# Parse a given string", + "doc = nlp(\"A cookie is a baked or cooked food that is typically small, flat and sweet. It usually contains flour, sugar and some type of oil or fat.\")", + "", + "# Get the CoNLL representation of the whole document, including headers", + "conll = doc._.conll_str", + "print(conll)" ], "code_language": "python", "author": "Bram Vanroy", "author_links": { "github": "BramVanroy", "twitter": "BramVanroy", - "website": "https://bramvanroy.be" + "website": "http://bramvanroy.be" }, "github": "BramVanroy/spacy_conll", "category": ["standalone", "pipeline"],