diff --git a/website/usage/_data.json b/website/usage/_data.json index 3c37ee4d1..b34304ed6 100644 --- a/website/usage/_data.json +++ b/website/usage/_data.json @@ -167,7 +167,15 @@ "visualizers": { "title": "Visualizers", - "next": "resources" + "tag_new": 2, + "teaser": "Visualize dependencies and entities in your browser and notebook, or export HTML.", + "next": "resources", + "menu": { + "Dependencies": "dep", + "Entities": "ent", + "Jupyter Notebooks": "jupyter", + "Rendering HTML": "html" + } }, "resources": { diff --git a/website/usage/_visualizers/_dep.jade b/website/usage/_visualizers/_dep.jade new file mode 100644 index 000000000..b028ba4cf --- /dev/null +++ b/website/usage/_visualizers/_dep.jade @@ -0,0 +1,62 @@ +//- 💫 DOCS > USAGE > VISUALIZERS > DEPENDENCIES + +p + | The dependency visualizer, #[code dep], shows part-of-speech tags + | and syntactic dependencies. + ++code("Dependency example"). + import spacy + from spacy import displacy + + nlp = spacy.load('en') + doc = nlp(u'This is a sentence.') + displacy.serve(doc, style='dep') + ++codepen("f0e85b64d469d6617251d8241716d55f", 370) + +p + | The argument #[code options] lets you specify a dictionary of settings + | to customise the layout, for example: + ++aside("Important note") + | There's currently a known issue with the #[code compact] mode for + | sentences with short arrows and long dependency labels, that causes labels + | longer than the arrow to wrap. So if you come across this problem, + | especially when using custom labels, you'll have to increase the + | #[code distance] setting in the #[code options] to allow longer arcs. + ++table(["Name", "Type", "Description", "Default"]) + +row + +cell #[code compact] + +cell bool + +cell "Compact mode" with square arrows that takes up less space. + +cell #[code False] + + +row + +cell #[code color] + +cell unicode + +cell Text color (HEX, RGB or color names). 
+ +cell #[code '#000000'] + + +row + +cell #[code bg] + +cell unicode + +cell Background color (HEX, RGB or color names). + +cell #[code '#ffffff'] + + +row + +cell #[code font] + +cell unicode + +cell Font name or font family for all text. + +cell #[code 'Arial'] + +p + | For a list of all available options, see the + | #[+api("displacy#options") #[code displacy] API documentation]. + ++aside-code("Options example"). + options = {'compact': True, 'bg': '#09a3d5', + 'color': 'white', 'font': 'Source Sans Pro'} + displacy.serve(doc, style='dep', options=options) + ++codepen("39c02c893a84794353de77a605d817fd", 360) diff --git a/website/usage/_visualizers/_ent.jade b/website/usage/_visualizers/_ent.jade new file mode 100644 index 000000000..e9174cc55 --- /dev/null +++ b/website/usage/_visualizers/_ent.jade @@ -0,0 +1,80 @@ +//- 💫 DOCS > USAGE > VISUALIZERS > ENTITIES + +p + | The entity visualizer, #[code ent], highlights named entities and + | their labels in a text. + ++code("Named Entity example"). + import spacy + from spacy import displacy + + text = """But Google is starting from behind. The company made a late push + into hardware, and Apple’s Siri, available on iPhones, and Amazon’s Alexa + software, which runs on its Echo and Dot devices, have clear leads in + consumer adoption.""" + + nlp = spacy.load('custom_ner_model') + doc = nlp(text) + displacy.serve(doc, style='ent') + ++codepen("a73f8b68f9af3157855962b283b364e4", 345) + +p The entity visualizer lets you customise the following #[code options]: + ++table(["Name", "Type", "Description", "Default"]) + +row + +cell #[code ents] + +cell list + +cell + | Entity types to highlight (#[code None] for all types). + +cell #[code None] + + +row + +cell #[code colors] + +cell dict + +cell + | Color overrides. Entity types in lowercase should be mapped to + | color names or values. 
+ +cell #[code {}] + +p + | If you specify a list of #[code ents], only those entity types will be + | rendered – for example, you can choose to display #[code PERSON] entities. + | Internally, the visualizer knows nothing about available entity types and + | will render whichever spans and labels it receives. This makes it + | especially easy to work with custom entity types. By default, displaCy + | comes with colours for all + | #[+a("/api/annotation#named-entities") entity types supported by spaCy]. + | If you're using custom entity types, you can use the #[code colors] + | setting to add your own colours for them. + ++aside-code("Options example"). + colors = {'ORG': 'linear-gradient(90deg, #aa9cfc, #fc9ce7)'} + options = {'ents': ['ORG'], 'colors': colors} + displacy.serve(doc, style='ent', options=options) + ++codepen("f42ec690762b6f007022a7acd6d0c7d4", 300) + +p + | The above example uses a little trick: Since the background colour values + | are added as the #[code background] style attribute, you can use any + | #[+a("https://tympanus.net/codrops/css_reference/background/") valid background value] + | or shorthand — including gradients and even images! + ++h(3, "ent-titles") Adding titles to documents + +p + | Rendering several large documents on one page can easily become confusing. + | To add a headline to each visualization, you can add a #[code title] to + | its #[code user_data]. User data is never touched or modified by spaCy. + ++code. + doc = nlp(u'This is a sentence about Google.') + doc.user_data['title'] = 'This is a title' + displacy.serve(doc, style='ent') + +p + | This feature is especially handy if you're using displaCy to compare + | performance at different stages of a process, e.g. during training. Here + | you could use the title for a brief description of the text example and + | the number of iterations. 
diff --git a/website/usage/_visualizers/_html.jade b/website/usage/_visualizers/_html.jade new file mode 100644 index 000000000..701d4b683 --- /dev/null +++ b/website/usage/_visualizers/_html.jade @@ -0,0 +1,162 @@ +//- 💫 DOCS > USAGE > VISUALIZERS > HTML + +p + | If you don't need the web server and just want to generate the markup + | – for example, to export it to a file or serve it in a custom + | way – you can use #[+api("displacy#render") #[code displacy.render]]. + | It works the same way, but returns a string containing the markup. + ++code("Example"). + import spacy + from spacy import displacy + + nlp = spacy.load('en') + doc1 = nlp(u'This is a sentence.') + doc2 = nlp(u'This is another sentence.') + html = displacy.render([doc1, doc2], style='dep', page=True) + +p + | #[code page=True] renders the markup wrapped as a full HTML page. + | For minified and more compact HTML markup, you can set #[code minify=True]. + | If you're rendering a dependency parse, you can also export it as an + | #[code .svg] file. + ++aside("What's SVG?") + | Unlike other image formats, the SVG (Scalable Vector Graphics) uses XML + | markup that's easy to manipulate + | #[+a("https://www.smashingmagazine.com/2014/11/styling-and-animating-svgs-with-css/") using CSS] or + | #[+a("https://css-tricks.com/smil-is-dead-long-live-smil-a-guide-to-alternatives-to-smil-features/") JavaScript]. + | Essentially, SVG lets you design with code, which makes it a perfect fit + | for visualizing dependency trees. SVGs can be embedded online in an + | #[code <img>] tag, or inlined in an HTML document. They're also + | pretty easy to #[+a("https://convertio.co/image-converter/") convert]. + ++code. 
+ svg = displacy.render(doc, style='dep') + output_path = Path('/images/sentence.svg') + output_path.open('w', encoding='utf-8').write(svg) + ++infobox("Important note") + | Since each visualization is generated as a separate SVG, exporting + | #[code .svg] files only works if you're rendering #[strong one single doc] + | at a time. (This makes sense – after all, each visualization should be + | a standalone graphic.) So instead of rendering all #[code Doc]s at once, + | loop over them and export them separately. + + ++h(3, "examples-export-svg") Example: Export SVG graphics of dependency parses + ++code("Example"). + import spacy + from spacy import displacy + from pathlib import Path + + nlp = spacy.load('en') + sentences = ["This is an example.", "This is another one."] + for sent in sentences: + doc = nlp(sent) + svg = displacy.render(doc, style='dep') + file_name = '-'.join([w.text for w in doc if not w.is_punct]) + '.svg' + output_path = Path('/images/' + file_name) + output_path.open('w', encoding='utf-8').write(svg) + +p + | The above code will generate the dependency visualizations and write them to + | two files, #[code This-is-an-example.svg] and #[code This-is-another-one.svg]. + + ++h(3, "manual-usage") Rendering data manually + +p + | You can also use displaCy to manually render data. This can be useful if + | you want to visualize output from other libraries, like + | #[+a("http://www.nltk.org") NLTK] or + | #[+a("https://github.com/tensorflow/models/tree/master/syntaxnet") SyntaxNet]. + | Simply convert the dependency parse or recognised entities to displaCy's + | format and set #[code manual=True] on either #[code render()] or + | #[code serve()]. + ++aside-code("Example"). + ex = [{'text': 'But Google is starting from behind.', + 'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}], + 'title': None}] + html = displacy.render(ex, style='ent', manual=True) + ++code("DEP input"). 
+ { + 'words': [ + {'text': 'This', 'tag': 'DT'}, + {'text': 'is', 'tag': 'VBZ'}, + {'text': 'a', 'tag': 'DT'}, + {'text': 'sentence', 'tag': 'NN'}], + 'arcs': [ + {'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'}, + {'start': 2, 'end': 3, 'label': 'det', 'dir': 'left'}, + {'start': 1, 'end': 3, 'label': 'attr', 'dir': 'right'}] + } + ++code("ENT input"). + { + 'text': 'But Google is starting from behind.', + 'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}], + 'title': None + } + ++h(3, "webapp") Using displaCy in a web application + +p + | If you want to use the visualizers as part of a web application, for + | example to create something like our + | #[+a(DEMOS_URL + "/displacy") online demo], it's not recommended to + | simply wrap and serve the displaCy renderer. Instead, you should only + | rely on the server to perform spaCy's processing capabilities, and use + | #[+a(gh("displacy")) displaCy.js] to render the JSON-formatted output. + ++aside("Why not return the HTML by the server?") + | It's certainly possible to just have your server return the markup. + | But outputting raw, unsanitised HTML is risky and makes your app vulnerable to + | #[+a("https://en.wikipedia.org/wiki/Cross-site_scripting") cross-site scripting] + | (XSS). All your user needs to do is find a way to make spaCy return text + | like #[code <script src="malicious-code.js"><script>], which + | is pretty easy in NER mode. Instead of relying on the server to render + | and sanitise HTML, you can do this on the client in JavaScript. + | displaCy.js creates the markup as DOM nodes and will never insert raw + | HTML. + +p + | The #[code parse_deps] function takes a #[code Doc] object and returns + | a dictionary in a format that can be rendered by displaCy. + ++code("Example"). 
+ import spacy + from spacy import displacy + + nlp = spacy.load('en') + + def displacy_service(text): + doc = nlp(text) + return displacy.parse_deps(doc) + +p + | Using a library like #[+a("https://falconframework.org/") Falcon] or + | #[+a("http://www.hug.rest/") Hug], you can easily turn the above code + | into a simple REST API that receives a text and returns a JSON-formatted + | parse. In your front-end, include #[+a(gh("displacy")) displacy.js] and + | initialise it with the API URL and the ID or query selector of the + | container to render the visualisation in, e.g. #[code '#displacy'] for + | #[code <div id="displacy">]. + ++code("script.js", "javascript"). + var displacy = new displaCy('http://localhost:8080', { + container: '#displacy' + }) + + function parse(text) { + displacy.parse(text); + } + +p + | When you call #[code parse()], it will make a request to your API, + | receive the JSON-formatted parse and render it in your container. To + | create an interactive experience, you could trigger this function by + | a button and read the text from an #[code <input>] field. diff --git a/website/usage/_visualizers/_jupyter.jade b/website/usage/_visualizers/_jupyter.jade new file mode 100644 index 000000000..f7227e4d1 --- /dev/null +++ b/website/usage/_visualizers/_jupyter.jade @@ -0,0 +1,36 @@ +//- 💫 DOCS > USAGE > VISUALIZERS > JUPYTER + +p + | displaCy is able to detect whether you're working in a + | #[+a("https://jupyter.org") Jupyter] notebook, and will return markup + | that can be rendered in a cell straight away. When you export your + | notebook, the visualizations will be included as HTML. + ++code("Jupyter Example"). 
+ # don't forget to install a model, e.g.: spacy download en + import spacy + from spacy import displacy + + doc = nlp(u'Rats are various medium-sized, long-tailed rodents.') + displacy.render(doc, style='dep') + + doc2 = nlp(LONG_NEWS_ARTICLE) + displacy.render(doc2, style='ent') + ++aside("Enabling or disabling Jupyter mode") + | To explicitly enable or disable "Jupyter mode", you can use the + | #[code jupyter] keyword argument – e.g. to return raw HTML in a notebook, + | or to force Jupyter rendering if auto-detection fails. + ++image("/assets/img/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook") + +p + | Internally, displaCy imports #[code display] and #[code HTML] from + | #[code IPython.core.display] and returns a Jupyter HTML object. If you + | were doing it manually, it'd look like this: + ++code. + from IPython.core.display import display, HTML + + html = displacy.render(doc, style='dep') + return display(HTML(html)) diff --git a/website/usage/visualizers.jade b/website/usage/visualizers.jade index 39d34aea6..a092404ac 100644 --- a/website/usage/visualizers.jade +++ b/website/usage/visualizers.jade @@ -2,383 +2,47 @@ include ../_includes/_mixins -p - | As of v2.0, our popular visualizers, #[+a(DEMOS_URL + "/displacy") displaCy] - | and #[+a(DEMOS_URL + "/displacy-ent") displaCy #[sup ENT]] are finally an - | official part of the library. Visualizing a dependency parse or named - | entities in a text is not only a fun NLP demo – it can also be incredibly - | helpful in speeding up development and debugging your code and training - | process. Instead of printing a list of dependency labels or entity spans, - | you can simply pass your #[code Doc] objects to #[code displacy] and view - | the visualizations in your browser, or export them as HTML files or - | vector graphics. 
- -p - | If you're running a #[+a("https://jupyter.org") Jupyter] notebook, - | displaCy will detect this and return the markup in a format - | #[+a("#jupyter") ready to be rendered and exported]. - -+aside("What about the old visualizers?") - | Our JavaScript-based visualizers #[+src(gh("displacy")) #[code displacy.js]] and - | #[+src(gh("displacy-ent")) #[code displacy-ent.js]] will still be available on - | GitHub. If you're looking to implement web-based visualizations, we - | generally recommend using those instead of spaCy's built-in - | #[code displacy] module. It'll allow your application to perform all - | rendering on the client and only rely on the server for the text - | processing. The generated markup is also more compatible with modern web - | standards. - -+h(2, "getting-started") Getting started - +tag-new(2) - -p - | The quickest way visualize #[code Doc] is to use - | #[+api("displacy#serve") #[code displacy.serve]]. This will spin up a - | simple web server and let you view the result straight from your browser. - | displaCy can either take a single #[code Doc] or a list of #[code Doc] - | objects as its first argument. This lets you construct them however you - | like – using any model or modifications you like. - -+h(3, "dep") Visualizing the dependency parse - -p - | The dependency visualizer, #[code dep], shows part-of-speech tags - | and syntactic dependencies. - -+code("Dependency example"). - import spacy - from spacy import displacy - - nlp = spacy.load('en') - doc = nlp(u'This is a sentence.') - displacy.serve(doc, style='dep') - -+codepen("f0e85b64d469d6617251d8241716d55f", 370) - -p - | The argument #[code options] lets you specify a dictionary of settings - | to customise the layout, for example: - -+aside("Important note") - | There's currently a known issue with the #[code compact] mode for - | sentences with short arrows and long dependency labels, that causes labels - | longer than the arrow to wrap. 
So if you come across this problem, - | especially when using custom labels, you'll have to increase the - | #[code distance] setting in the #[code options] to allow longer arcs. - -+table(["Name", "Type", "Description", "Default"]) - +row - +cell #[code compact] - +cell bool - +cell "Compact mode" with square arrows that takes up less space. - +cell #[code False] - - +row - +cell #[code color] - +cell unicode - +cell Text color (HEX, RGB or color names). - +cell #[code '#000000'] - - +row - +cell #[code bg] - +cell unicode - +cell Background color (HEX, RGB or color names). - +cell #[code '#ffffff'] - - +row - +cell #[code font] - +cell unicode - +cell Font name or font family for all text. - +cell #[code 'Arial'] - -p - | For a list of all available options, see the - | #[+api("displacy#options") #[code displacy] API documentation]. - -+aside-code("Options example"). - options = {'compact': True, 'bg': '#09a3d5', - 'color': 'white', 'font': 'Source Sans Pro'} - displacy.serve(doc, style='dep', options=options) - -+codepen("39c02c893a84794353de77a605d817fd", 360) - -+h(3, "ent") Visualizing the entity recognizer - -p - | The entity visualizer, #[code ent], highlights named entities and - | their labels in a text. - -+code("Named Entity example"). - import spacy - from spacy import displacy - - text = """But Google is starting from behind. The company made a late push - into hardware, and Apple’s Siri, available on iPhones, and Amazon’s Alexa - software, which runs on its Echo and Dot devices, have clear leads in - consumer adoption.""" - - nlp = spacy.load('custom_ner_model') - doc = nlp(text) - displacy.serve(doc, style='ent') - -+codepen("a73f8b68f9af3157855962b283b364e4", 345) - -p The entity visualizer lets you customise the following #[code options]: - -+table(["Name", "Type", "Description", "Default"]) - +row - +cell #[code ents] - +cell list - +cell - | Entity types to highlight (#[code None] for all types). 
- +cell #[code None] - - +row - +cell #[code colors] - +cell dict - +cell - | Color overrides. Entity types in lowercase should be mapped to - | color names or values. - +cell #[code {}] - -p - | If you specify a list of #[code ents], only those entity types will be - | rendered – for example, you can choose to display #[code PERSON] entities. - | Internally, the visualizer knows nothing about available entity types and - | will render whichever spans and labels it receives. This makes it - | especially easy to work with custom entity types. By default, displaCy - | comes with colours for all - | #[+a("/api/annotation#named-entities") entity types supported by spaCy]. - | If you're using custom entity types, you can use the #[code colors] - | setting to add your own colours for them. - -+aside-code("Options example"). - colors = {'ORG': 'linear-gradient(90deg, #aa9cfc, #fc9ce7)'} - options = {'ents': ['ORG'], 'colors': colors} - displacy.serve(doc, style='ent', options=options) - -+codepen("f42ec690762b6f007022a7acd6d0c7d4", 300) - -p - | The above example uses a little trick: Since the background colour values - | are added as the #[code background] style attribute, you can use any - | #[+a("https://tympanus.net/codrops/css_reference/background/") valid background value] - | or shorthand — including gradients and even images! - -+h(3, "ent-titles") Adding titles to documents - -p - | Rendering several large documents on one page can easily become confusing. - | To add a headline to each visualization, you can add a #[code title] to - | its #[code user_data]. User data is never touched or modified by spaCy. - -+code. - doc = nlp(u'This is a sentence about Google.') - doc.user_data['title'] = 'This is a title' - displacy.serve(doc, style='ent') - -p - | This feature is espeically handy if you're using displaCy to compare - | performance at different stages of a process, e.g. during training. 
Here - | you could use the title for a brief description of the text example and - | the number of iterations. - -+h(2, "render") Rendering visualizations - -p - | If you don't need the web server and just want to generate the markup - | – for example, to export it to a file or serve it in a custom - | way – you can use #[+api("displacy#render") #[code displacy.render]]. - | It works the same way, but returns a string containing the markup. - -+code("Example"). - import spacy - from spacy import displacy - - nlp = spacy.load('en') - doc1 = nlp(u'This is a sentence.') - doc2 = nlp(u'This is another sentence.') - html = displacy.render([doc1, doc2], style='dep', page=True) - -p - | #[code page=True] renders the markup wrapped as a full HTML page. - | For minified and more compact HTML markup, you can set #[code minify=True]. - | If you're rendering a dependency parse, you can also export it as an - | #[code .svg] file. - -+aside("What's SVG?") - | Unlike other image formats, the SVG (Scalable Vector Graphics) uses XML - | markup that's easy to manipulate - | #[+a("https://www.smashingmagazine.com/2014/11/styling-and-animating-svgs-with-css/") using CSS] or - | #[+a("https://css-tricks.com/smil-is-dead-long-live-smil-a-guide-to-alternatives-to-smil-features/") JavaScript]. - | Essentially, SVG lets you design with code, which makes it a perfect fit - | for visualizing dependency trees. SVGs can be embedded online in an - | #[code <img>] tag, or inlined in an HTML document. They're also - | pretty easy to #[+a("https://convertio.co/image-converter/") convert]. - -+code. - svg = displacy.render(doc, style='dep') - output_path = Path('/images/sentence.svg') - output_path.open('w', encoding='utf-8').write(svg) - -+infobox("Important note") - | Since each visualization is generated as a separate SVG, exporting - | #[code .svg] files only works if you're rendering #[strong one single doc] - | at a time. 
(This makes sense – after all, each visualization should be - | a standalone graphic.) So instead of rendering all #[code Doc]s at one, - | loop over them and export them separately. - - -+h(3, "examples-export-svg") Example: Export SVG graphics of dependency parses - -+code("Example"). - import spacy - from spacy import displacy - from pathlib import Path - - nlp = spacy.load('en') - sentences = ["This is an example.", "This is another one."] - for sent in sentences: - doc = nlp(sentence) - svg = displacy.render(doc, style='dep') - file_name = '-'.join([w.text for w in doc if not w.is_punct]) + '.svg' - output_path = Path('/images/' + file_name) - output_path.open('w', encoding='utf-8').write(svg) - -p - | The above code will generate the dependency visualizations and them to - | two files, #[code This-is-an-example.svg] and #[code This-is-another-one.svg]. - - -+h(2, "jupyter") Using displaCy in Jupyter notebooks - -p - | displaCy is able to detect whether you're working in a - | #[+a("https://jupyter.org") Jupyter] notebook, and will return markup - | that can be rendered in a cell straight away. When you export your - | notebook, the visualizations will be included as HTML. - -+code("Jupyter Example"). - # don't forget to install a model, e.g.: spacy download en - import spacy - from spacy import displacy - - doc = nlp(u'Rats are various medium-sized, long-tailed rodents.') - displacy.render(doc, style='dep') - - doc2 = nlp(LONG_NEWS_ARTICLE) - displacy.render(doc2, style='ent') - -+aside("Enabling or disabling Jupyter mode") - | To explicitly enable or disable "Jupyter mode", you can use the - | #[code jupyter] keyword argument – e.g. to return raw HTML in a notebook, - | or to force Jupyter rendering if auto-detection fails. 
- -+image("/assets/img/displacy_jupyter.jpg", 700, false, "Example of using the displaCy dependency and named entity visualizer in a Jupyter notebook") - -p - | Internally, displaCy imports #[code display] and #[code HTML] from - | #[code IPython.core.display] and returns a Jupyter HTML object. If you - | were doing it manually, it'd look like this: - -+code. - from IPython.core.display import display, HTML - - html = displacy.render(doc, style='dep') - return display(HTML(html)) - -+h(2, "manual-usage") Rendering data manually - -p - | You can also use displaCy to manually render data. This can be useful if - | you want to visualize output from other libraries, like - | #[+a("http://www.nltk.org") NLTK] or - | #[+a("https://github.com/tensorflow/models/tree/master/syntaxnet") SyntaxNet]. - | Simply convert the dependency parse or recognised entities to displaCy's - | format and set #[code manual=True] on either #[code render()] or - | #[code serve()]. - -+aside-code("Example"). - ex = [{'text': 'But Google is starting from behind.', - 'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}], - 'title': None}] - html = displacy.render(ex, style='ent', manual=True) - -+code("DEP input"). - { - 'words': [ - {'text': 'This', 'tag': 'DT'}, - {'text': 'is', 'tag': 'VBZ'}, - {'text': 'a', 'tag': 'DT'}, - {'text': 'sentence', 'tag': 'NN'}], - 'arcs': [ - {'start': 0, 'end': 1, 'label': 'nsubj', 'dir': 'left'}, - {'start': 2, 'end': 3, 'label': 'det', 'dir': 'left'}, - {'start': 1, 'end': 3, 'label': 'attr', 'dir': 'right'}] - } - -+code("ENT input"). - { - 'text': 'But Google is starting from behind.', - 'ents': [{'start': 4, 'end': 10, 'label': 'ORG'}], - 'title': None - } - -+h(2, "webapp") Using displaCy in a web application - -p - | If you want to use the visualizers as part of a web application, for - | example to create something like our - | #[+a(DEMOS_URL + "/displacy") online demo], it's not recommended to - | simply wrap and serve the displaCy renderer. 
Instead, you should only - | rely on the server to perform spaCy's processing capabilities, and use - | #[+a(gh("displacy")) displaCy.js] to render the JSON-formatted output. - -+aside("Why not return the HTML by the server?") - | It's certainly possible to just have your server return the markup. - | But outputting raw, unsanitised HTML is risky and makes your app vulnerable to - | #[+a("https://en.wikipedia.org/wiki/Cross-site_scripting") cross-site scripting] - | (XSS). All your user needs to do is find a way to make spaCy return text - | like #[code <script src="malicious-code.js"><script>], which - | is pretty easy in NER mode. Instead of relying on the server to render - | and sanitise HTML, you can do this on the client in JavaScript. - | displaCy.js creates the markup as DOM nodes and will never insert raw - | HTML. - -p - | The #[code parse_deps] function takes a #[code Doc] object and returns - | a dictionary in a format that can be rendered by displaCy. - -+code("Example"). - import spacy - from spacy import displacy - - nlp = spacy.load('en') - - def displacy_service(text): - doc = nlp(text) - return displacy.parse_deps(doc) - -p - | Using a library like #[+a("https://falconframework.org/") Falcon] or - | #[+a("http://www.hug.rest/") Hug], you can easily turn the above code - | into a simple REST API that receives a text and returns a JSON-formatted - | parse. In your front-end, include #[+a(gh("displacy")) displacy.js] and - | initialise it with the API URL and the ID or query selector of the - | container to render the visualisation in, e.g. #[code '#displacy'] for - | #[code <div id="displacy">]. - -+code("script.js", "javascript"). - var displacy = new displaCy('http://localhost:8080', { - container: '#displacy' - }) - - function parse(text) { - displacy.parse(text); - } - -p - | When you call #[code parse()], it will make a request to your API, - | receive the JSON-formatted parse and render it in your container. 
To - | create an interactive experience, you could trigger this function by - | a button and read the text from an #[code <input>] field. ++section + p + | As of v2.0, our popular visualizers, #[+a(DEMOS_URL + "/displacy") displaCy] + | and #[+a(DEMOS_URL + "/displacy-ent") displaCy #[sup ENT]] are finally an + | official part of the library. Visualizing a dependency parse or named + | entities in a text is not only a fun NLP demo – it can also be incredibly + | helpful in speeding up development and debugging your code and training + | process. If you're running a #[+a("https://jupyter.org") Jupyter] notebook, + | displaCy will detect this and return the markup in a format + | #[+a("#jupyter") ready to be rendered and exported]. + + +aside("What about the old visualizers?") + | Our JavaScript-based visualizers #[+src(gh("displacy")) #[code displacy.js]] and + | #[+src(gh("displacy-ent")) #[code displacy-ent.js]] will still be available on + | GitHub. If you're looking to implement web-based visualizations, we + | generally recommend using those instead of spaCy's built-in + | #[code displacy] module. It'll allow your application to perform all + | rendering on the client and only rely on the server for the text + | processing. The generated markup is also more compatible with modern web + | standards. + + p + | The quickest way to visualize a #[code Doc] is to use + | #[+api("displacy#serve") #[code displacy.serve]]. This will spin up a + | simple web server and let you view the result straight from your browser. + | displaCy can either take a single #[code Doc] or a list of #[code Doc] + | objects as its first argument. This lets you construct them however you + | like – using any model or modifications you like. 
+ ++section("dep") + +h(2, "dep") Visualizing the dependency parse + include _visualizers/_dep + ++section("ent") + +h(2, "ent") Visualizing the entity recognizer + include _visualizers/_ent + ++section("jupyter") + +h(2, "jupyter") Using displaCy in Jupyter notebooks + include _visualizers/_jupyter + ++section("html") + +h(2, "html") Rendering HTML + include _visualizers/_html