mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Add Streamlit example [ci skip]
This commit is contained in:
parent
38b6e69389
commit
867e93aae2
153
examples/streamlit_spacy.py
Normal file
153
examples/streamlit_spacy.py
Normal file
|
@ -0,0 +1,153 @@
|
|||
# coding: utf-8
|
||||
"""
|
||||
Example of a Streamlit app for an interactive spaCy model visualizer. You can
|
||||
either download the script, or point streamlit run to the raw URL of this
|
||||
file. For more details, see https://streamlit.io.
|
||||
|
||||
Installation:
|
||||
pip install streamlit
|
||||
python -m spacy download en_core_web_sm
|
||||
python -m spacy download en_core_web_md
|
||||
python -m spacy download de_core_news_sm
|
||||
|
||||
Usage:
|
||||
streamlit run streamlit_spacy.py
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import streamlit as st
|
||||
import spacy
|
||||
from spacy import displacy
|
||||
import pandas as pd
|
||||
|
||||
|
||||
SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]
|
||||
DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."
|
||||
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
|
||||
|
||||
|
||||
@st.cache(ignore_hash=True)
|
||||
def load_model(name):
|
||||
return spacy.load(name)
|
||||
|
||||
|
||||
@st.cache(ignore_hash=True)
|
||||
def process_text(model_name, text):
|
||||
nlp = load_model(model_name)
|
||||
return nlp(text)
|
||||
|
||||
|
||||
st.sidebar.title("Interactive spaCy visualizer")
|
||||
st.sidebar.markdown(
|
||||
"""
|
||||
Process text with [spaCy](https://spacy.io) models and visualize named entities,
|
||||
dependencies and more. Uses spaCy's built-in
|
||||
[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
|
||||
"""
|
||||
)
|
||||
|
||||
spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
|
||||
model_load_state = st.info(f"Loading model '{spacy_model}'...")
|
||||
nlp = load_model(spacy_model)
|
||||
model_load_state.empty()
|
||||
|
||||
text = st.text_area("Text to analyze", DEFAULT_TEXT)
|
||||
doc = process_text(spacy_model, text)
|
||||
|
||||
if "parser" in nlp.pipe_names:
|
||||
st.header("Dependency Parse & Part-of-speech tags")
|
||||
st.sidebar.header("Dependency Parse")
|
||||
split_sents = st.sidebar.checkbox("Split sentences", value=True)
|
||||
collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
|
||||
collapse_phrases = st.sidebar.checkbox("Collapse phrases")
|
||||
compact = st.sidebar.checkbox("Compact mode")
|
||||
options = {
|
||||
"collapse_punct": collapse_punct,
|
||||
"collapse_phrases": collapse_phrases,
|
||||
"compact": compact,
|
||||
}
|
||||
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
|
||||
for sent in docs:
|
||||
html = displacy.render(sent, options=options)
|
||||
# Double newlines seem to mess with the rendering
|
||||
html = html.replace("\n\n", "\n")
|
||||
if split_sents and len(docs) > 1:
|
||||
st.markdown(f"> {sent.text}")
|
||||
st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
|
||||
|
||||
if "ner" in nlp.pipe_names:
|
||||
st.header("Named Entities")
|
||||
st.sidebar.header("Named Entities")
|
||||
default_labels = ["PERSON", "ORG", "GPE", "LOC"]
|
||||
labels = st.sidebar.multiselect(
|
||||
"Entity labels", nlp.get_pipe("ner").labels, default_labels
|
||||
)
|
||||
html = displacy.render(doc, style="ent", options={"ents": labels})
|
||||
# Newlines seem to mess with the rendering
|
||||
html = html.replace("\n", " ")
|
||||
st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
|
||||
attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
|
||||
if "entity_linker" in nlp.pipe_names:
|
||||
attrs.append("kb_id_")
|
||||
data = [
|
||||
[str(getattr(ent, attr)) for attr in attrs]
|
||||
for ent in doc.ents
|
||||
if ent.label_ in labels
|
||||
]
|
||||
df = pd.DataFrame(data, columns=attrs)
|
||||
st.dataframe(df)
|
||||
|
||||
|
||||
if "textcat" in nlp.pipe_names:
|
||||
st.header("Text Classification")
|
||||
st.markdown(f"> {text}")
|
||||
df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))
|
||||
st.dataframe(df)
|
||||
|
||||
|
||||
vector_size = nlp.meta.get("vectors", {}).get("width", 0)
|
||||
if vector_size:
|
||||
st.header("Vectors & Similarity")
|
||||
st.code(nlp.meta["vectors"])
|
||||
text1 = st.text_input("Text or word 1", "apple")
|
||||
text2 = st.text_input("Text or word 2", "orange")
|
||||
doc1 = process_text(spacy_model, text1)
|
||||
doc2 = process_text(spacy_model, text2)
|
||||
similarity = doc1.similarity(doc2)
|
||||
if similarity > 0.5:
|
||||
st.success(similarity)
|
||||
else:
|
||||
st.error(similarity)
|
||||
|
||||
st.header("Token attributes")
|
||||
|
||||
if st.button("Show token attributes"):
|
||||
attrs = [
|
||||
"idx",
|
||||
"text",
|
||||
"lemma_",
|
||||
"pos_",
|
||||
"tag_",
|
||||
"dep_",
|
||||
"head",
|
||||
"ent_type_",
|
||||
"ent_iob_",
|
||||
"shape_",
|
||||
"is_alpha",
|
||||
"is_ascii",
|
||||
"is_digit",
|
||||
"is_punct",
|
||||
"like_num",
|
||||
]
|
||||
data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]
|
||||
df = pd.DataFrame(data, columns=attrs)
|
||||
st.dataframe(df)
|
||||
|
||||
|
||||
st.header("JSON Doc")
|
||||
if st.button("Show JSON Doc"):
|
||||
st.json(doc.to_json())
|
||||
|
||||
st.header("JSON model meta")
|
||||
if st.button("Show JSON model meta"):
|
||||
st.json(nlp.meta)
|
Loading…
Reference in New Issue
Block a user