Mirror of https://github.com/explosion/spaCy.git (synced 2025-10-31 07:57:35 +03:00)
Commit: Update streamlit app [ci skip] · Add all labels by default · Tidy up and auto-format · Co-authored-by: Ines Montani <ines@ines.io>
Python · 154 lines · 4.6 KiB

# coding: utf-8
"""
Example of a Streamlit app for an interactive spaCy model visualizer. You can
either download the script, or point streamlit run to the raw URL of this
file. For more details, see https://streamlit.io.

Installation:
pip install streamlit
python -m spacy download en_core_web_sm
python -m spacy download en_core_web_md
python -m spacy download de_core_news_sm

Usage:
streamlit run streamlit_spacy.py
"""
from __future__ import unicode_literals

import streamlit as st
import spacy
from spacy import displacy
import pandas as pd


SPACY_MODEL_NAMES = ["en_core_web_sm", "en_core_web_md", "de_core_news_sm"]
DEFAULT_TEXT = "Mark Zuckerberg is the CEO of Facebook."
HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""


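# Cache the heavyweight steps so Streamlit reruns stay fast: a model is only
# loaded once per name, and a text is only re-processed when the (model, text)
# pair changes. allow_output_mutation=True means Streamlit won't hash the
# returned spaCy objects to check them for mutations between reruns.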
@st.cache(allow_output_mutation=True)
def load_model(name):
    return spacy.load(name)


@st.cache(allow_output_mutation=True)
def process_text(model_name, text):
    nlp = load_model(model_name)
    return nlp(text)


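# Sidebar: app title, description and model picker. The info message is shown
# while the selected model loads and cleared once it's ready.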
st.sidebar.title("Interactive spaCy visualizer")
st.sidebar.markdown(
    """
Process text with [spaCy](https://spacy.io) models and visualize named entities,
dependencies and more. Uses spaCy's built-in
[displaCy](http://spacy.io/usage/visualizers) visualizer under the hood.
"""
)

spacy_model = st.sidebar.selectbox("Model name", SPACY_MODEL_NAMES)
model_load_state = st.info(f"Loading model '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()

text = st.text_area("Text to analyze", DEFAULT_TEXT)
doc = process_text(spacy_model, text)

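# Dependency parse: only shown if the loaded pipeline has a parser. The collapse
# and compact checkboxes map onto displaCy render options; "Split sentences"
# controls whether each sentence is rendered as its own parse.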
| if "parser" in nlp.pipe_names:
 | |
|     st.header("Dependency Parse & Part-of-speech tags")
 | |
|     st.sidebar.header("Dependency Parse")
 | |
|     split_sents = st.sidebar.checkbox("Split sentences", value=True)
 | |
|     collapse_punct = st.sidebar.checkbox("Collapse punctuation", value=True)
 | |
|     collapse_phrases = st.sidebar.checkbox("Collapse phrases")
 | |
|     compact = st.sidebar.checkbox("Compact mode")
 | |
|     options = {
 | |
|         "collapse_punct": collapse_punct,
 | |
|         "collapse_phrases": collapse_phrases,
 | |
|         "compact": compact,
 | |
|     }
 | |
|     docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
 | |
|     for sent in docs:
 | |
|         html = displacy.render(sent, options=options)
 | |
|         # Double newlines seem to mess with the rendering
 | |
|         html = html.replace("\n\n", "\n")
 | |
|         if split_sents and len(docs) > 1:
 | |
|             st.markdown(f"> {sent.text}")
 | |
|         st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
 | |
| 
 | |
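# Named entities: displaCy's entity visualizer plus a table of entity attributes.
# If the pipeline also has an entity_linker, the knowledge base ID is included.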
| if "ner" in nlp.pipe_names:
 | |
|     st.header("Named Entities")
 | |
|     st.sidebar.header("Named Entities")
 | |
|     label_set = nlp.get_pipe("ner").labels
 | |
|     labels = st.sidebar.multiselect(
 | |
|         "Entity labels", options=label_set, default=list(label_set)
 | |
|     )
 | |
|     html = displacy.render(doc, style="ent", options={"ents": labels})
 | |
|     # Newlines seem to mess with the rendering
 | |
|     html = html.replace("\n", " ")
 | |
|     st.write(HTML_WRAPPER.format(html), unsafe_allow_html=True)
 | |
|     attrs = ["text", "label_", "start", "end", "start_char", "end_char"]
 | |
|     if "entity_linker" in nlp.pipe_names:
 | |
|         attrs.append("kb_id_")
 | |
|     data = [
 | |
|         [str(getattr(ent, attr)) for attr in attrs]
 | |
|         for ent in doc.ents
 | |
|         if ent.label_ in labels
 | |
|     ]
 | |
|     df = pd.DataFrame(data, columns=attrs)
 | |
|     st.dataframe(df)
 | |
| 
 | |
| 
 | |
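# Text classification: show the category scores from doc.cats if the pipeline
# has a text classifier.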
| if "textcat" in nlp.pipe_names:
 | |
|     st.header("Text Classification")
 | |
|     st.markdown(f"> {text}")
 | |
|     df = pd.DataFrame(doc.cats.items(), columns=("Label", "Score"))
 | |
|     st.dataframe(df)
 | |
| 
 | |
| 
 | |
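# Vectors & similarity: only shown for models that ship with word vectors.
# The similarity score is rendered as a green box above 0.5 and red otherwise.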
vector_size = nlp.meta.get("vectors", {}).get("width", 0)
if vector_size:
    st.header("Vectors & Similarity")
    st.code(nlp.meta["vectors"])
    text1 = st.text_input("Text or word 1", "apple")
    text2 = st.text_input("Text or word 2", "orange")
    doc1 = process_text(spacy_model, text1)
    doc2 = process_text(spacy_model, text2)
    similarity = doc1.similarity(doc2)
    if similarity > 0.5:
        st.success(similarity)
    else:
        st.error(similarity)

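# Token attributes: an on-demand table of per-token attributes for the analyzed text.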
st.header("Token attributes")

if st.button("Show token attributes"):
    attrs = [
        "idx",
        "text",
        "lemma_",
        "pos_",
        "tag_",
        "dep_",
        "head",
        "ent_type_",
        "ent_iob_",
        "shape_",
        "is_alpha",
        "is_ascii",
        "is_digit",
        "is_punct",
        "like_num",
    ]
    data = [[str(getattr(token, attr)) for attr in attrs] for token in doc]
    df = pd.DataFrame(data, columns=attrs)
    st.dataframe(df)


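# Raw output: the full Doc as JSON and the model's meta, behind buttons so they
# are only rendered when requested.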
st.header("JSON Doc")
if st.button("Show JSON Doc"):
    st.json(doc.to_json())

st.header("JSON model meta")
if st.button("Show JSON model meta"):
    st.json(nlp.meta)