mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Add load_from_docbin example [ci skip]
TODO: upload the file somewhere
This commit is contained in:
parent
4ec7623288
commit
83381018d3
45
examples/load_from_docbin.py
Normal file
45
examples/load_from_docbin.py
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
# coding: utf-8
|
||||||
|
"""
|
||||||
|
Example of loading previously parsed text using spaCy's DocBin class. The example
|
||||||
|
performs an entity count to show that the annotations are available.
|
||||||
|
For more details, see https://spacy.io/usage/saving-loading#docs
|
||||||
|
Installation:
|
||||||
|
python -m spacy download en_core_web_lg
|
||||||
|
Usage:
|
||||||
|
python examples/load_from_docbin.py en_core_web_lg RC_2015-03-9.spacy
|
||||||
|
"""
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import spacy
|
||||||
|
from spacy.tokens import DocBin
|
||||||
|
from timeit import default_timer as timer
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
EXAMPLE_PARSES_PATH = "RC_2015-03-9.spacy"
|
||||||
|
|
||||||
|
|
||||||
|
def main(model="en_core_web_lg", docbin_path=EXAMPLE_PARSES_PATH):
    """Load serialized docs from a DocBin file and print an entity tally.

    model: Name of the spaCy model to load; its vocab is used to rebuild
        the docs.
    docbin_path: Path of the file containing the serialized DocBin bytes.

    Prints the number of words loaded with the elapsed time and throughput,
    then the 30 most frequent (label, text) entity pairs.
    """
    pipeline = spacy.load(model)
    print("Reading data from {}".format(docbin_path))
    with open(docbin_path, "rb") as handle:
        payload = handle.read()

    total_words = 0
    # The clock starts after the file has been read, so the timing covers
    # only deserializing the DocBin and iterating over its docs.
    started = timer()
    ent_counts = Counter()
    for parsed in DocBin().from_bytes(payload).get_docs(pipeline.vocab):
        total_words += len(parsed)
        for ent in parsed.ents:
            ent_counts[(ent.label_, ent.text)] += 1
    elapsed = timer() - started

    rate = total_words / elapsed
    template = "Loaded {nr_word} words in {seconds} seconds ({wps} words per second)"
    print(template.format(nr_word=total_words, seconds=elapsed, wps=rate))

    print("Most common entities:")
    for pair, count in ent_counts.most_common(30):
        ent_label, ent_text = pair
        print(count, ent_text, ent_label)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Imported only when run as a script; plac passes the command-line
    # arguments through to main (see the Usage line in the module docstring).
    from plac import call as run_cli

    run_cli(main)
|
Loading…
Reference in New Issue
Block a user