mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Added spacypdfreader to universe.json (#9963)
This commit is contained in:
parent
f40e237c5a
commit
6f65e2b544
|
@ -1,5 +1,43 @@
|
|||
{
|
||||
"resources": [
|
||||
{
|
||||
"id": "spacypdfreader",
|
||||
"title": "spacypdfreader",
|
||||
"category": ["pipeline"],
|
||||
"tags": ["PDF"],
|
||||
"slogan": "Easy PDF to text to spaCy text extraction in Python.",
|
||||
"description": "*spacypdfreader* is a Python library that allows you to convert PDF files directly into *spaCy* `Doc` objects. The library provides several built-in parsers, or you can bring your own parser. `Doc` objects are annotated with several custom attributes including: `token._.page_number`, `doc._.page_range`, `doc._.first_page`, `doc._.last_page`, `doc._.pdf_file_name`, and `doc._.page(int)`.",
|
||||
"github": "SamEdwardes/spacypdfreader",
|
||||
"pip": "spacypdfreader",
|
||||
"url": "https://samedwardes.github.io/spacypdfreader/",
|
||||
"code_language": "python",
|
||||
"author": "Sam Edwardes",
|
||||
"author_links": {
|
||||
"twitter": "TheReaLSamlam",
|
||||
"github": "SamEdwardes",
|
||||
"website": "https://samedwardes.com"
|
||||
},
|
||||
"code_example": [
|
||||
"import spacy",
|
||||
"from spacypdfreader import pdf_reader",
|
||||
"",
|
||||
"nlp = spacy.load('en_core_web_sm')",
|
||||
"doc = pdf_reader('tests/data/test_pdf_01.pdf', nlp)",
|
||||
"",
|
||||
"# Get the page number of any token.",
|
||||
"print(doc[0]._.page_number) # 1",
|
||||
"print(doc[-1]._.page_number) # 4",
|
||||
"",
|
||||
"# Get page meta data about the PDF document.",
|
||||
"print(doc._.pdf_file_name) # 'tests/data/test_pdf_01.pdf'",
|
||||
"print(doc._.page_range) # (1, 4)",
|
||||
"print(doc._.first_page) # 1",
|
||||
"print(doc._.last_page) # 4",
|
||||
"",
|
||||
"# Get all of the text from a specific PDF page.",
|
||||
"print(doc._.page(4)) # 'able to display the destination page (unless...'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "nlpcloud",
|
||||
"title": "NLPCloud.io",
|
||||
|
|
Loading…
Reference in New Issue
Block a user