diff --git a/website/meta/universe.json b/website/meta/universe.json index f379ea87b..b7842bddc 100644 --- a/website/meta/universe.json +++ b/website/meta/universe.json @@ -1,5 +1,74 @@ { "resources": [ + { + "id": "TeNs", + "title": "Temporal Expressions Normalization spaCy", + "thumb": "https://github-production-user-asset-6210df.s3.amazonaws.com/40547052/433595900-fae3c9d9-7181-4d8b-8b49-e6dc4fca930b.png?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAVCODYLSA53PQK4ZA%2F20250414%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250414T235545Z&X-Amz-Expires=300&X-Amz-Signature=e21d3c06300ceb15fa1dadd7cb60081cc9f1b35e5a7bfd07f6e8b90dd7fad9d0&X-Amz-SignedHeaders=host", + "url": "https://pypi.org/project/temporal-normalization-spacy/", + "slogan": "A temporal expression normalization plugin for Romanian using rule-based methods and DBpedia mappings.", + "description": "**[Temporal Expressions Normalization spaCy (TeNs)](https://github.com/iliedorobat/timespan-normalization-spacy)** is a powerful pipeline component for spaCy that seamlessly identifies and parses date entities in text. 
It leverages the **[Temporal Expressions Normalization Framework](https://github.com/iliedorobat/timespan-normalization)** to recognize a wide variety of date formats using an extensive set of regular expressions (RegEx), ensuring robust and adaptable date extraction across diverse textual sources.\n\nUnlike conventional solutions that primarily focus on well-structured date formats, TeNs excels in handling real-world text by **identifying** not only standard date representations but also **abbreviated, informal, or even misspelled temporal expressions.** This makes it particularly effective for processing noisy or unstructured data, such as historical records, user-generated content, and scanned documents with OCR inaccuracies.", + "github": "iliedorobat/timespan-normalization-spacy", + "pip": "temporal-normalization-spacy", + "code_example": [ + "import subprocess", + "", + "import spacy", + "", + "from temporal_normalization.commons.print_utils import console", + "from temporal_normalization.index import create_normalized_component, TemporalNormalization # noqa: F401", + "", + "", + "try:", + " # Load the spaCy model if it has already been downloaded", + " nlp = spacy.load('ro_core_news_sm')", + "except OSError:", + " console.warning(f'Started downloading ro_core_news_sm...')", + " # Download the Romanian model if it wasn't already downloaded", + " subprocess.run(['python', '-m', 'spacy', 'download', 'ro_core_news_sm'])", + " # Load the spaCy model", + " nlp = spacy.load('ro_core_news_sm')", + "", + "# Add 'temporal_normalization' component to the spaCy pipeline", + "nlp.add_pipe('temporal_normalization', last=True)", + "doc = nlp('Sec al II-lea a.ch. 
- I d.ch reprezintă o perioadă de mari schimbări.')", + "", + "# Display information about the identified and normalized dates in the text.", + "for entity in doc.ents:", + " edges = entity._.time_series.edges", + "", + " print('Start Edge:')", + " print(edges.start.serialize('\\t'))", + " print()", + "", + " print('End Edge:')", + " print(edges.end.serialize('\\t'))", + " print()", + "", + " print('Periods:')", + " for period in entity._.time_series.periods:", + " print(period.serialize('\\t'))", + " print()", + " print('---------------------')" + ], + "code_language": "python", + "author": "Ilie Cristian Dorobat", + "author_links": { + "github": "iliedorobat", + "website": "https://iliedorobat.ro/" + }, + "category": [ + "pipeline", + "standalone" + ], + "tags": [ + "temporal", + "normalization", + "date", + "romanian", + "temporal-expression", + "dbpedia" + ] + }, { "id": "spacy-vscode", "title": "spaCy Visual Studio Code Extension",