From f31876154da1be5f1df4ec084cae791f4726f85b Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Fri, 25 Oct 2019 11:19:46 +0200 Subject: [PATCH] Adjust formatting [ci skip] --- spacy/pipeline/entityruler.py | 15 +++++++++------ website/docs/usage/rule-based-matching.md | 12 ++++++++---- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 38bbdfc06..f78c8ed0e 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -44,8 +44,8 @@ class EntityRuler(object): **cfg: Other config parameters. If pipeline component is loaded as part of a model pipeline, this will include all keyword arguments passed to `spacy.load`. - RETURNS (EntityRuler): The newly constructed object. + DOCS: https://spacy.io/api/entityruler#init """ self.nlp = nlp @@ -83,8 +83,8 @@ class EntityRuler(object): """Find matches in document and add them as entities. doc (Doc): The Doc object in the pipeline. - RETURNS (Doc): The Doc with added entities, if available. + DOCS: https://spacy.io/api/entityruler#call """ matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc)) @@ -120,8 +120,8 @@ class EntityRuler(object): @property def labels(self): """All labels present in the match patterns. - RETURNS (set): The string labels. + DOCS: https://spacy.io/api/entityruler#labels """ all_labels = set(self.token_patterns.keys()) @@ -131,8 +131,8 @@ class EntityRuler(object): @property def ent_ids(self): """All entity ids present in the match patterns `id` properties - RETURNS (set): The string entity ids. + DOCS: https://spacy.io/api/entityruler#ent_ids """ all_ent_ids = set() @@ -145,8 +145,8 @@ class EntityRuler(object): @property def patterns(self): """Get all patterns that were added to the entity ruler. - RETURNS (list): The original patterns, one dictionary per pattern. + DOCS: https://spacy.io/api/entityruler#patterns """ all_patterns = [] @@ -172,6 +172,7 @@ class EntityRuler(object): pattern (list of dicts) or a phrase pattern (string). For example: {'label': 'ORG', 'pattern': 'Apple'} {'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]} + patterns (list): The patterns to add. DOCS: https://spacy.io/api/entityruler#add_patterns @@ -232,8 +233,8 @@ class EntityRuler(object): patterns_bytes (bytes): The bytestring to load. **kwargs: Other config paramters, mostly for consistency. - RETURNS (EntityRuler): The loaded entity ruler. + DOCS: https://spacy.io/api/entityruler#from_bytes """ cfg = srsly.msgpack_loads(patterns_bytes) @@ -254,6 +255,7 @@ class EntityRuler(object): """Serialize the entity ruler patterns to a bytestring. RETURNS (bytes): The serialized patterns. + DOCS: https://spacy.io/api/entityruler#to_bytes """ @@ -275,6 +277,7 @@ class EntityRuler(object): **kwargs: Other config paramters, mostly for consistency. RETURNS (EntityRuler): The loaded entity ruler. + DOCS: https://spacy.io/api/entityruler#from_disk """ path = ensure_path(path) diff --git a/website/docs/usage/rule-based-matching.md b/website/docs/usage/rule-based-matching.md index b4fba27cf..235e9c4ff 100644 --- a/website/docs/usage/rule-based-matching.md +++ b/website/docs/usage/rule-based-matching.md @@ -986,9 +986,11 @@ doc = nlp("Apple is opening its first big office in San Francisco.") print([(ent.text, ent.label_) for ent in doc.ents]) ``` -### Adding ids to patterns {#entityruler-ent-ids} +### Adding IDs to patterns {#entityruler-ent-ids new="2.2"} -The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each pattern. Using the `id` attribute allows multiple patterns to be associated with the same entity. +The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each +pattern. Using the `id` attribute allows multiple patterns to be associated with +the same entity. ```python ### {executable="true"} @@ -1010,8 +1012,10 @@ doc2 = nlp("Apple is opening its first big office in San Fran.") print([(ent.text, ent.label_, ent.ent_id_) for ent in doc2.ents]) ``` -If the `id` attribute is included in the [`EntityRuler`](/api/entityruler) patterns, the `ent_id_` property of the matched entity is set to the `id` given in the patterns. So in the example above it's easy to identify that "San Francisco" and "San Fran" are both the same entity. - +If the `id` attribute is included in the [`EntityRuler`](/api/entityruler) +patterns, the `ent_id_` property of the matched entity is set to the `id` given +in the patterns. So in the example above it's easy to identify that "San +Francisco" and "San Fran" are both the same entity. The entity ruler is designed to integrate with spaCy's existing statistical models and enhance the named entity recognizer. If it's added **before the