Adjust formatting [ci skip]

This commit is contained in:
Ines Montani 2019-10-25 11:19:46 +02:00
parent 93640373c7
commit f31876154d
2 changed files with 17 additions and 10 deletions

View File

@ -44,8 +44,8 @@ class EntityRuler(object):
**cfg: Other config parameters. If pipeline component is loaded as part **cfg: Other config parameters. If pipeline component is loaded as part
of a model pipeline, this will include all keyword arguments passed of a model pipeline, this will include all keyword arguments passed
to `spacy.load`. to `spacy.load`.
RETURNS (EntityRuler): The newly constructed object. RETURNS (EntityRuler): The newly constructed object.
DOCS: https://spacy.io/api/entityruler#init DOCS: https://spacy.io/api/entityruler#init
""" """
self.nlp = nlp self.nlp = nlp
@ -83,8 +83,8 @@ class EntityRuler(object):
"""Find matches in document and add them as entities. """Find matches in document and add them as entities.
doc (Doc): The Doc object in the pipeline. doc (Doc): The Doc object in the pipeline.
RETURNS (Doc): The Doc with added entities, if available. RETURNS (Doc): The Doc with added entities, if available.
DOCS: https://spacy.io/api/entityruler#call DOCS: https://spacy.io/api/entityruler#call
""" """
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc)) matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
@ -120,8 +120,8 @@ class EntityRuler(object):
@property @property
def labels(self): def labels(self):
"""All labels present in the match patterns. """All labels present in the match patterns.
RETURNS (set): The string labels. RETURNS (set): The string labels.
DOCS: https://spacy.io/api/entityruler#labels DOCS: https://spacy.io/api/entityruler#labels
""" """
all_labels = set(self.token_patterns.keys()) all_labels = set(self.token_patterns.keys())
@ -131,8 +131,8 @@ class EntityRuler(object):
@property @property
def ent_ids(self): def ent_ids(self):
"""All entity ids present in the match patterns `id` properties. """All entity ids present in the match patterns `id` properties.
RETURNS (set): The string entity ids. RETURNS (set): The string entity ids.
DOCS: https://spacy.io/api/entityruler#ent_ids DOCS: https://spacy.io/api/entityruler#ent_ids
""" """
all_ent_ids = set() all_ent_ids = set()
@ -145,8 +145,8 @@ class EntityRuler(object):
@property @property
def patterns(self): def patterns(self):
"""Get all patterns that were added to the entity ruler. """Get all patterns that were added to the entity ruler.
RETURNS (list): The original patterns, one dictionary per pattern. RETURNS (list): The original patterns, one dictionary per pattern.
DOCS: https://spacy.io/api/entityruler#patterns DOCS: https://spacy.io/api/entityruler#patterns
""" """
all_patterns = [] all_patterns = []
@ -172,6 +172,7 @@ class EntityRuler(object):
pattern (list of dicts) or a phrase pattern (string). For example: pattern (list of dicts) or a phrase pattern (string). For example:
{'label': 'ORG', 'pattern': 'Apple'} {'label': 'ORG', 'pattern': 'Apple'}
{'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]} {'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}
patterns (list): The patterns to add. patterns (list): The patterns to add.
DOCS: https://spacy.io/api/entityruler#add_patterns DOCS: https://spacy.io/api/entityruler#add_patterns
@ -232,8 +233,8 @@ class EntityRuler(object):
patterns_bytes (bytes): The bytestring to load. patterns_bytes (bytes): The bytestring to load.
**kwargs: Other config parameters, mostly for consistency. **kwargs: Other config parameters, mostly for consistency.
RETURNS (EntityRuler): The loaded entity ruler. RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://spacy.io/api/entityruler#from_bytes DOCS: https://spacy.io/api/entityruler#from_bytes
""" """
cfg = srsly.msgpack_loads(patterns_bytes) cfg = srsly.msgpack_loads(patterns_bytes)
@ -254,6 +255,7 @@ class EntityRuler(object):
"""Serialize the entity ruler patterns to a bytestring. """Serialize the entity ruler patterns to a bytestring.
RETURNS (bytes): The serialized patterns. RETURNS (bytes): The serialized patterns.
DOCS: https://spacy.io/api/entityruler#to_bytes DOCS: https://spacy.io/api/entityruler#to_bytes
""" """
@ -275,6 +277,7 @@ class EntityRuler(object):
**kwargs: Other config parameters, mostly for consistency. **kwargs: Other config parameters, mostly for consistency.
RETURNS (EntityRuler): The loaded entity ruler. RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://spacy.io/api/entityruler#from_disk DOCS: https://spacy.io/api/entityruler#from_disk
""" """
path = ensure_path(path) path = ensure_path(path)

View File

@ -986,9 +986,11 @@ doc = nlp("Apple is opening its first big office in San Francisco.")
print([(ent.text, ent.label_) for ent in doc.ents]) print([(ent.text, ent.label_) for ent in doc.ents])
``` ```
### Adding ids to patterns {#entityruler-ent-ids} ### Adding IDs to patterns {#entityruler-ent-ids new="2.2"}
The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each pattern. Using the `id` attribute allows multiple patterns to be associated with the same entity. The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each
pattern. Using the `id` attribute allows multiple patterns to be associated with
the same entity.
```python ```python
### {executable="true"} ### {executable="true"}
@ -1010,8 +1012,10 @@ doc2 = nlp("Apple is opening its first big office in San Fran.")
print([(ent.text, ent.label_, ent.ent_id_) for ent in doc2.ents]) print([(ent.text, ent.label_, ent.ent_id_) for ent in doc2.ents])
``` ```
If the `id` attribute is included in the [`EntityRuler`](/api/entityruler) patterns, the `ent_id_` property of the matched entity is set to the `id` given in the patterns. So in the example above it's easy to identify that "San Francisco" and "San Fran" are both the same entity. If the `id` attribute is included in the [`EntityRuler`](/api/entityruler)
patterns, the `ent_id_` property of the matched entity is set to the `id` given
in the patterns. So in the example above it's easy to identify that "San
Francisco" and "San Fran" are both the same entity.
The entity ruler is designed to integrate with spaCy's existing statistical The entity ruler is designed to integrate with spaCy's existing statistical
models and enhance the named entity recognizer. If it's added **before the models and enhance the named entity recognizer. If it's added **before the