Adjust formatting [ci skip]

This commit is contained in:
Ines Montani 2019-10-25 11:19:46 +02:00
parent 93640373c7
commit f31876154d
2 changed files with 17 additions and 10 deletions

View File

@ -44,8 +44,8 @@ class EntityRuler(object):
**cfg: Other config parameters. If pipeline component is loaded as part
of a model pipeline, this will include all keyword arguments passed
to `spacy.load`.
RETURNS (EntityRuler): The newly constructed object.
DOCS: https://spacy.io/api/entityruler#init
"""
self.nlp = nlp
@ -83,8 +83,8 @@ class EntityRuler(object):
"""Find matches in document and add them as entities.
doc (Doc): The Doc object in the pipeline.
RETURNS (Doc): The Doc with added entities, if available.
DOCS: https://spacy.io/api/entityruler#call
"""
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
@ -120,8 +120,8 @@ class EntityRuler(object):
@property
def labels(self):
"""All labels present in the match patterns.
RETURNS (set): The string labels.
DOCS: https://spacy.io/api/entityruler#labels
"""
all_labels = set(self.token_patterns.keys())
@ -131,8 +131,8 @@ class EntityRuler(object):
@property
def ent_ids(self):
"""All entity ids present in the match patterns `id` properties
RETURNS (set): The string entity ids.
DOCS: https://spacy.io/api/entityruler#ent_ids
"""
all_ent_ids = set()
@ -145,8 +145,8 @@ class EntityRuler(object):
@property
def patterns(self):
"""Get all patterns that were added to the entity ruler.
RETURNS (list): The original patterns, one dictionary per pattern.
DOCS: https://spacy.io/api/entityruler#patterns
"""
all_patterns = []
@ -172,6 +172,7 @@ class EntityRuler(object):
pattern (list of dicts) or a phrase pattern (string). For example:
{'label': 'ORG', 'pattern': 'Apple'}
{'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}
patterns (list): The patterns to add.
DOCS: https://spacy.io/api/entityruler#add_patterns
@ -232,8 +233,8 @@ class EntityRuler(object):
patterns_bytes (bytes): The bytestring to load.
**kwargs: Other config parameters, mostly for consistency.
RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://spacy.io/api/entityruler#from_bytes
"""
cfg = srsly.msgpack_loads(patterns_bytes)
@ -254,6 +255,7 @@ class EntityRuler(object):
"""Serialize the entity ruler patterns to a bytestring.
RETURNS (bytes): The serialized patterns.
DOCS: https://spacy.io/api/entityruler#to_bytes
"""
@ -275,6 +277,7 @@ class EntityRuler(object):
**kwargs: Other config parameters, mostly for consistency.
RETURNS (EntityRuler): The loaded entity ruler.
DOCS: https://spacy.io/api/entityruler#from_disk
"""
path = ensure_path(path)

View File

@ -986,9 +986,11 @@ doc = nlp("Apple is opening its first big office in San Francisco.")
print([(ent.text, ent.label_) for ent in doc.ents])
```
### Adding ids to patterns {#entityruler-ent-ids}
### Adding IDs to patterns {#entityruler-ent-ids new="2.2"}
The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each pattern. Using the `id` attribute allows multiple patterns to be associated with the same entity.
The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each
pattern. Using the `id` attribute allows multiple patterns to be associated with
the same entity.
```python
### {executable="true"}
@ -1010,8 +1012,10 @@ doc2 = nlp("Apple is opening its first big office in San Fran.")
print([(ent.text, ent.label_, ent.ent_id_) for ent in doc2.ents])
```
If the `id` attribute is included in the [`EntityRuler`](/api/entityruler) patterns, the `ent_id_` property of the matched entity is set to the `id` given in the patterns. So in the example above it's easy to identify that "San Francisco" and "San Fran" are both the same entity.
If the `id` attribute is included in the [`EntityRuler`](/api/entityruler)
patterns, the `ent_id_` property of the matched entity is set to the `id` given
in the patterns. So in the example above it's easy to identify that "San
Francisco" and "San Fran" are both the same entity.
The entity ruler is designed to integrate with spaCy's existing statistical
models and enhance the named entity recognizer. If it's added **before the