mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 02:04:07 +03:00
Adjust formatting [ci skip]
This commit is contained in:
parent
93640373c7
commit
f31876154d
|
@ -44,8 +44,8 @@ class EntityRuler(object):
|
||||||
**cfg: Other config parameters. If pipeline component is loaded as part
|
**cfg: Other config parameters. If pipeline component is loaded as part
|
||||||
of a model pipeline, this will include all keyword arguments passed
|
of a model pipeline, this will include all keyword arguments passed
|
||||||
to `spacy.load`.
|
to `spacy.load`.
|
||||||
|
|
||||||
RETURNS (EntityRuler): The newly constructed object.
|
RETURNS (EntityRuler): The newly constructed object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#init
|
DOCS: https://spacy.io/api/entityruler#init
|
||||||
"""
|
"""
|
||||||
self.nlp = nlp
|
self.nlp = nlp
|
||||||
|
@ -83,8 +83,8 @@ class EntityRuler(object):
|
||||||
"""Find matches in document and add them as entities.
|
"""Find matches in document and add them as entities.
|
||||||
|
|
||||||
doc (Doc): The Doc object in the pipeline.
|
doc (Doc): The Doc object in the pipeline.
|
||||||
|
|
||||||
RETURNS (Doc): The Doc with added entities, if available.
|
RETURNS (Doc): The Doc with added entities, if available.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#call
|
DOCS: https://spacy.io/api/entityruler#call
|
||||||
"""
|
"""
|
||||||
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
matches = list(self.matcher(doc)) + list(self.phrase_matcher(doc))
|
||||||
|
@ -120,8 +120,8 @@ class EntityRuler(object):
|
||||||
@property
|
@property
|
||||||
def labels(self):
|
def labels(self):
|
||||||
"""All labels present in the match patterns.
|
"""All labels present in the match patterns.
|
||||||
|
|
||||||
RETURNS (set): The string labels.
|
RETURNS (set): The string labels.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#labels
|
DOCS: https://spacy.io/api/entityruler#labels
|
||||||
"""
|
"""
|
||||||
all_labels = set(self.token_patterns.keys())
|
all_labels = set(self.token_patterns.keys())
|
||||||
|
@ -131,8 +131,8 @@ class EntityRuler(object):
|
||||||
@property
|
@property
|
||||||
def ent_ids(self):
|
def ent_ids(self):
|
||||||
"""All entity ids present in the match patterns' `id` properties.
|
"""All entity ids present in the match patterns' `id` properties.
|
||||||
|
|
||||||
RETURNS (set): The string entity ids.
|
RETURNS (set): The string entity ids.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#ent_ids
|
DOCS: https://spacy.io/api/entityruler#ent_ids
|
||||||
"""
|
"""
|
||||||
all_ent_ids = set()
|
all_ent_ids = set()
|
||||||
|
@ -145,8 +145,8 @@ class EntityRuler(object):
|
||||||
@property
|
@property
|
||||||
def patterns(self):
|
def patterns(self):
|
||||||
"""Get all patterns that were added to the entity ruler.
|
"""Get all patterns that were added to the entity ruler.
|
||||||
|
|
||||||
RETURNS (list): The original patterns, one dictionary per pattern.
|
RETURNS (list): The original patterns, one dictionary per pattern.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#patterns
|
DOCS: https://spacy.io/api/entityruler#patterns
|
||||||
"""
|
"""
|
||||||
all_patterns = []
|
all_patterns = []
|
||||||
|
@ -172,6 +172,7 @@ class EntityRuler(object):
|
||||||
pattern (list of dicts) or a phrase pattern (string). For example:
|
pattern (list of dicts) or a phrase pattern (string). For example:
|
||||||
{'label': 'ORG', 'pattern': 'Apple'}
|
{'label': 'ORG', 'pattern': 'Apple'}
|
||||||
{'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}
|
{'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}
|
||||||
|
|
||||||
patterns (list): The patterns to add.
|
patterns (list): The patterns to add.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#add_patterns
|
DOCS: https://spacy.io/api/entityruler#add_patterns
|
||||||
|
@ -232,8 +233,8 @@ class EntityRuler(object):
|
||||||
|
|
||||||
patterns_bytes (bytes): The bytestring to load.
|
patterns_bytes (bytes): The bytestring to load.
|
||||||
**kwargs: Other config parameters, mostly for consistency.
|
**kwargs: Other config parameters, mostly for consistency.
|
||||||
|
|
||||||
RETURNS (EntityRuler): The loaded entity ruler.
|
RETURNS (EntityRuler): The loaded entity ruler.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#from_bytes
|
DOCS: https://spacy.io/api/entityruler#from_bytes
|
||||||
"""
|
"""
|
||||||
cfg = srsly.msgpack_loads(patterns_bytes)
|
cfg = srsly.msgpack_loads(patterns_bytes)
|
||||||
|
@ -254,6 +255,7 @@ class EntityRuler(object):
|
||||||
"""Serialize the entity ruler patterns to a bytestring.
|
"""Serialize the entity ruler patterns to a bytestring.
|
||||||
|
|
||||||
RETURNS (bytes): The serialized patterns.
|
RETURNS (bytes): The serialized patterns.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#to_bytes
|
DOCS: https://spacy.io/api/entityruler#to_bytes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -275,6 +277,7 @@ class EntityRuler(object):
|
||||||
**kwargs: Other config parameters, mostly for consistency.
|
**kwargs: Other config parameters, mostly for consistency.
|
||||||
|
|
||||||
RETURNS (EntityRuler): The loaded entity ruler.
|
RETURNS (EntityRuler): The loaded entity ruler.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#from_disk
|
DOCS: https://spacy.io/api/entityruler#from_disk
|
||||||
"""
|
"""
|
||||||
path = ensure_path(path)
|
path = ensure_path(path)
|
||||||
|
|
|
@ -986,9 +986,11 @@ doc = nlp("Apple is opening its first big office in San Francisco.")
|
||||||
print([(ent.text, ent.label_) for ent in doc.ents])
|
print([(ent.text, ent.label_) for ent in doc.ents])
|
||||||
```
|
```
|
||||||
|
|
||||||
### Adding ids to patterns {#entityruler-ent-ids}
|
### Adding IDs to patterns {#entityruler-ent-ids new="2.2"}
|
||||||
|
|
||||||
The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each pattern. Using the `id` attribute allows multiple patterns to be associated with the same entity.
|
The [`EntityRuler`](/api/entityruler) can also accept an `id` attribute for each
|
||||||
|
pattern. Using the `id` attribute allows multiple patterns to be associated with
|
||||||
|
the same entity.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
### {executable="true"}
|
### {executable="true"}
|
||||||
|
@ -1010,8 +1012,10 @@ doc2 = nlp("Apple is opening its first big office in San Fran.")
|
||||||
print([(ent.text, ent.label_, ent.ent_id_) for ent in doc2.ents])
|
print([(ent.text, ent.label_, ent.ent_id_) for ent in doc2.ents])
|
||||||
```
|
```
|
||||||
|
|
||||||
If the `id` attribute is included in the [`EntityRuler`](/api/entityruler) patterns, the `ent_id_` property of the matched entity is set to the `id` given in the patterns. So in the example above it's easy to identify that "San Francisco" and "San Fran" are both the same entity.
|
If the `id` attribute is included in the [`EntityRuler`](/api/entityruler)
|
||||||
|
patterns, the `ent_id_` property of the matched entity is set to the `id` given
|
||||||
|
in the patterns. So in the example above it's easy to identify that "San
|
||||||
|
Francisco" and "San Fran" are both the same entity.
|
||||||
|
|
||||||
The entity ruler is designed to integrate with spaCy's existing statistical
|
The entity ruler is designed to integrate with spaCy's existing statistical
|
||||||
models and enhance the named entity recognizer. If it's added **before the
|
models and enhance the named entity recognizer. If it's added **before the
|
||||||
|
|
Loading…
Reference in New Issue
Block a user