mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Update entity setting section
This commit is contained in:
parent
6a9a467766
commit
0d2b723e8d
|
@@ -599,18 +599,27 @@ ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
|
||||||
print('Before', ents)
|
print('Before', ents)
|
||||||
# The model didn't recognize "fb" as an entity :(
|
# The model didn't recognize "fb" as an entity :(
|
||||||
|
|
||||||
fb_ent = Span(doc, 0, 1, label="ORG") # create a Span for the new entity
|
# Create a span for the new entity
|
||||||
|
fb_ent = Span(doc, 0, 1, label="ORG")
|
||||||
|
|
||||||
|
# Option 1: Modify the provided entity spans, leaving the rest unmodified
|
||||||
|
doc.set_ents([fb_ent], default="unmodified")
|
||||||
|
|
||||||
|
# Option 2: Assign a complete list of ents to doc.ents
|
||||||
doc.ents = list(doc.ents) + [fb_ent]
|
doc.ents = list(doc.ents) + [fb_ent]
|
||||||
|
|
||||||
ents = [(e.text, e.start_char, e.end_char, e.label_) for e in doc.ents]
|
ents = [(e.text, e.start, e.end, e.label_) for e in doc.ents]
|
||||||
print('After', ents)
|
print('After', ents)
|
||||||
# [('fb', 0, 2, 'ORG')] 🎉
|
# [('fb', 0, 1, 'ORG')] 🎉
|
||||||
```
|
```
|
||||||
|
|
||||||
Keep in mind that you need to create a `Span` with the start and end index of
|
Keep in mind that `Span` is initialized with the start and end **token**
|
||||||
the **token**, not the start and end index of the entity in the document. In
|
indices, not the character offsets. To create a span from character offsets, use
|
||||||
this case, "fb" is token `(0, 1)` – but at the document level, the entity will
|
[`Doc.char_span`](/api/doc#char_span):
|
||||||
have the start and end indices `(0, 2)`.
|
|
||||||
|
```python
|
||||||
|
fb_ent = doc.char_span(0, 2, label="ORG")
|
||||||
|
```
|
||||||
|
|
||||||
#### Setting entity annotations from array {#setting-from-array}
|
#### Setting entity annotations from array {#setting-from-array}
|
||||||
|
|
||||||
|
@@ -645,9 +654,10 @@ write efficient native code.
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# cython: infer_types=True
|
# cython: infer_types=True
|
||||||
|
from spacy.typedefs cimport attr_t
|
||||||
from spacy.tokens.doc cimport Doc
|
from spacy.tokens.doc cimport Doc
|
||||||
|
|
||||||
cpdef set_entity(Doc doc, int start, int end, int ent_type):
|
cpdef set_entity(Doc doc, int start, int end, attr_t ent_type):
|
||||||
for i in range(start, end):
|
for i in range(start, end):
|
||||||
doc.c[i].ent_type = ent_type
|
doc.c[i].ent_type = ent_type
|
||||||
doc.c[start].ent_iob = 3
|
doc.c[start].ent_iob = 3
|
||||||
|
|
Loading…
Reference in New Issue
Block a user