mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-02 19:30:19 +03:00
Code improvements
This commit is contained in:
parent
c89ef32ea5
commit
acc8018302
|
@ -41,7 +41,7 @@ ROOT_LEFT_CHARS = {
|
||||||
|
|
||||||
class AttributeFormat:
|
class AttributeFormat:
|
||||||
"""
|
"""
|
||||||
Instructions for rendering information about a token property, e.g. lemma_, ent_type_.
|
Instructions for rendering information about a token property, e.g. *lemma_*, *ent_type_*.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
@ -57,13 +57,13 @@ class AttributeFormat:
|
||||||
value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
|
value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
attribute: the token attribute, e.g. lemma_, ._.holmes.lemma
|
attribute: the token attribute, e.g. *lemma_*, *._.holmes.lemma*.
|
||||||
name: the name to display e.g. in column headers
|
name: the name to display e.g. in column headers.
|
||||||
aligns: where appropriate the column alignment 'l' (left,
|
aligns: where appropriate the column alignment 'l' (left,
|
||||||
default), 'r' (right) or 'c' (center).
|
default), 'r' (right) or 'c' (center).
|
||||||
max_width: a maximum width to which values of the attribute should be truncated.
|
max_width: a maximum width to which values of the attribute should be truncated.
|
||||||
fg_color: the foreground color that should be used to display instances of the attribute
|
fg_color: the foreground color that should be used to display instances of the attribute.
|
||||||
bg_color: the background color that should be used to display instances of the attribute
|
bg_color: the background color that should be used to display instances of the attribute.
|
||||||
value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
|
value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
|
||||||
value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values.
|
value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values.
|
||||||
"""
|
"""
|
||||||
|
@ -90,24 +90,26 @@ class AttributeFormat:
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
|
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
|
||||||
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths
|
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths.
|
||||||
"""
|
"""
|
||||||
value = _get_token_value(token, self.attribute)
|
value = _get_token_value(token, self.attribute)
|
||||||
if self.max_width is not None:
|
if self.max_width is not None:
|
||||||
value = value[: self.max_width]
|
value = value[: self.max_width]
|
||||||
fg_color = None
|
|
||||||
bg_color = None
|
|
||||||
if right_pad_to_len is not None:
|
if right_pad_to_len is not None:
|
||||||
right_padding = " " * (right_pad_to_len - len(value))
|
right_padding = " " * (right_pad_to_len - len(value))
|
||||||
else:
|
else:
|
||||||
right_padding = ""
|
right_padding = ""
|
||||||
|
value_dep_fg_color = None
|
||||||
|
value_dep_bg_color = None
|
||||||
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
|
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
|
||||||
if len(self.value_dep_fg_colors) > 0:
|
if len(self.value_dep_fg_colors) > 0:
|
||||||
fg_color = self.value_dep_fg_colors.get(value, None)
|
value_dep_fg_color = self.value_dep_fg_colors.get(value, None)
|
||||||
if len(self.value_dep_bg_colors) > 0:
|
if len(self.value_dep_bg_colors) > 0:
|
||||||
bg_color = self.value_dep_bg_colors.get(value, None)
|
value_dep_bg_color = self.value_dep_bg_colors.get(value, None)
|
||||||
if fg_color is not None or bg_color is not None:
|
if value_dep_fg_color is not None or value_dep_bg_color is not None:
|
||||||
value = self.printer.text(value, color=fg_color, bg_color=bg_color)
|
value = self.printer.text(
|
||||||
|
value, color=value_dep_fg_color, bg_color=value_dep_bg_color
|
||||||
|
)
|
||||||
return value + right_padding
|
return value + right_padding
|
||||||
|
|
||||||
|
|
||||||
|
@ -120,7 +122,10 @@ def render_dep_tree(sent, root_right: bool) -> List[str]:
|
||||||
root_right: True if the tree should be rendered with the root on the right-hand side,
|
root_right: True if the tree should be rendered with the root on the right-hand side,
|
||||||
False if the tree should be rendered with the root on the left-hand side.
|
False if the tree should be rendered with the root on the left-hand side.
|
||||||
|
|
||||||
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy
|
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy. It was confirmed that
|
||||||
|
this code outputted equivalent trees to deplacy for a large number of sentences; there
|
||||||
|
were a handful of cases where the trees were different, but in these cases
|
||||||
|
the trees outputted here were confirmed to be linguistically correct.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Check sent is really a sentence
|
# Check sent is really a sentence
|
||||||
|
@ -346,18 +351,16 @@ def render_table(
|
||||||
adj_start_i = doc[adj_start_i].sent.start
|
adj_start_i = doc[adj_start_i].sent.start
|
||||||
end_i = doc[end_i].sent.end
|
end_i = doc[end_i].sent.end
|
||||||
for sent in doc[adj_start_i:end_i].sents:
|
for sent in doc[adj_start_i:end_i].sents:
|
||||||
if "tree_right" in (c.attribute for c in cols):
|
|
||||||
tree_right = render_dep_tree(sent, True)
|
|
||||||
if "tree_left" in (c.attribute for c in cols):
|
|
||||||
tree_left = render_dep_tree(sent, False)
|
|
||||||
widths = []
|
widths = []
|
||||||
for col in cols:
|
for col in cols:
|
||||||
# get the values without any color codes
|
if col.attribute == "tree_right":
|
||||||
if col.attribute == "tree_left":
|
tree_right = render_dep_tree(sent, True)
|
||||||
width = len(tree_left[0]) # type: ignore
|
width = len(tree_right[0])
|
||||||
elif col.attribute == "tree_right":
|
elif col.attribute == "tree_left":
|
||||||
width = len(tree_right[0]) # type: ignore
|
tree_left = render_dep_tree(sent, False)
|
||||||
|
width = len(tree_left[0])
|
||||||
else:
|
else:
|
||||||
|
# get the values without any color codes
|
||||||
if len(sent) > 0:
|
if len(sent) > 0:
|
||||||
width = max(
|
width = max(
|
||||||
len(col.render(token, ignore_colors=True)) for token in sent
|
len(col.render(token, ignore_colors=True)) for token in sent
|
||||||
|
@ -462,7 +465,7 @@ def _get_token_value(token, attribute: str) -> str:
|
||||||
"""
|
"""
|
||||||
Get value *token.x.y.z*.
|
Get value *token.x.y.z*.
|
||||||
|
|
||||||
token: the token
|
token: the token.
|
||||||
attribute: the attribute name, e.g. *x.y.z*.
|
attribute: the attribute name, e.g. *x.y.z*.
|
||||||
"""
|
"""
|
||||||
obj = token
|
obj = token
|
||||||
|
@ -483,9 +486,9 @@ def _get_adjusted_start_i(
|
||||||
Get the position at which to start rendering a document, which may be
|
Get the position at which to start rendering a document, which may be
|
||||||
adjusted by a search for a specific attribute value.
|
adjusted by a search for a specific attribute value.
|
||||||
|
|
||||||
doc: the document
|
doc: the document.
|
||||||
start_i: the user-specified start index
|
start_i: the user-specified start index.
|
||||||
cols: the list of attribute columns being displayed
|
cols: the list of attribute columns being displayed.
|
||||||
search_attr_name: the name of the attribute for which values are being searched,
|
search_attr_name: the name of the attribute for which values are being searched,
|
||||||
i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed.
|
i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed.
|
||||||
search_attr_value: the attribute value for which to search.
|
search_attr_value: the attribute value for which to search.
|
||||||
|
|
|
@ -540,9 +540,9 @@ is displayed.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `search_attr_name` | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. ~~Optional[str]~~ |
|
| `search_attr_name` | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. A value only has an effect in conjunction with a value for `search_attr_value`. ~~Optional[str]~~ |
|
||||||
| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. ~~Optional[str]~~ |
|
| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. A value only has an effect in conjunction with a value for `search_attr_name`. ~~Optional[str]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `start_i` | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~ |
|
| `start_i` | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~ |
|
||||||
| `length` | The number of tokens to display with their containing sentences. Can be used in isolation to display a portion at the beginning of a document, or in conjunction with the other parameters to display more than one sentence. ~~Optional[int]~~ |
|
| `length` | The number of tokens to display with their containing sentences. Can be used in isolation to display a portion at the beginning of a document, or in conjunction with the other parameters to display more than one sentence. ~~Optional[int]~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user