Code improvements

This commit is contained in:
richardpaulhudson 2023-01-27 09:11:58 +01:00
parent c89ef32ea5
commit acc8018302
2 changed files with 35 additions and 32 deletions

View File

@ -41,7 +41,7 @@ ROOT_LEFT_CHARS = {
class AttributeFormat: class AttributeFormat:
""" """
Instructions for rendering information about a token property, e.g. lemma_, ent_type_. Instructions for rendering information about a token property, e.g. *lemma_*, *ent_type_*.
""" """
def __init__( def __init__(
@ -57,13 +57,13 @@ class AttributeFormat:
value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None, value_dep_bg_colors: Optional[Dict[str, Union[str, int]]] = None,
): ):
""" """
attribute: the token attribute, e.g. lemma_, ._.holmes.lemma attribute: the token attribute, e.g. *lemma_*, *._.holmes.lemma*.
name: the name to display e.g. in column headers name: the name to display e.g. in column headers.
aligns: where appropriate the column alignment 'l' (left, aligns: where appropriate the column alignment 'l' (left,
default), 'r' (right) or 'c' (center). default), 'r' (right) or 'c' (center).
max_width: a maximum width to which values of the attribute should be truncated. max_width: a maximum width to which values of the attribute should be truncated.
fg_color: the foreground color that should be used to display instances of the attribute fg_color: the foreground color that should be used to display instances of the attribute.
bg_color: the background color that should be used to display instances of the attribute bg_color: the background color that should be used to display instances of the attribute.
value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values. value_dep_fg_colors: a dictionary from values to foreground colors that should be used to display those values.
value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values. value_dep_bg_colors: a dictionary from values to background colors that should be used to display those values.
""" """
@ -90,24 +90,26 @@ class AttributeFormat:
) -> str: ) -> str:
""" """
right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding. right_pad_to_len: the width to which values should be right-padded, or 'None' for no right-padding.
ignore_colors: no colors should be rendered, typically because the values are required to calculate widths ignore_colors: no colors should be rendered, typically because the values are required to calculate widths.
""" """
value = _get_token_value(token, self.attribute) value = _get_token_value(token, self.attribute)
if self.max_width is not None: if self.max_width is not None:
value = value[: self.max_width] value = value[: self.max_width]
fg_color = None
bg_color = None
if right_pad_to_len is not None: if right_pad_to_len is not None:
right_padding = " " * (right_pad_to_len - len(value)) right_padding = " " * (right_pad_to_len - len(value))
else: else:
right_padding = "" right_padding = ""
value_dep_fg_color = None
value_dep_bg_color = None
if SUPPORTS_ANSI and not ignore_colors and len(value) > 0: if SUPPORTS_ANSI and not ignore_colors and len(value) > 0:
if len(self.value_dep_fg_colors) > 0: if len(self.value_dep_fg_colors) > 0:
fg_color = self.value_dep_fg_colors.get(value, None) value_dep_fg_color = self.value_dep_fg_colors.get(value, None)
if len(self.value_dep_bg_colors) > 0: if len(self.value_dep_bg_colors) > 0:
bg_color = self.value_dep_bg_colors.get(value, None) value_dep_bg_color = self.value_dep_bg_colors.get(value, None)
if fg_color is not None or bg_color is not None: if value_dep_fg_color is not None or value_dep_bg_color is not None:
value = self.printer.text(value, color=fg_color, bg_color=bg_color) value = self.printer.text(
value, color=value_dep_fg_color, bg_color=value_dep_bg_color
)
return value + right_padding return value + right_padding
@ -120,7 +122,10 @@ def render_dep_tree(sent, root_right: bool) -> List[str]:
root_right: True if the tree should be rendered with the root on the right-hand side, root_right: True if the tree should be rendered with the root on the right-hand side,
False if the tree should be rendered with the root on the left-hand side. False if the tree should be rendered with the root on the left-hand side.
Algorithm adapted from https://github.com/KoichiYasuoka/deplacy Algorithm adapted from https://github.com/KoichiYasuoka/deplacy. It was confirmed that
this code outputted equivalent trees to deplacy for a large number of sentences; there
were a handful of cases where the trees were different, but in these cases
the trees outputted here were confirmed to be linguistically correct.
""" """
# Check sent is really a sentence # Check sent is really a sentence
@ -346,18 +351,16 @@ def render_table(
adj_start_i = doc[adj_start_i].sent.start adj_start_i = doc[adj_start_i].sent.start
end_i = doc[end_i].sent.end end_i = doc[end_i].sent.end
for sent in doc[adj_start_i:end_i].sents: for sent in doc[adj_start_i:end_i].sents:
if "tree_right" in (c.attribute for c in cols):
tree_right = render_dep_tree(sent, True)
if "tree_left" in (c.attribute for c in cols):
tree_left = render_dep_tree(sent, False)
widths = [] widths = []
for col in cols: for col in cols:
# get the values without any color codes if col.attribute == "tree_right":
if col.attribute == "tree_left": tree_right = render_dep_tree(sent, True)
width = len(tree_left[0]) # type: ignore width = len(tree_right[0])
elif col.attribute == "tree_right": elif col.attribute == "tree_left":
width = len(tree_right[0]) # type: ignore tree_left = render_dep_tree(sent, False)
width = len(tree_left[0])
else: else:
# get the values without any color codes
if len(sent) > 0: if len(sent) > 0:
width = max( width = max(
len(col.render(token, ignore_colors=True)) for token in sent len(col.render(token, ignore_colors=True)) for token in sent
@ -462,7 +465,7 @@ def _get_token_value(token, attribute: str) -> str:
""" """
Get value *token.x.y.z*. Get value *token.x.y.z*.
token: the token token: the token.
attribute: the attribute name, e.g. *x.y.z*. attribute: the attribute name, e.g. *x.y.z*.
""" """
obj = token obj = token
@ -483,9 +486,9 @@ def _get_adjusted_start_i(
Get the position at which to start rendering a document, which may be Get the position at which to start rendering a document, which may be
adjusted by a search for a specific attribute value. adjusted by a search for a specific attribute value.
doc: the document doc: the document.
start_i: the user-specified start index start_i: the user-specified start index.
cols: the list of attribute columns being displayed cols: the list of attribute columns being displayed.
search_attr_name: the name of the attribute for which values are being searched, search_attr_name: the name of the attribute for which values are being searched,
i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed. i.e. *x.y.z* for token attribute *token.x.y.z*, or *None* if no search is to be performed.
search_attr_value: the attribute value for which to search. search_attr_value: the attribute value for which to search.

View File

@ -540,9 +540,9 @@ is displayed.
> ``` > ```
| Name | Description | | Name | Description |
| ------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | ------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `search_attr_name` | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. ~~Optional[str]~~ | | `search_attr_name` | The name of an attribute to search for in order to determine the sentence to render, e.g. `lemma_`. A value only has an effect in conjunction with a value for `search_attr_value`. ~~Optional[str]~~ |
| `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. ~~Optional[str]~~ | | `search_attr_value` | The value of an attribute to search for in order to determine the sentence to render, e.g. `give`. A value only has an effect in conjunction with a value for `search_attr_name`. ~~Optional[str]~~ |
| _keyword-only_ | | | _keyword-only_ | |
| `start_i` | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~ | | `start_i` | The index at which to begin searching or whose containing sentence to render. ~~Optional[int]~~ |
| `length` | The number of tokens to display with their containing sentences. Can be used in isolation to display a portion at the beginning of a document, or in conjunction with the other parameters to display more than one sentence. ~~Optional[int]~~ | | `length` | The number of tokens to display with their containing sentences. Can be used in isolation to display a portion at the beginning of a document, or in conjunction with the other parameters to display more than one sentence. ~~Optional[int]~~ |