Mirror of https://github.com/explosion/spaCy.git (synced 2025-08-04 12:20:20 +03:00)
Changes based on review comments
This commit is contained in:
Parent: 449559cc2d
Commit: 59b0b26cc6
|
@ -13,6 +13,7 @@ def build_lemmatizer_model(
|
||||||
nO: Optional[int] = None,
|
nO: Optional[int] = None,
|
||||||
normalize=False,
|
normalize=False,
|
||||||
lowercasing=True,
|
lowercasing=True,
|
||||||
|
lowercasing_relu_width: Optional[int] = 50,
|
||||||
) -> Model[List[Doc], Union[List[Floats2d]]]:
|
) -> Model[List[Doc], Union[List[Floats2d]]]:
|
||||||
"""Build a model for the edit-tree lemmatizer, using a provided token-to-vector component.
|
"""Build a model for the edit-tree lemmatizer, using a provided token-to-vector component.
|
||||||
A linear layer with softmax activation is added to predict scores
|
A linear layer with softmax activation is added to predict scores
|
||||||
|
@ -38,7 +39,9 @@ def build_lemmatizer_model(
|
||||||
model = tok2vec >> with_array(softmax)
|
model = tok2vec >> with_array(softmax)
|
||||||
if lowercasing:
|
if lowercasing:
|
||||||
lowercasing_output = Sigmoid(1)
|
lowercasing_output = Sigmoid(1)
|
||||||
sigmoid_appendage = Relu(50) >> Dropout(0.2) >> lowercasing_output
|
sigmoid_appendage = (
|
||||||
|
Relu(lowercasing_relu_width) >> Dropout() >> lowercasing_output
|
||||||
|
)
|
||||||
model |= tok2vec >> with_array(sigmoid_appendage)
|
model |= tok2vec >> with_array(sigmoid_appendage)
|
||||||
model.set_ref("lowercasing_output", lowercasing_output)
|
model.set_ref("lowercasing_output", lowercasing_output)
|
||||||
return model
|
return model
|
||||||
|
|
|
@ -25,6 +25,7 @@ default_model_config = """
|
||||||
[model]
|
[model]
|
||||||
@architectures = "spacy.Lemmatizer.v1"
|
@architectures = "spacy.Lemmatizer.v1"
|
||||||
lowercasing = true
|
lowercasing = true
|
||||||
|
lowercasing_relu_width = 50
|
||||||
|
|
||||||
[model.tok2vec]
|
[model.tok2vec]
|
||||||
@architectures = "spacy.HashEmbedCNN.v2"
|
@architectures = "spacy.HashEmbedCNN.v2"
|
||||||
|
@ -204,7 +205,7 @@ class EditTreeLemmatizer(TrainablePipe):
|
||||||
self, docs, scores, lowercasing_flags: Optional[List[Floats2d]]
|
self, docs, scores, lowercasing_flags: Optional[List[Floats2d]]
|
||||||
):
|
):
|
||||||
guesses = []
|
guesses = []
|
||||||
for i, (doc, doc_scores) in enumerate(zip(docs, scores)):
|
for (i, doc, doc_scores) in zip(range(len(docs)), docs, scores):
|
||||||
if self.top_k == 1:
|
if self.top_k == 1:
|
||||||
doc_guesses = doc_scores.argmax(axis=1).reshape(-1, 1)
|
doc_guesses = doc_scores.argmax(axis=1).reshape(-1, 1)
|
||||||
else:
|
else:
|
||||||
|
@ -214,11 +215,13 @@ class EditTreeLemmatizer(TrainablePipe):
|
||||||
doc_guesses = doc_guesses.get()
|
doc_guesses = doc_guesses.get()
|
||||||
|
|
||||||
doc_compat_guesses = []
|
doc_compat_guesses = []
|
||||||
for j, (token, candidates) in enumerate(zip(doc, doc_guesses)):
|
for (j, token, candidates) in zip(range(len(doc)), doc, doc_guesses):
|
||||||
to_lowercase = False
|
|
||||||
if lowercasing_flags is not None and lowercasing_flags[i][j] > 0.5:
|
if lowercasing_flags is not None and lowercasing_flags[i][j] > 0.5:
|
||||||
to_lowercase = True
|
to_lowercase = 1
|
||||||
text = token.lower_ if to_lowercase else token.text
|
text = token.lower_
|
||||||
|
else:
|
||||||
|
to_lowercase = 0
|
||||||
|
text = token.text
|
||||||
tree_id = -1
|
tree_id = -1
|
||||||
for candidate in candidates:
|
for candidate in candidates:
|
||||||
candidate_tree_id = self.cfg["labels"][candidate]
|
candidate_tree_id = self.cfg["labels"][candidate]
|
||||||
|
|
Loading…
Reference in New Issue
Block a user