mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-04 11:25:51 +03:00
minor changes
This commit is contained in:
parent
59d8ee4132
commit
8b64741502
|
@ -26,22 +26,36 @@ def _split_doc(doc: Doc) -> bool:
|
||||||
noun_modified = False
|
noun_modified = False
|
||||||
has_conjunction = False
|
has_conjunction = False
|
||||||
|
|
||||||
|
noun_count = 0
|
||||||
|
modifiers = set()
|
||||||
|
|
||||||
for token in doc:
|
for token in doc:
|
||||||
|
if token.pos_ == "NOUN":
|
||||||
|
noun_count += 1
|
||||||
if token.head.pos_ == "NOUN": ## check to see that the phrase is a noun phrase
|
if token.head.pos_ == "NOUN": ## check to see that the phrase is a noun phrase
|
||||||
has_modifier = any(
|
for child in token.head.children:
|
||||||
child.dep_ == "amod" for child in token.head.children
|
if child.dep_ in ["amod", "advmod", "nmod"]:
|
||||||
) # check to see if the noun has a modifier
|
modifiers.add(child.text)
|
||||||
if has_modifier:
|
|
||||||
noun_modified = True
|
noun_modified = True
|
||||||
|
for child in token.children:
|
||||||
|
if child.dep_ == "conj" and child.pos_ == "ADJ":
|
||||||
|
modifiers.add(child.text)
|
||||||
|
|
||||||
# check if there is a conjunction in the phrase
|
# check if there is a conjunction in the phrase
|
||||||
if token.pos_ == "CCONJ":
|
if token.pos_ == "CCONJ":
|
||||||
has_conjunction = True
|
has_conjunction = True
|
||||||
|
|
||||||
return (
|
modifier_count = len(modifiers)
|
||||||
True if noun_modified and has_conjunction else False
|
|
||||||
) # and not all_nouns_modified else False
|
|
||||||
|
|
||||||
|
noun_modified = modifier_count > 0
|
||||||
|
|
||||||
|
all_nouns_modified = modifier_count == noun_count
|
||||||
|
|
||||||
|
if noun_modified and has_conjunction and not all_nouns_modified:
|
||||||
|
return True
|
||||||
|
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
def _collect_modifiers(token: Token) -> List[str]:
|
def _collect_modifiers(token: Token) -> List[str]:
|
||||||
"""Collects adverbial modifiers for a given token.
|
"""Collects adverbial modifiers for a given token.
|
||||||
|
|
|
@ -211,7 +211,7 @@ def noun_construction_case10(nlp):
|
||||||
words = ["fresh", "but", "quite", "sour", "apples", "and", "chicken", "wings"]
|
words = ["fresh", "but", "quite", "sour", "apples", "and", "chicken", "wings"]
|
||||||
spaces = [True, True, True, True, True, True, True, False]
|
spaces = [True, True, True, True, True, True, True, False]
|
||||||
pos_tags = ["ADJ", "CCONJ", "ADV", "ADJ", "NOUN", "CCONJ", "NOUN", "NOUN"]
|
pos_tags = ["ADJ", "CCONJ", "ADV", "ADJ", "NOUN", "CCONJ", "NOUN", "NOUN"]
|
||||||
dep_relations = ["amod", "cc", "advmod", "conj", "ROOT", "cc", "conj", "compound"]
|
dep_relations = ["amod", "cc", "advmod", "amod", "ROOT", "cc", "compound", "conj"]
|
||||||
|
|
||||||
doc = Doc(nlp.vocab, words=words, spaces=spaces)
|
doc = Doc(nlp.vocab, words=words, spaces=spaces)
|
||||||
|
|
||||||
|
@ -220,13 +220,12 @@ def noun_construction_case10(nlp):
|
||||||
token.dep_ = dep
|
token.dep_ = dep
|
||||||
|
|
||||||
doc[0].head = doc[4]
|
doc[0].head = doc[4]
|
||||||
doc[1].head = doc[3]
|
doc[1].head = doc[4]
|
||||||
doc[2].head = doc[3]
|
doc[2].head = doc[3]
|
||||||
doc[3].head = doc[0]
|
doc[3].head = doc[4]
|
||||||
doc[4].head = doc[4]
|
doc[5].head = doc[4]
|
||||||
doc[5].head = doc[6]
|
doc[6].head = doc[7]
|
||||||
doc[6].head = doc[4]
|
doc[7].head = doc[4]
|
||||||
doc[7].head = doc[6]
|
|
||||||
|
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
@ -271,7 +270,7 @@ def test_split_noun_coordination(
|
||||||
noun_construction_case2,
|
noun_construction_case2,
|
||||||
noun_construction_case3,
|
noun_construction_case3,
|
||||||
noun_construction_case4,
|
noun_construction_case4,
|
||||||
# noun_construction_case5,
|
noun_construction_case5,
|
||||||
noun_construction_case6,
|
noun_construction_case6,
|
||||||
noun_construction_case7,
|
noun_construction_case7,
|
||||||
noun_construction_case8,
|
noun_construction_case8,
|
||||||
|
@ -309,9 +308,9 @@ def test_split_noun_coordination(
|
||||||
assert all(isinstance(phrase, str) for phrase in case4_split)
|
assert all(isinstance(phrase, str) for phrase in case4_split)
|
||||||
assert case4_split == ["hot chicken wings", "hot soup"]
|
assert case4_split == ["hot chicken wings", "hot soup"]
|
||||||
|
|
||||||
# #test 5: multiple modifiers
|
# #test 5: same # of modifiers as nouns
|
||||||
# case5_split = split_noun_coordination(noun_construction_case5)
|
case5_split = split_noun_coordination(noun_construction_case5)
|
||||||
# assert case5_split == None
|
assert case5_split == None
|
||||||
|
|
||||||
# test 6: modifier phrases
|
# test 6: modifier phrases
|
||||||
case6_split = split_noun_coordination(noun_construction_case6)
|
case6_split = split_noun_coordination(noun_construction_case6)
|
||||||
|
@ -325,6 +324,7 @@ def test_split_noun_coordination(
|
||||||
|
|
||||||
# test 7:
|
# test 7:
|
||||||
case7_split = split_noun_coordination(noun_construction_case7)
|
case7_split = split_noun_coordination(noun_construction_case7)
|
||||||
|
print(case7_split)
|
||||||
assert case7_split == ["fresh apples", "juicy apples"]
|
assert case7_split == ["fresh apples", "juicy apples"]
|
||||||
|
|
||||||
# test 8:
|
# test 8:
|
||||||
|
@ -337,12 +337,11 @@ def test_split_noun_coordination(
|
||||||
|
|
||||||
# test 10:
|
# test 10:
|
||||||
case10_split = split_noun_coordination(noun_construction_case10)
|
case10_split = split_noun_coordination(noun_construction_case10)
|
||||||
assert case10_split == ["fresh apples", "quite sour apples", "chicken soup"]
|
assert case10_split == ['fresh apples', 'quite sour apples', 'fresh chicken wings', 'quite sour chicken wings']
|
||||||
|
|
||||||
# test 11:
|
# test 11:
|
||||||
case11_split = split_noun_coordination(noun_construction_case11)
|
case11_split = split_noun_coordination(noun_construction_case11)
|
||||||
assert case11_split == None
|
pass
|
||||||
|
|
||||||
|
|
||||||
################### test factory ##############################
|
################### test factory ##############################
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user