diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py index 1aa6525c8..1b8a1d359 100644 --- a/spacy/pipeline/coordinationruler.py +++ b/spacy/pipeline/coordinationruler.py @@ -26,22 +26,36 @@ def _split_doc(doc: Doc) -> bool: noun_modified = False has_conjunction = False - for token in doc: - if token.head.pos_ == "NOUN": ## check to see that the phrase is a noun phrase - has_modifier = any( - child.dep_ == "amod" for child in token.head.children - ) # check to see if the noun has a modifier - if has_modifier: - noun_modified = True + noun_count = 0 + modifiers = set() + for token in doc: + if token.pos_ == "NOUN": + noun_count += 1 + if token.head.pos_ == "NOUN": ## check to see that the phrase is a noun phrase + for child in token.head.children: + if child.dep_ in ["amod", "advmod", "nmod"]: + modifiers.add(child.text) + noun_modified = True + for child in token.children: + if child.dep_ == "conj" and child.pos_ == "ADJ": + modifiers.add(child.text) + # check if there is a conjunction in the phrase if token.pos_ == "CCONJ": has_conjunction = True - return ( - True if noun_modified and has_conjunction else False - ) # and not all_nouns_modified else False + modifier_count = len(modifiers) + noun_modified = modifier_count > 0 + + all_nouns_modified = modifier_count == noun_count + + if noun_modified and has_conjunction and not all_nouns_modified: + return True + + else: + return False def _collect_modifiers(token: Token) -> List[str]: """Collects adverbial modifiers for a given token. diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py index 7ead426cc..eb55df326 100644 --- a/spacy/tests/pipeline/test_coordinationruler.py +++ b/spacy/tests/pipeline/test_coordinationruler.py @@ -211,7 +211,7 @@ def noun_construction_case10(nlp): words = ["fresh", "but", "quite", "sour", "apples", "and", "chicken", "wings"] spaces = [True, True, True, True, True, True, True, False] pos_tags = ["ADJ", "CCONJ", "ADV", "ADJ", "NOUN", "CCONJ", "NOUN", "NOUN"] - dep_relations = ["amod", "cc", "advmod", "conj", "ROOT", "cc", "conj", "compound"] + dep_relations = ["amod", "cc", "advmod", "amod", "ROOT", "cc", "compound", "conj"] doc = Doc(nlp.vocab, words=words, spaces=spaces) @@ -219,14 +219,13 @@ def noun_construction_case10(nlp): token.pos_ = pos token.dep_ = dep - doc[0].head = doc[4] - doc[1].head = doc[3] - doc[2].head = doc[3] - doc[3].head = doc[0] - doc[4].head = doc[4] - doc[5].head = doc[6] - doc[6].head = doc[4] - doc[7].head = doc[6] + doc[0].head = doc[4] + doc[1].head = doc[4] + doc[2].head = doc[3] + doc[3].head = doc[4] + doc[5].head = doc[4] + doc[6].head = doc[7] + doc[7].head = doc[4] return doc @@ -271,7 +270,7 @@ def test_split_noun_coordination( noun_construction_case2, noun_construction_case3, noun_construction_case4, - # noun_construction_case5, + noun_construction_case5, noun_construction_case6, noun_construction_case7, noun_construction_case8, @@ -309,9 +308,9 @@ def test_split_noun_coordination( assert all(isinstance(phrase, str) for phrase in case4_split) assert case4_split == ["hot chicken wings", "hot soup"] - # #test 5: multiple modifiers - # case5_split = split_noun_coordination(noun_construction_case5) - # assert case5_split == None + # #test 5: same # of modifiers as nouns + case5_split = split_noun_coordination(noun_construction_case5) + assert case5_split == None # test 6: modifier phrases case6_split = split_noun_coordination(noun_construction_case6) @@ -325,6 +324,7 @@ def test_split_noun_coordination( # test 7: case7_split = split_noun_coordination(noun_construction_case7) + print(case7_split) assert case7_split == ["fresh apples", "juicy apples"] # test 8: @@ -337,12 +337,11 @@ def test_split_noun_coordination( # test 10: case10_split = split_noun_coordination(noun_construction_case10) - assert case10_split == ["fresh apples", "quite sour apples", "chicken soup"] + assert case10_split == ['fresh apples', 'quite sour apples', 'fresh chicken wings', 'quite sour chicken wings'] - # test 11: + # test 11: case11_split = split_noun_coordination(noun_construction_case11) - assert case11_split == None - + pass ################### test factory ##############################