add usecase

parent d66a616f31
commit 81c52c8ff2
@@ -1,5 +1,5 @@
 from .attributeruler import AttributeRuler
-from .coordinationruler import CoordinationSplitter
+#from .coordinationruler import CoordinationSplitter
 from .dep_parser import DependencyParser
 from .edit_tree_lemmatizer import EditTreeLemmatizer
 from .entity_linker import EntityLinker
@@ -22,7 +22,7 @@ from .trainable_pipe import TrainablePipe
 
 __all__ = [
     "AttributeRuler",
-    "CoordinationSplitter",
+    #"CoordinationSplitter",
     "DependencyParser",
     "EditTreeLemmatizer",
     "EntityLinker",
@@ -1,7 +1,6 @@
 from typing import List, Callable, Optional, Union
 from pydantic import BaseModel, validator
 import re
-import en_core_web_sm
 
 from ..tokens import Doc
 from ..language import Language
@@ -9,313 +8,180 @@ from ..vocab import Vocab
 from .pipe import Pipe
 
 ########### DEFAULT COORDINATION SPLITTING RULES ##############
 
-def _split_duplicate_object(doc: Doc) -> Union[List[str], None]:
-    """Split a text with 2 verbs and 1 object (and optionally a subject) into
-    2 texts each with 1 verb, the shared object (and its modifiers), and the subject if present.
-
-    i.e. 'I use and provide clinical supervision' -->
-    ['I use clinical supervision', 'I provide clinical supervision']
-
-    Args:
-        doc (Doc): The spaCy Doc object.
-
-    Returns:
-        List[str]: The split texts.
-    """
-    sentences = []
-
-    for token in doc:
-        if token.pos_ == "VERB" and (token.dep_ == "ROOT" or token.dep_ == "conj"):
-
-            has_AND = False
-            has_second_verb = False
-            has_dobj = False
-            subject = None
-
-            # Find the subject if it exists
-            for possible_subject in token.head.children:
-                if possible_subject.dep_ in ["nsubj", "nsubjpass"]:
-                    subject = possible_subject
-                    break
-
-            for child in token.children:
-
-                if child.pos_ == "CCONJ" and child.lemma_ == "and":
-                    has_AND = True
-
-                if child.pos_ == "VERB" and child.dep_ == "conj":
-                    has_second_verb = True
-                    second_verb = child
-                    first_verb = token.head if token.dep_ == "conj" else token
-
-                    for descendant in second_verb.subtree:
-                        if descendant.dep_ == "dobj":
-                            has_dobj = True
-                            # Collect the full noun phrase for the direct object
-                            dobj_span = doc[
-                                descendant.left_edge.i : descendant.right_edge.i + 1
-                            ]
-                            dobj = dobj_span.text
-
-            if has_AND and has_second_verb and has_dobj:
-                subject_text = subject.text + " " if subject else ""
-                first_text = "{}{} {}".format(subject_text, first_verb, dobj)
-                second_text = "{}{} {}".format(subject_text, second_verb, dobj)
-
-                sentences.extend([first_text, second_text])
-
-    return sentences if sentences else None
-
-
-def _split_on_and(text: str) -> List[str]:
-    """Split a text on 'and' and return a list of the split texts.
-
-    Args:
-        text (str): The text to split.
-
-    Returns:
-        List[str]: The split texts.
-    """
-    text = re.sub(r"\s\s+", " ", text)
-
-    replacements = {
-        ";": ",",
-        ", and ,": " and ",
-        ", and,": " and ",
-        ",and ,": " and ",
-        ", and ": " and ",
-        " and ,": " and ",
-        ",and,": " and ",
-        " and,": " and ",
-        ",and ": " and ",
-    }
-    for old, new in replacements.items():
-        text = text.replace(old, new)
-
-    return [t.strip() for t in re.split(r",| and ", text)]
-
-
-def _split_duplicate_verb(doc: Doc) -> Union[List[str], None]:
-    """Split a text with 1 verb and 2 objects.
-
-    i.e. 'I love using smartphones and apps' -->
-    ['I love using smartphones', 'I love using apps']
-
-    Args:
-        doc (Doc): The spaCy Doc object.
-
-    Returns:
-        List[str]: The split texts.
-    """
-
-    for token in doc:
-
-        if token.pos_ == "VERB" and token.dep_ == "ROOT":
-
-            has_AND = False
-            has_dobj = False
-            has_sec_obj = False
-            subject = ""
-
-            for child in token.children:
-
-                if child.dep_ == "dobj":
-                    has_dobj = True
-
-                    subject = child.text if child.dep_ == "nsubj" else subject
-
-                    objects = " ".join(
-                        [
-                            c.text
-                            for c in token.subtree
-                            if c.text != token.text and c.dep_ != "nsubj"
-                        ]
-                    )
-
-                    split_objects = _split_on_and(objects)
-
-                    object_list = []
-                    for split in split_objects:
-                        object_list.append(split)
-
-                    for subchild in child.children:
-
-                        if subchild.pos_ == "CCONJ" and subchild.lemma_ == "and":
-                            has_AND = True
-
-                        if subchild.dep_ == "conj":
-                            has_sec_obj = True
-
-                    if has_AND and has_dobj and has_sec_obj:
-                        text_list = [
-                            f"{subject} {token.text} {split}.".strip()
-                            for split in object_list
-                        ]
-                        return [text.replace(" ..", ".") for text in text_list]
-
-    return None
-
-
-def _split_skill_mentions(doc: Doc) -> Union[List[str], None]:
-    """Split a text with 2 skills into 2 texts with 1 skill.
-
-    i.e. 'written and oral communication skills' -->
-    ['written communication skills', 'oral communication skills']
-
-    Args:
-        text (str): The text to split.
-
-    Returns:
-        List[str]: The split texts.
-    """
-    for token in doc:
-        if (
-            token.pos_ == "NOUN"
-            and token.lemma_ == "skill"
-            and token.idx == doc[-1].idx
-        ):
-
-            has_AND = False
-
-            root = [token for token in doc if token.dep_ == "ROOT"]
-            if root:
-                root = root[0]
-
-                for child in root.subtree:
-
-                    if child.pos_ == "CCONJ" and child.lemma_ == "and":
-                        has_AND = True
-
-                if has_AND:
-                    skill_def = " ".join(
-                        [c.text for c in root.subtree if c.text != token.text]
-                    )
-
-                    split_skills = _split_on_and(skill_def)
-
-                    skill_lists = []
-                    for split_skill in split_skills:
-                        skill_lists.append("{} {}".format(split_skill, token.text))
-
-                    return skill_lists
-    return None
-
-
-class SplittingRule(BaseModel):
-    function: Callable[[Doc], Union[List[str], None]]
-
-    @validator("function")
-    def check_return_type(cls, v):
-        nlp = en_core_web_sm.load()
-        dummy_doc = nlp("This is a dummy sentence.")
-        result = v(dummy_doc)
-        if result is not None:
-            if not isinstance(result, List):
-                raise ValueError(
-                    "The custom splitting rule must return None or a list."
-                )
-            elif not all(isinstance(item, str) for item in result):
-                raise ValueError(
-                    "The custom splitting rule must return None or a list of strings."
-                )
-        return v
-
-
-@Language.factory(
-    "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"]
-)
-def make_coordination_splitter(nlp: Language, name: str):
-    """Make a CoordinationSplitter component.
-
-    the default splitting rules include:
-
-    - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present.
-    - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object.
-    - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and')
-
-    Args:
-        nlp (Language): The spaCy Language object.
-        name (str): The name of the component.
-
-    RETURNS The CoordinationSplitter component.
-
-    DOCS: xxx
-    """
-
-    return CoordinationSplitter(nlp.vocab, name=name)
-
-
-class CoordinationSplitter(Pipe):
-    def __init__(
-        self,
-        vocab: Vocab,
-        name: str = "coordination_splitter",
-        rules: Optional[List[SplittingRule]] = None,
-    ) -> None:
-        self.name = name
-        self.vocab = vocab
-        if rules is None:
-            default_rules = [
-                _split_duplicate_object,
-                _split_duplicate_verb,
-                _split_skill_mentions,
-            ]
-            self.rules = [SplittingRule(function=rule) for rule in default_rules]
-        else:
-            # Ensure provided rules are wrapped in SplittingRule instances
-            self.rules = [
-                rule
-                if isinstance(rule, SplittingRule)
-                else SplittingRule(function=rule)
-                for rule in rules
-            ]
-
-    def clear_rules(self) -> None:
-        """Clear the default splitting rules."""
-        self.rules = []
-
-    def add_default_rules(self) -> List[SplittingRule]:
-        """Reset the default splitting rules."""
-        default_rules = [
-            _split_duplicate_object,
-            _split_duplicate_verb,
-            _split_skill_mentions,
-        ]
-        self.rules = [SplittingRule(function=rule) for rule in default_rules]
-
-    def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None:
-        """Add a single splitting rule to the default rules."""
-        validated_rule = SplittingRule(function=rule)
-        self.rules.append(validated_rule)
-
-    def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None:
-        """Add a list of splitting rules to the default rules.
-
-        Args:
-            rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules.
-        """
-        for rule in rules:
-            # Wrap each rule in a SplittingRule instance to ensure it's validated
-            validated_rule = SplittingRule(function=rule)
-            self.rules.append(validated_rule)
-
-    def __call__(self, doc: Doc) -> Doc:
-        """Apply the splitting rules to the doc.
-
-        Args:
-            doc (Doc): The spaCy Doc object.
-
-        Returns:
-            Doc: The modified spaCy Doc object.
-        """
-        if doc.lang_ != "en":
-            return doc
-
-        for rule in self.rules:
-            split = rule.function(doc)
-            if split:
-                return Doc(doc.vocab, words=split)
-        return doc
+def split_noun_coordination(doc: Doc) -> Union[List[str], None]:
+    """Identifies and splits phrases with multiple nouns, a modifier
+    and a conjunction.
+
+    Examples:
+    - "apples and oranges" -> None
+    - "green apples and oranges" -> ["green apples", "green oranges"]
+    - "green apples and rotten oranges" -> None
+    - "apples and juicy oranges" -> ["juicy apples", "juicy oranges"]
+    - "hot chicken wings and soup" -> ["hot chicken wings", "hot soup"]
+    - "spicy ice cream and chicken wings" -> ["spicy ice cream", "spicy chicken wings"]
+
+    Args:
+        doc (Doc): The input document.
+
+    Returns:
+        Union[List[str], None]: A list of the coordinated noun phrases,
+            or None if no coordinated noun phrases are found.
+    """
+
+    def _split_doc(doc: Doc) -> bool:
+        noun_modified = False
+        has_conjunction = False
+
+        for token in doc:
+            if token.head.pos_ == 'NOUN':  ## check to see that the phrase is a noun phrase
+                has_modifier = any(child.dep_ == 'amod' for child in token.head.children)  # check to see if the noun has a modifier
+                if has_modifier:
+                    noun_modified = True
+            # check if there is a conjunction linked directly to a noun
+            if token.dep_ == 'conj' and token.head.pos_ == 'NOUN':
+                has_conjunction = True
+
+        return True if noun_modified and has_conjunction else False
+
+    phrases = []
+    modified_nouns = set()
+    to_split = _split_doc(doc)
+
+    if to_split:
+        for token in doc:
+            if token.dep_ == "amod" and token.head.pos_ == "NOUN":
+                modifier = token.text
+                head_noun = token.head
+
+                if head_noun not in modified_nouns:
+                    nouns_to_modify = [head_noun] + list(head_noun.conjuncts)
+
+                    for noun in nouns_to_modify:
+                        compound_parts = [child.text for child in noun.lefts if child.dep_ == "compound"]
+                        complete_noun_phrase = " ".join(compound_parts + [noun.text])
+                        phrases.append(f"{modifier} {complete_noun_phrase}")
+                        modified_nouns.add(noun)  # Mark this noun as modified
+
+        return phrases if phrases != [] else None
+    else:
+        return None
+
+
+###############################################################
+
+# class SplittingRule(BaseModel):
+#     function: Callable[[Doc], Union[List[str], None]]
+
+#     @validator("function")
+#     def check_return_type(cls, v):
+#         nlp = en_core_web_sm.load()
+#         dummy_doc = nlp("This is a dummy sentence.")
+#         result = v(dummy_doc)
+#         if result is not None:
+#             if not isinstance(result, List):
+#                 raise ValueError(
+#                     "The custom splitting rule must return None or a list."
+#                 )
+#             elif not all(isinstance(item, str) for item in result):
+#                 raise ValueError(
+#                     "The custom splitting rule must return None or a list of strings."
+#                 )
+#         return v
+
+
+# @Language.factory(
+#     "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"]
+# )
+# def make_coordination_splitter(nlp: Language, name: str):
+#     """Make a CoordinationSplitter component.
+
+#     the default splitting rules include:
+
+#     - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present.
+#     - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object.
+#     - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and')
+
+#     Args:
+#         nlp (Language): The spaCy Language object.
+#         name (str): The name of the component.
+
+#     RETURNS The CoordinationSplitter component.
+
+#     DOCS: xxx
+#     """
+
+#     return CoordinationSplitter(nlp.vocab, name=name)
+
+
+# class CoordinationSplitter(Pipe):
+#     def __init__(
+#         self,
+#         vocab: Vocab,
+#         name: str = "coordination_splitter",
+#         rules: Optional[List[SplittingRule]] = None,
+#     ) -> None:
+#         self.name = name
+#         self.vocab = vocab
+#         if rules is None:
+#             default_rules = [
+#                 _split_duplicate_object,
+#                 _split_duplicate_verb,
+#                 _split_skill_mentions,
+#             ]
+#             self.rules = [SplittingRule(function=rule) for rule in default_rules]
+#         else:
+#             # Ensure provided rules are wrapped in SplittingRule instances
+#             self.rules = [
+#                 rule
+#                 if isinstance(rule, SplittingRule)
+#                 else SplittingRule(function=rule)
+#                 for rule in rules
+#             ]
+
+#     def clear_rules(self) -> None:
+#         """Clear the default splitting rules."""
+#         self.rules = []
+
+#     def add_default_rules(self) -> List[SplittingRule]:
+#         """Reset the default splitting rules."""
+#         default_rules = [
+#             _split_duplicate_object,
+#             _split_duplicate_verb,
+#             _split_skill_mentions,
+#         ]
+#         self.rules = [SplittingRule(function=rule) for rule in default_rules]
+
+#     def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None:
+#         """Add a single splitting rule to the default rules."""
+#         validated_rule = SplittingRule(function=rule)
+#         self.rules.append(validated_rule)
+
+#     def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None:
+#         """Add a list of splitting rules to the default rules.
+
+#         Args:
+#             rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules.
+#         """
+#         for rule in rules:
+#             # Wrap each rule in a SplittingRule instance to ensure it's validated
+#             validated_rule = SplittingRule(function=rule)
+#             self.rules.append(validated_rule)
+
+#     def __call__(self, doc: Doc) -> Doc:
+#         """Apply the splitting rules to the doc.
+
+#         Args:
+#             doc (Doc): The spaCy Doc object.
+
+#         Returns:
+#             Doc: The modified spaCy Doc object.
+#         """
+#         if doc.lang_ != "en":
+#             return doc
+
+#         for rule in self.rules:
+#             split = rule.function(doc)
+#             if split:
+#                 return Doc(doc.vocab, words=split)
+#         return doc
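A minimal usage sketch (not part of the diff): since the new split_noun_coordination only reads pos_, dep_ and head attributes, it can be exercised on a hand-built Doc with no trained pipeline, exactly as the test fixtures below do; the expected output follows the function's own docstring.

    import spacy
    from spacy.tokens import Doc
    from spacy.pipeline.coordinationruler import split_noun_coordination

    nlp = spacy.blank("en")  # no parser needed; annotations are set by hand
    doc = Doc(nlp.vocab, words=["red", "apples", "and", "oranges"],
              spaces=[True, True, True, False])
    for token, pos, dep in zip(doc, ["ADJ", "NOUN", "CCONJ", "NOUN"],
                               ["amod", "nsubj", "cc", "conj"]):
        token.pos_ = pos
        token.dep_ = dep
    doc[0].head = doc[1]  # "red" -> "apples"
    doc[2].head = doc[3]  # "and" -> "oranges"
    doc[3].head = doc[1]  # "oranges" -> "apples" (conjunct)

    print(split_noun_coordination(doc))  # ['red apples', 'red oranges']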
@@ -1,66 +1,159 @@
 import pytest
 from typing import List
 
 from spacy.tokens import Doc
+import spacy
 
-import en_core_web_sm
+from spacy.pipeline.coordinationruler import split_noun_coordination
 
 
 @pytest.fixture
 def nlp():
-    return en_core_web_sm.load()
-
-
-def _my_custom_splitting_rule(doc: Doc) -> List[str]:
-    split_phrases = []
-    for token in doc:
-        if token.text == "read":
-            split_phrases.append("test1")
-            split_phrases.append("test2")
-    return split_phrases
-
-
-def test_coordinationruler(nlp):
-    doc = nlp("I read and write books")
-    assert len(doc) == 5
-    assert [d.text for d in doc] == ["I", "read", "and", "write", "books"]
-    coord_splitter = nlp.add_pipe("coordination_splitter")
-    assert len(coord_splitter.rules) == 3
-    assert coord_splitter.name == "coordination_splitter"
-    doc_split = coord_splitter(doc)
-    assert len(doc_split) == 2
-    assert [t.text for t in doc_split] == ["I read books", "I write books"]
-
-
-def test_coordinationruler_clear_rules(nlp):
-    coord_splitter = nlp.add_pipe("coordination_splitter")
-    assert len(coord_splitter.rules) == 3
-    coord_splitter.clear_rules()
-    assert len(coord_splitter.rules) == 0
-    assert coord_splitter.rules == []
-
-
-def test_coordinationruler_add_rule(nlp):
-    coord_splitter = nlp.add_pipe("coordination_splitter")
-    assert len(coord_splitter.rules) == 3
-    coord_splitter.add_rule(_my_custom_splitting_rule)
-    assert len(coord_splitter.rules) == 4
-
-
-def test_coordinationruler_add_rules(nlp):
-    doc = nlp("I read and write books")
-    coord_splitter = nlp.add_pipe("coordination_splitter")
-    coord_splitter.clear_rules()
-    coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule])
-    assert len(coord_splitter.rules) == 2
-    doc_split = coord_splitter(doc)
-    assert len(doc_split) == 2
-    assert [t.text for t in doc_split] == ["test1", "test2"]
-
-
-def test_coordinationruler_add_default_rules(nlp):
-    coord_splitter = nlp.add_pipe("coordination_splitter")
-    coord_splitter.clear_rules()
-    assert len(coord_splitter.rules) == 0
-    coord_splitter.add_default_rules()
-    assert len(coord_splitter.rules) == 3
+    return spacy.blank("en")
+
+
+### NOUN CONSTRUCTION CASES ###
+@pytest.fixture
+def noun_construction_case1(nlp):
+    words = ["apples", "and", "oranges"]
+    spaces = [True, True, False]  # Indicates whether the word is followed by a space
+    pos_tags = ["NOUN", "CCONJ", "NOUN"]
+    dep_relations = ["nsubj", "cc", "conj"]
+
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    # set pos_ and dep_ attributes
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # define head relationships manually
+    doc[1].head = doc[2]  # "and" -> "oranges"
+    doc[2].head = doc[0]  # "oranges" -> "apples"
+    doc[0].head = doc[0]
+
+    return doc
+
+
+@pytest.fixture
+def noun_construction_case2(nlp):
+    words = ["red", "apples", "and", "oranges"]
+    spaces = [True, True, True, False]  # Indicates whether the word is followed by a space
+    pos_tags = ["ADJ", "NOUN", "CCONJ", "NOUN"]
+    dep_relations = ["amod", "nsubj", "cc", "conj"]
+
+    # Create a Doc object manually
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    # set pos_ and dep_ attributes
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # define head relationships manually
+    doc[0].head = doc[1]
+    doc[2].head = doc[3]
+    doc[3].head = doc[1]
+
+    return doc
+
+
+@pytest.fixture
+def noun_construction_case3(nlp):
+    words = ["apples", "and", "juicy", "oranges"]
+    spaces = [True, True, True, False]  # Indicates whether the word is followed by a space.
+    pos_tags = ["NOUN", "CCONJ", "ADJ", "NOUN"]
+    dep_relations = ["nsubj", "cc", "amod", "conj"]
+
+    # create a Doc object manually
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    # set POS and dependency tags
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # defining head relationships manually
+    doc[0].head = doc[0]  # "apples" as root, pointing to itself for simplicity.
+    doc[1].head = doc[3]  # "and" -> "oranges"
+    doc[2].head = doc[3]  # "juicy" -> "oranges"
+    doc[3].head = doc[0]  # "oranges" -> "apples", indicating a conjunctive relationship
+
+    return doc
+
+
+@pytest.fixture
+def noun_construction_case4(nlp):
+    words = ["hot", "chicken", "wings", "and", "soup"]
+    spaces = [True, True, True, True, False]  # Indicates whether the word is followed by a space.
+    pos_tags = ["ADJ", "NOUN", "NOUN", "CCONJ", "NOUN"]
+    dep_relations = ["amod", "compound", "ROOT", "cc", "conj"]
+
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # Define head relationships manually for "hot chicken wings and soup".
+    doc[0].head = doc[2]  # "hot" -> "wings"
+    doc[1].head = doc[2]  # "chicken" -> "wings"
+    doc[2].head = doc[2]  # "wings" as root
+    doc[3].head = doc[4]  # "and" -> "soup"
+    doc[4].head = doc[2]  # "soup" -> "wings"
+
+    return doc
+
+
+@pytest.fixture
+def noun_construction_case5(nlp):
+    words = ["green", "apples", "and", "rotten", "oranges"]
+    spaces = [True, True, True, True, False]  # Indicates whether the word is followed by a space.
+    pos_tags = ["ADJ", "NOUN", "CCONJ", "ADJ", "NOUN"]
+    dep_relations = ["amod", "ROOT", "cc", "amod", "conj"]
+
+    doc = Doc(nlp.vocab, words=words, spaces=spaces)
+
+    # Set POS and dependency tags.
+    for token, pos, dep in zip(doc, pos_tags, dep_relations):
+        token.pos_ = pos
+        token.dep_ = dep
+
+    # Define head relationships manually for "green apples and rotten oranges".
+    doc[0].head = doc[1]  # "green" -> "apples"
+    doc[1].head = doc[1]  # "apples" as root
+    doc[2].head = doc[4]  # "and" -> "oranges"
+    doc[3].head = doc[4]  # "rotten" -> "oranges"
+    doc[4].head = doc[1]  # "oranges" -> "apples"
+
+    return doc
+
+
+# test split_noun_coordination on 5 different cases
+def test_split_noun_coordination(noun_construction_case1,
+                                 noun_construction_case2,
+                                 noun_construction_case3,
+                                 noun_construction_case4,
+                                 noun_construction_case5):
+
+    # test 1: no modifier - it should return None from _split_doc
+    case1_split = split_noun_coordination(noun_construction_case1)
+    assert case1_split == None
+
+    # test 2: modifier is at the beginning of the noun phrase
+    case2_split = split_noun_coordination(noun_construction_case2)
+    assert len(case2_split) == 2
+    assert isinstance(case2_split, list)
+    assert all(isinstance(phrase, str) for phrase in case2_split)
+    assert case2_split == ["red apples", "red oranges"]
+
+    # test 3: modifier is at the end of the noun phrase
+    case3_split = split_noun_coordination(noun_construction_case3)
+    assert len(case3_split) == 2
+    assert isinstance(case3_split, list)
+    assert all(isinstance(phrase, str) for phrase in case3_split)
+    assert case3_split == ["juicy apples", "juicy oranges"]
+
+    # test 4: deal with compound nouns
+    case4_split = split_noun_coordination(noun_construction_case4)
+    assert len(case4_split) == 2
+    assert isinstance(case4_split, list)
+    assert all(isinstance(phrase, str) for phrase in case4_split)
+    assert case4_split == ["hot chicken wings", "hot soup"]
+
+    # test 5: multiple modifiers
+    case5_split = split_noun_coordination(noun_construction_case5)
+    assert case5_split == None
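Because the commit comments out the old coordination_splitter factory, split_noun_coordination is left as a plain helper rather than a registered component. A hypothetical way to wire it into a pipeline (the component name here is illustrative, not part of the commit) is spaCy's @Language.component decorator; note the helper needs pos_/dep_/head annotations, so a trained parser would have to run earlier in the pipeline.

    import spacy
    from spacy.language import Language
    from spacy.tokens import Doc
    from spacy.pipeline.coordinationruler import split_noun_coordination

    @Language.component("noun_coordination_splitter")  # illustrative name
    def noun_coordination_splitter(doc: Doc) -> Doc:
        phrases = split_noun_coordination(doc)
        if phrases:
            # Rebuild the doc from the split phrases, one "word" per phrase,
            # mirroring what the old CoordinationSplitter.__call__ returned.
            return Doc(doc.vocab, words=phrases)
        return doc

    # nlp = spacy.load("en_core_web_sm")  # assumes a trained parser is installed
    # nlp.add_pipe("noun_coordination_splitter", last=True)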