From 81c52c8ff22cba36f8f189919a8c9c1135eaceba Mon Sep 17 00:00:00 2001 From: India Kerle Date: Thu, 29 Feb 2024 14:45:07 -0300 Subject: [PATCH] add usecase --- spacy/pipeline/__init__.py | 4 +- spacy/pipeline/coordinationruler.py | 480 +++++++----------- .../tests/pipeline/test_coordinationruler.py | 185 +++++-- 3 files changed, 314 insertions(+), 355 deletions(-) diff --git a/spacy/pipeline/__init__.py b/spacy/pipeline/__init__.py index 02c900310..52e30ad4f 100644 --- a/spacy/pipeline/__init__.py +++ b/spacy/pipeline/__init__.py @@ -1,5 +1,5 @@ from .attributeruler import AttributeRuler -from .coordinationruler import CoordinationSplitter +#from .coordinationruler import CoordinationSplitter from .dep_parser import DependencyParser from .edit_tree_lemmatizer import EditTreeLemmatizer from .entity_linker import EntityLinker @@ -22,7 +22,7 @@ from .trainable_pipe import TrainablePipe __all__ = [ "AttributeRuler", - "CoordinationSplitter", + #"CoordinationSplitter", "DependencyParser", "EditTreeLemmatizer", "EntityLinker", diff --git a/spacy/pipeline/coordinationruler.py b/spacy/pipeline/coordinationruler.py index f2b62ac85..e171dca9b 100644 --- a/spacy/pipeline/coordinationruler.py +++ b/spacy/pipeline/coordinationruler.py @@ -1,7 +1,6 @@ from typing import List, Callable, Optional, Union from pydantic import BaseModel, validator import re -import en_core_web_sm from ..tokens import Doc from ..language import Language @@ -9,313 +8,180 @@ from ..vocab import Vocab from .pipe import Pipe ########### DEFAULT COORDINATION SPLITTING RULES ############## - - -def _split_duplicate_object(doc: Doc) -> Union[List[str], None]: - """Split a text with 2 verbs and 1 object (and optionally a subject) into - 2 texts each with 1 verb, the shared object (and its modifiers), and the subject if present. - - i.e. 'I use and provide clinical supervision' --> - ['I use clinical supervision', 'I provide clinical supervision'] - + +def split_noun_coordination(doc: Doc) -> Union[List[str], None]: + """Identifies and splits phrases with multiple nouns, a modifier + and a conjunction. + + Examples: + - "apples and oranges" -> None + - "green apples and oranges" -> ["green apples", "green oranges"] + - "green apples and rotten oranges" -> None + - "apples and juicy oranges" -> ["juicy apples", "juicy oranges"] + - "hot chicken wings and soup" -> ["hot chicken wings", "hot soup"] + - "spicy ice cream and chicken wings" -> ["spicy ice cream", "spicy chicken wings"] + Args: - doc (Doc): The spaCy Doc object. + doc (Doc): The input document. Returns: - List[str]: The split texts. + Union[List[str], None]: A list of the coordinated noun phrases, + or None if no coordinated noun phrases are found. 
""" - sentences = [] - - for token in doc: - if token.pos_ == "VERB" and (token.dep_ == "ROOT" or token.dep_ == "conj"): - - has_AND = False - has_second_verb = False - has_dobj = False - subject = None - - # Find the subject if it exists - for possible_subject in token.head.children: - if possible_subject.dep_ in ["nsubj", "nsubjpass"]: - subject = possible_subject - break - - for child in token.children: - - if child.pos_ == "CCONJ" and child.lemma_ == "and": - has_AND = True - - if child.pos_ == "VERB" and child.dep_ == "conj": - has_second_verb = True - second_verb = child - first_verb = token.head if token.dep_ == "conj" else token - - for descendant in second_verb.subtree: - if descendant.dep_ == "dobj": - has_dobj = True - # Collect the full noun phrase for the direct object - dobj_span = doc[ - descendant.left_edge.i : descendant.right_edge.i + 1 - ] - dobj = dobj_span.text - - if has_AND and has_second_verb and has_dobj: - subject_text = subject.text + " " if subject else "" - first_text = "{}{} {}".format(subject_text, first_verb, dobj) - second_text = "{}{} {}".format(subject_text, second_verb, dobj) - - sentences.extend([first_text, second_text]) - - return sentences if sentences else None - - -def _split_on_and(text: str) -> List[str]: - """Split a text on 'and' and return a list of the split texts. - - Args: - text (str): The text to split. - - Returns: - List[str]: The split texts. - """ - text = re.sub(r"\s\s+", " ", text) - - replacements = { - ";": ",", - ", and ,": " and ", - ", and,": " and ", - ",and ,": " and ", - ", and ": " and ", - " and ,": " and ", - ",and,": " and ", - " and,": " and ", - ",and ": " and ", - } - for old, new in replacements.items(): - text = text.replace(old, new) - - return [t.strip() for t in re.split(r",| and ", text)] - - -def _split_duplicate_verb(doc: Doc) -> Union[List[str], None]: - """Split a text with 1 verb and 2 objects. - - i.e. 'I love using smartphones and apps' --> - ['I love using smartphones', 'I love using apps'] - - Args: - doc (Doc): The spaCy Doc object. - - Returns: - List[str]: The split texts. - """ - - for token in doc: - - if token.pos_ == "VERB" and token.dep_ == "ROOT": - - has_AND = False - has_dobj = False - has_sec_obj = False - subject = "" - - for child in token.children: - - if child.dep_ == "dobj": - has_dobj = True - - subject = child.text if child.dep_ == "nsubj" else subject - - objects = " ".join( - [ - c.text - for c in token.subtree - if c.text != token.text and c.dep_ != "nsubj" - ] - ) - - split_objects = _split_on_and(objects) - - object_list = [] - for split in split_objects: - object_list.append(split) - - for subchild in child.children: - - if subchild.pos_ == "CCONJ" and subchild.lemma_ == "and": - has_AND = True - - if subchild.dep_ == "conj": - has_sec_obj = True - - if has_AND and has_dobj and has_sec_obj: - text_list = [ - f"{subject} {token.text} {split}.".strip() - for split in object_list - ] - return [text.replace(" ..", ".") for text in text_list] - - return None - - -def _split_skill_mentions(doc: Doc) -> Union[List[str], None]: - """Split a text with 2 skills into 2 texts with 1 skill. - - i.e. 'written and oral communication skills' --> - ['written communication skills', 'oral communication skills'] - - Args: - text (str): The text to split. - - Returns: - List[str]: The split texts. 
- """ - for token in doc: - if ( - token.pos_ == "NOUN" - and token.lemma_ == "skill" - and token.idx == doc[-1].idx - ): - - has_AND = False - - root = [token for token in doc if token.dep_ == "ROOT"] - if root: - root = root[0] - - for child in root.subtree: - - if child.pos_ == "CCONJ" and child.lemma_ == "and": - has_AND = True - - if has_AND: - skill_def = " ".join( - [c.text for c in root.subtree if c.text != token.text] - ) - - split_skills = _split_on_and(skill_def) - - skill_lists = [] - for split_skill in split_skills: - skill_lists.append("{} {}".format(split_skill, token.text)) - - return skill_lists - return None - - -class SplittingRule(BaseModel): - function: Callable[[Doc], Union[List[str], None]] - - @validator("function") - def check_return_type(cls, v): - nlp = en_core_web_sm.load() - dummy_doc = nlp("This is a dummy sentence.") - result = v(dummy_doc) - if result is not None: - if not isinstance(result, List): - raise ValueError( - "The custom splitting rule must return None or a list." - ) - elif not all(isinstance(item, str) for item in result): - raise ValueError( - "The custom splitting rule must return None or a list of strings." - ) - return v - - -@Language.factory( - "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"] -) -def make_coordination_splitter(nlp: Language, name: str): - """Make a CoordinationSplitter component. - - the default splitting rules include: - - - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present. - - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object. - - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and') - - - Args: - nlp (Language): The spaCy Language object. - name (str): The name of the component. - - RETURNS The CoordinationSplitter component. - - DOCS: xxx - """ - - return CoordinationSplitter(nlp.vocab, name=name) - - -class CoordinationSplitter(Pipe): - def __init__( - self, - vocab: Vocab, - name: str = "coordination_splitter", - rules: Optional[List[SplittingRule]] = None, - ) -> None: - self.name = name - self.vocab = vocab - if rules is None: - default_rules = [ - _split_duplicate_object, - _split_duplicate_verb, - _split_skill_mentions, - ] - self.rules = [SplittingRule(function=rule) for rule in default_rules] - else: - # Ensure provided rules are wrapped in SplittingRule instances - self.rules = [ - rule - if isinstance(rule, SplittingRule) - else SplittingRule(function=rule) - for rule in rules - ] - - def clear_rules(self) -> None: - """Clear the default splitting rules.""" - self.rules = [] - - def add_default_rules(self) -> List[SplittingRule]: - """Reset the default splitting rules.""" - default_rules = [ - _split_duplicate_object, - _split_duplicate_verb, - _split_skill_mentions, - ] - self.rules = [SplittingRule(function=rule) for rule in default_rules] - - def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None: - """Add a single splitting rule to the default rules.""" - validated_rule = SplittingRule(function=rule) - self.rules.append(validated_rule) - - def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None: - """Add a list of splitting rules to the default rules. 
-
-        Args:
-            rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules.
-        """
-        for rule in rules:
-            # Wrap each rule in a SplittingRule instance to ensure it's validated
-            validated_rule = SplittingRule(function=rule)
-            self.rules.append(validated_rule)
-
-    def __call__(self, doc: Doc) -> Doc:
-        """Apply the splitting rules to the doc.
-
-        Args:
-            doc (Doc): The spaCy Doc object.
-
-        Returns:
-            Doc: The modified spaCy Doc object.
-        """
-        if doc.lang_ != "en":
-            return doc
-
-        for rule in self.rules:
-            split = rule.function(doc)
-            if split:
-                return Doc(doc.vocab, words=split)
-        return doc
+    def _split_doc(doc: Doc) -> bool:
+        noun_modified = False
+        has_conjunction = False
+
+        for token in doc:
+            if token.head.pos_ == "NOUN":
+                # the token attaches to a noun: check whether that noun has a modifier
+                has_modifier = any(
+                    child.dep_ == "amod" for child in token.head.children
+                )
+                if has_modifier:
+                    noun_modified = True
+            # check if there is a conjunction linked directly to a noun
+            if token.dep_ == "conj" and token.head.pos_ == "NOUN":
+                has_conjunction = True
+
+        return noun_modified and has_conjunction
+
+    phrases = []
+    modified_nouns = set()
+    to_split = _split_doc(doc)
+
+    if to_split:
+        for token in doc:
+            if token.dep_ == "amod" and token.head.pos_ == "NOUN":
+                modifier = token.text
+                head_noun = token.head
+
+                if head_noun not in modified_nouns:
+                    conjuncts = list(head_noun.conjuncts)
+                    # skip coordinations where another noun brings its own modifier,
+                    # e.g. "green apples and rotten oranges"
+                    if any(
+                        child.dep_ == "amod"
+                        for conjunct in conjuncts
+                        for child in conjunct.children
+                    ):
+                        continue
+                    # keep the coordinated nouns in surface order
+                    nouns_to_modify = sorted(
+                        [head_noun] + conjuncts, key=lambda noun: noun.i
+                    )
+
+                    for noun in nouns_to_modify:
+                        compound_parts = [
+                            child.text for child in noun.lefts if child.dep_ == "compound"
+                        ]
+                        complete_noun_phrase = " ".join(compound_parts + [noun.text])
+                        phrases.append(f"{modifier} {complete_noun_phrase}")
+                        modified_nouns.add(noun)  # mark this noun as handled
+
+        return phrases if phrases else None
+    else:
+        return None
+
+
+###############################################################
+
+# class SplittingRule(BaseModel):
+#     function: Callable[[Doc], Union[List[str], None]]
+
+#     @validator("function")
+#     def check_return_type(cls, v):
+#         nlp = en_core_web_sm.load()
+#         dummy_doc = nlp("This is a dummy sentence.")
+#         result = v(dummy_doc)
+#         if result is not None:
+#             if not isinstance(result, List):
+#                 raise ValueError(
+#                     "The custom splitting rule must return None or a list."
+#                 )
+#             elif not all(isinstance(item, str) for item in result):
+#                 raise ValueError(
+#                     "The custom splitting rule must return None or a list of strings."
+#                 )
+#         return v
+
+
+# @Language.factory(
+#     "coordination_splitter", requires=["token.dep", "token.tag", "token.pos"]
+# )
+# def make_coordination_splitter(nlp: Language, name: str):
+#     """Make a CoordinationSplitter component.
+
+#     the default splitting rules include:
+
+#     - _split_duplicate_object: Split a text with 2 verbs and 1 object (and optionally a subject) into two texts each with 1 verb, the shared object (and its modifiers), and the subject if present.
+#     - _split_duplicate_verb: Split a text with 1 verb and 2 objects into two texts each with 1 verb and 1 object.
+#     - _split_skill_mentions: Split a text with 2 skills into 2 texts with 1 skill (the phrase must end with 'skills' and the skills must be separated by 'and')
+
+
+#     Args:
+#         nlp (Language): The spaCy Language object.
+#         name (str): The name of the component.
+
+#     RETURNS The CoordinationSplitter component.
+ +# DOCS: xxx +# """ + +# return CoordinationSplitter(nlp.vocab, name=name) + + +# class CoordinationSplitter(Pipe): +# def __init__( +# self, +# vocab: Vocab, +# name: str = "coordination_splitter", +# rules: Optional[List[SplittingRule]] = None, +# ) -> None: +# self.name = name +# self.vocab = vocab +# if rules is None: +# default_rules = [ +# _split_duplicate_object, +# _split_duplicate_verb, +# _split_skill_mentions, +# ] +# self.rules = [SplittingRule(function=rule) for rule in default_rules] +# else: +# # Ensure provided rules are wrapped in SplittingRule instances +# self.rules = [ +# rule +# if isinstance(rule, SplittingRule) +# else SplittingRule(function=rule) +# for rule in rules +# ] + +# def clear_rules(self) -> None: +# """Clear the default splitting rules.""" +# self.rules = [] + +# def add_default_rules(self) -> List[SplittingRule]: +# """Reset the default splitting rules.""" +# default_rules = [ +# _split_duplicate_object, +# _split_duplicate_verb, +# _split_skill_mentions, +# ] +# self.rules = [SplittingRule(function=rule) for rule in default_rules] + +# def add_rule(self, rule: Callable[[Doc], Union[List[str], None]]) -> None: +# """Add a single splitting rule to the default rules.""" +# validated_rule = SplittingRule(function=rule) +# self.rules.append(validated_rule) + +# def add_rules(self, rules: List[Callable[[Doc], Union[List[str], None]]]) -> None: +# """Add a list of splitting rules to the default rules. + +# Args: +# rules (List[Callable[[Doc], Union[List[str], None]]]): A list of functions to be added as splitting rules. +# """ +# for rule in rules: +# # Wrap each rule in a SplittingRule instance to ensure it's validated +# validated_rule = SplittingRule(function=rule) +# self.rules.append(validated_rule) + +# def __call__(self, doc: Doc) -> Doc: +# """Apply the splitting rules to the doc. + +# Args: +# doc (Doc): The spaCy Doc object. + +# Returns: +# Doc: The modified spaCy Doc object. 
+# """ +# if doc.lang_ != "en": +# return doc + +# for rule in self.rules: +# split = rule.function(doc) +# if split: +# return Doc(doc.vocab, words=split) +# return doc diff --git a/spacy/tests/pipeline/test_coordinationruler.py b/spacy/tests/pipeline/test_coordinationruler.py index be439e9c5..7ca8f39f4 100644 --- a/spacy/tests/pipeline/test_coordinationruler.py +++ b/spacy/tests/pipeline/test_coordinationruler.py @@ -1,66 +1,159 @@ import pytest from typing import List + from spacy.tokens import Doc +import spacy -import en_core_web_sm - +from spacy.pipeline.coordinationruler import split_noun_coordination @pytest.fixture def nlp(): - return en_core_web_sm.load() + return spacy.blank("en") +### NOUN CONSTRUCTION CASES ### +@pytest.fixture +def noun_construction_case1(nlp): + words = ["apples", "and", "oranges"] + spaces = [True, True, False] # Indicates whether the word is followed by a space + pos_tags = ["NOUN", "CCONJ", "NOUN"] + dep_relations = ["nsubj", "cc", "conj"] -def _my_custom_splitting_rule(doc: Doc) -> List[str]: - split_phrases = [] - for token in doc: - if token.text == "read": - split_phrases.append("test1") - split_phrases.append("test2") - return split_phrases + doc = Doc(nlp.vocab, words=words, spaces=spaces) + #set pos_ and dep_ attributes + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + # # define head relationships manually + doc[1].head = doc[2] # "and" -> "oranges" + doc[2].head = doc[0] # "oranges" -> "apples" + doc[0].head = doc[0] + + return doc + +@pytest.fixture +def noun_construction_case2(nlp): + words = ["red", "apples", "and", "oranges"] + spaces = [True, True, True, False] # Indicates whether the word is followed by a space + pos_tags = ["ADJ", "NOUN", "CCONJ", "NOUN"] + dep_relations = ["amod", "nsubj", "cc", "conj"] -def test_coordinationruler(nlp): - doc = nlp("I read and write books") - assert len(doc) == 5 - assert [d.text for d in doc] == ["I", "read", "and", "write", "books"] - coord_splitter = nlp.add_pipe("coordination_splitter") - assert len(coord_splitter.rules) == 3 - assert coord_splitter.name == "coordination_splitter" - doc_split = coord_splitter(doc) - assert len(doc_split) == 2 - assert [t.text for t in doc_split] == ["I read books", "I write books"] + # Create a Doc object manually + doc = Doc(nlp.vocab, words=words, spaces=spaces) + #set pos_ and dep_ attributes + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + # define head relationships manually + doc[0].head = doc[1] + doc[2].head = doc[3] + doc[3].head = doc[1] + + return doc -def test_coordinationruler_clear_rules(nlp): - coord_splitter = nlp.add_pipe("coordination_splitter") - assert len(coord_splitter.rules) == 3 - coord_splitter.clear_rules() - assert len(coord_splitter.rules) == 0 - assert coord_splitter.rules == [] +@pytest.fixture +def noun_construction_case3(nlp): + words = ["apples", "and", "juicy", "oranges"] + spaces = [True, True, True, False] # Indicates whether the word is followed by a space. 
+ pos_tags = ["NOUN", "CCONJ", "ADJ", "NOUN"] + dep_relations = ["nsubj", "cc", "amod", "conj"] + #create a Doc object manually + doc = Doc(nlp.vocab, words=words, spaces=spaces) -def test_coordinationruler_add_rule(nlp): - coord_splitter = nlp.add_pipe("coordination_splitter") - assert len(coord_splitter.rules) == 3 - coord_splitter.add_rule(_my_custom_splitting_rule) - assert len(coord_splitter.rules) == 4 + #set POS and dependency tags + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + #defining head relationships manually + doc[0].head = doc[0] # "apples" as root, pointing to itself for simplicity. + doc[1].head = doc[3] # "and" -> "oranges" + doc[2].head = doc[3] # "juicy" -> "oranges" + doc[3].head = doc[0] # "oranges" -> "apples", indicating a conjunctive relationship + + return doc -def test_coordinationruler_add_rules(nlp): - doc = nlp("I read and write books") - coord_splitter = nlp.add_pipe("coordination_splitter") - coord_splitter.clear_rules() - coord_splitter.add_rules([_my_custom_splitting_rule, _my_custom_splitting_rule]) - assert len(coord_splitter.rules) == 2 - doc_split = coord_splitter(doc) - assert len(doc_split) == 2 +@pytest.fixture +def noun_construction_case4(nlp): + words = ["hot", "chicken", "wings", "and", "soup"] + spaces = [True, True, True, True, False] # Indicates whether the word is followed by a space. + pos_tags= ["ADJ", "NOUN", "NOUN", "CCONJ", "NOUN"] + dep_relations = ["amod", "compound", "ROOT", "cc", "conj"] - assert [t.text for t in doc_split] == ["test1", "test2"] + doc = Doc(nlp.vocab, words=words, spaces=spaces) + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep -def test_coordinationruler_add_default_rules(nlp): - coord_splitter = nlp.add_pipe("coordination_splitter") - coord_splitter.clear_rules() - assert len(coord_splitter.rules) == 0 - coord_splitter.add_default_rules() - assert len(coord_splitter.rules) == 3 + # Define head relationships manually for "hot chicken wings and soup". + doc[0].head = doc[2] # "hot" -> "wings" + doc[1].head = doc[2] # "chicken" -> "wings" + doc[2].head = doc[2] # "wings" as root + doc[3].head = doc[4] # "and" -> "soup" + doc[4].head = doc[2] # "soup" -> "wings" + + return doc + +@pytest.fixture +def noun_construction_case5(nlp): + words = ["green", "apples", "and", "rotten", "oranges"] + spaces = [True, True, True, True, False] # Indicates whether the word is followed by a space. + pos_tags = ["ADJ", "NOUN", "CCONJ", "ADJ", "NOUN"] + dep_relations = ["amod", "ROOT", "cc", "amod", "conj"] + + doc = Doc(nlp.vocab, words=words, spaces=spaces) + + # Set POS and dependency tags. + for token, pos, dep in zip(doc, pos_tags, dep_relations): + token.pos_ = pos + token.dep_ = dep + + # Define head relationships manually for "green apples and rotten oranges". 
+    doc[0].head = doc[1]  # "green" -> "apples"
+    doc[1].head = doc[1]  # "apples" as root
+    doc[2].head = doc[4]  # "and" -> "oranges"
+    doc[3].head = doc[4]  # "rotten" -> "oranges"
+    doc[4].head = doc[1]  # "oranges" -> "apples"
+
+    return doc
+
+
+# test split_noun_coordination on the five construction cases above
+def test_split_noun_coordination(
+    noun_construction_case1,
+    noun_construction_case2,
+    noun_construction_case3,
+    noun_construction_case4,
+    noun_construction_case5,
+):
+    # test 1: no modifier - _split_doc should reject the phrase, so we get None
+    case1_split = split_noun_coordination(noun_construction_case1)
+    assert case1_split is None
+
+    # test 2: modifier on the first noun of the coordination
+    case2_split = split_noun_coordination(noun_construction_case2)
+    assert len(case2_split) == 2
+    assert isinstance(case2_split, list)
+    assert all(isinstance(phrase, str) for phrase in case2_split)
+    assert case2_split == ["red apples", "red oranges"]
+
+    # test 3: modifier on the second (conjunct) noun of the coordination
+    case3_split = split_noun_coordination(noun_construction_case3)
+    assert len(case3_split) == 2
+    assert isinstance(case3_split, list)
+    assert all(isinstance(phrase, str) for phrase in case3_split)
+    assert case3_split == ["juicy apples", "juicy oranges"]
+
+    # test 4: deal with compound nouns
+    case4_split = split_noun_coordination(noun_construction_case4)
+    assert len(case4_split) == 2
+    assert isinstance(case4_split, list)
+    assert all(isinstance(phrase, str) for phrase in case4_split)
+    assert case4_split == ["hot chicken wings", "hot soup"]
+
+    # test 5: each noun has its own modifier, so nothing should be split
+    case5_split = split_noun_coordination(noun_construction_case5)
+    assert case5_split is None
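
Usage note (illustrative sketch, not part of the patch): the snippet below assumes a pipeline that assigns POS tags and a dependency parse, e.g. en_core_web_sm. split_noun_coordination only reads token.pos_, token.dep_ and token.head, so any parsed English doc works; the actual splits depend on the parse the model produces, so real output may differ from the hand-built examples in the tests.

    import spacy

    from spacy.pipeline.coordinationruler import split_noun_coordination

    # Any English pipeline with a tagger and parser will do; en_core_web_sm is one example.
    nlp = spacy.load("en_core_web_sm")

    texts = [
        "apples and oranges",          # no shared modifier
        "green apples and oranges",    # modifier on the first conjunct
        "hot chicken wings and soup",  # compound noun plus shared modifier
    ]
    for text in texts:
        doc = nlp(text)
        print(text, "->", split_noun_coordination(doc))

    # Expected, per the docstring (subject to the model's parse):
    #   apples and oranges -> None
    #   green apples and oranges -> ['green apples', 'green oranges']
    #   hot chicken wings and soup -> ['hot chicken wings', 'hot soup']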