diff --git a/.gitignore b/.gitignore
index 40a800245..5c75b8b05 100644
--- a/.gitignore
+++ b/.gitignore
@@ -96,5 +96,8 @@ setup.py
 # Windows local helper files
 *.bat
 
+# Mac OS X
+*.DS_Store
+
 # Komodo project files
-*.komodoproject
\ No newline at end of file
+*.komodoproject
diff --git a/examples/InventoryCount/Instructions.md b/examples/InventoryCount/Instructions.md
new file mode 100644
index 000000000..456f5d4fe
--- /dev/null
+++ b/examples/InventoryCount/Instructions.md
@@ -0,0 +1,3 @@
+An example of inventory counting using the spaCy NLP library. Meant to show how to instantiate spaCy's English class once, and allow reusability by reloading the main module.
+
+In the future, a better implementation of this example would be to apply machine learning to each query and learn what to classify as the quantitative statement (55 kgs OF) vs. the actual item of count (how likely is a preposition object to be the item of count if x, y, z qualifications appear in the statement).
diff --git a/examples/InventoryCount/inventory.py b/examples/InventoryCount/inventory.py
new file mode 100644
index 000000000..abc031513
--- /dev/null
+++ b/examples/InventoryCount/inventory.py
@@ -0,0 +1,39 @@
+class Inventory(object):
+    """
+    Inventory class - a struct{} like feature to house inventory counts
+    across modules.
+
+    A count is only usable once every field is filled in
+    (see isValid).
+    """
+    originalQuery = None  # the raw statement the count was parsed from
+    item = ""  # what is being counted
+    unit = ""  # the unit the amount is expressed in
+    amount = ""  # the quantity, as text
+
+    def __init__(self, statement):
+        """
+        Constructor - only takes in the original query/statement
+        :param statement: the raw query text this count is parsed from
+        """
+        self.originalQuery = statement
+
+    def __str__(self):
+        return str(self.amount) + ' ' + str(self.unit) + ' ' + str(self.item)
+
+    def printInfo(self):
+        """
+        Prints the original query and every decoded field to stdout
+        """
+        print '-------------Inventory Count------------'
+        print "Original Query: " + str(self.originalQuery)
+        print 'Amount: ' + str(self.amount)
+        print 'Unit: ' + str(self.unit)
+        print 'Item: ' + str(self.item)
+        print '----------------------------------------'
+
+    def isValid(self):
+        """
+        :return: True only if the query, item, unit and amount are all non-empty
+        """
+        return bool(self.item and self.unit and self.amount and self.originalQuery)
diff --git a/examples/InventoryCount/inventoryCount.py b/examples/InventoryCount/inventoryCount.py
new file mode 100644
index 000000000..b1b7b43c8
--- /dev/null
+++ b/examples/InventoryCount/inventoryCount.py
@@ -0,0 +1,99 @@
+from inventory import Inventory
+
+# Labels accepted as nouns. NOTE(review): NNS/NN look like fine-grained tags
+# rather than coarse part-of-speech values - confirm they can show up in pos_.
+NOUN_TAGS = (u'NOUN', u'NNS', u'NN')
+
+
+def runTest(nlp):
+    """
+    Runs both decoders over a fixed set of sample statements and prints the results
+    @param nlp : callable turning a unicode statement into an NLP Doc object
+    """
+    statements = [
+        u'6 lobster cakes',
+        u'6 avacados',
+        u'fifty five carrots',
+        u'i have 55 carrots',
+        u'i got me some 9 cabbages',
+        u'i got 65 kgs of carrots',
+    ]
+    testset = [nlp(statement) for statement in statements]
+
+    result = []
+    for doc in testset:
+        c = decodeInventoryEntry_level1(doc)
+        if not c.isValid():
+            # The basic scheme came up short, try the prepositional one
+            c = decodeInventoryEntry_level2(doc)
+        result.append(c)
+
+    for i in result:
+        i.printInfo()
+
+
+def decodeInventoryEntry_level1(document):
+    """
+    Decodes a basic entry such as: '6 lobster cakes' or '6 cakes'
+    @param document : NLP Doc object
+    :return: Inventory object, check isValid() to see if it was decoded correctly
+    """
+    count = Inventory(str(document))
+    for token in document:
+        if token.pos_ in NOUN_TAGS:
+            item = str(token)
+
+            for child in token.children:
+                if child.dep_ == u'compound' or child.dep_ == u'ad':
+                    # Keep the words apart so the item reads 'lobster cakes'
+                    item = str(child) + ' ' + item
+                elif child.dep_ == u'nummod':
+                    count.amount = str(child).strip()
+                    for numerical_child in child.children:
+                        # The number words are joined as text, no arithmetic is done
+                        count.amount = str(numerical_child) + str(count.amount).strip()
+                else:
+                    print "WARNING: unknown child: " + str(child) + ':'+str(child.dep_)
+
+            # With no separate unit in the statement, the item doubles as the unit
+            count.item = item
+            count.unit = item
+
+    return count
+
+
+def decodeInventoryEntry_level2(document):
+    """
+    Entry level 2, a more complicated parsing scheme that covers examples such as
+    'i have 80 boxes of freshly baked pies'
+
+    @param document : NLP Doc object
+    :return: Inventory object, check isValid() to see if it was decoded correctly
+    """
+    count = Inventory(str(document))
+
+    for token in document:
+        # Look for a preposition object that is a noun (this is the item we are counting).
+        if token.dep_ not in (u'pobj', u'meta') or token.pos_ not in NOUN_TAGS:
+            continue
+
+        # Go through all the token's children, these are possible adjectives and other add-ons
+        # this deals with cases such as 'hollow rounded waffle pancakes'
+        words = [str(child) for child in token.children]
+        words.append(str(token))
+        count.item = ' '.join(words)
+
+        # Get the head of the item:
+        if token.head.dep_ != u'prep':
+            # Break out of the loop, this is a confusing entry
+            break
+
+        # The head of the preposition carries the unit ('boxes' in '80 boxes of pies')
+        amountUnit = token.head.head
+        count.unit = str(amountUnit)
+
+        # Its numeric children make up the amount
+        for inner in amountUnit.children:
+            if inner.pos_ == u'NUM':
+                count.amount += str(inner)
+    return count
diff --git a/examples/InventoryCount/main.py b/examples/InventoryCount/main.py
new file mode 100644
index 000000000..497a740cb
--- /dev/null
+++ b/examples/InventoryCount/main.py
@@ -0,0 +1,34 @@
+import os
+
+from spacy.en import English, LOCAL_DATA_DIR
+
+import inventoryCount as mainModule
+
+# NOTE(review): data_dir is resolved here but never handed to English() below
+data_dir = os.environ.get('SPACY_DATA', LOCAL_DATA_DIR)
+
+if __name__ == '__main__':
+    # Main module for this example - loads the English main NLP class,
+    # and keeps it in RAM while waiting for the user to re-run it. Allows the
+    # developer to re-edit their module under testing without having
+    # to wait as long to load the English class
+
+    # Set the NLP object here for the parameters you want to see,
+    # or just leave it blank and get all the opts
+    print "Loading English module... this will take a while."
+    nlp = English()
+    print "Done loading English module."
+    while True:
+        try:
+            reload(mainModule)
+            mainModule.runTest(nlp)
+        except Exception as e:
+            print "Unexpected error: " + str(e)
+
+        # Wait for the user even after a failure, otherwise a module that keeps
+        # raising would be reloaded and re-run in a tight loop
+        try:
+            raw_input('================ To reload main module, press Enter ================')
+        except EOFError:
+            # stdin is closed, so nobody can ask for another reload
+            break