Merge pull request #284 from olegzd/olegzd/example/inventoryCount

Added reloadable English() example for inventory counting
This commit is contained in:
Matthew Honnibal 2016-03-25 09:48:47 +11:00
commit 9cd21ad5b5
5 changed files with 167 additions and 1 deletions

3
.gitignore vendored
View File

@ -96,5 +96,8 @@ setup.py
# Windows local helper files # Windows local helper files
*.bat *.bat
# Mac OS X
*.DS_Store
# Komodo project files # Komodo project files
*.komodoproject *.komodoproject

View File

@ -0,0 +1,5 @@
An example of inventory counting using SpaCy.io NLP library. Meant to show how to instantiate Spacy's English class, and allow reusability by reloading the main module.
In the future, a better implementation of this library would be to apply machine learning to each query and learn what to classify as the quantitative statement (55 kgs OF), vs the actual item of count (how likely is a preposition object to be the item of count if x,y,z qualifications appear in the statement).

View File

@ -0,0 +1,35 @@
class Inventory:
"""
Inventory class - a struct{} like feature to house inventory counts
across modules.
"""
originalQuery = None
item = ""
unit = ""
amount = ""
def __init__(self, statement):
"""
Constructor - only takes in the original query/statement
:return: new Inventory object
"""
self.originalQuery = statement
pass
def __str__(self):
return str(self.amount) + ' ' + str(self.unit) + ' ' + str(self.item)
def printInfo(self):
print '-------------Inventory Count------------'
print "Original Query: " + str(self.originalQuery)
print 'Amount: ' + str(self.amount)
print 'Unit: ' + str(self.unit)
print 'Item: ' + str(self.item)
print '----------------------------------------'
def isValid(self):
if not self.item or not self.unit or not self.amount or not self.originalQuery:
return False
else:
return True

View File

@ -0,0 +1,92 @@
from inventory import Inventory
def runTest(nlp):
testset = []
testset += [nlp(u'6 lobster cakes')]
testset += [nlp(u'6 avacados')]
testset += [nlp(u'fifty five carrots')]
testset += [nlp(u'i have 55 carrots')]
testset += [nlp(u'i got me some 9 cabbages')]
testset += [nlp(u'i got 65 kgs of carrots')]
result = []
for doc in testset:
c = decodeInventoryEntry_level1(doc)
if not c.isValid():
c = decodeInventoryEntry_level2(doc)
result.append(c)
for i in result:
i.printInfo()
def decodeInventoryEntry_level1(document):
"""
Decodes a basic entry such as: '6 lobster cake' or '6' cakes
@param document : NLP Doc object
:return: Status if decoded correctly (true, false), and Inventory object
"""
count = Inventory(str(document))
for token in document:
if token.pos_ == (u'NOUN' or u'NNS' or u'NN'):
item = str(token)
for child in token.children:
if child.dep_ == u'compound' or child.dep_ == u'ad':
item = str(child) + str(item)
elif child.dep_ == u'nummod':
count.amount = str(child).strip()
for numerical_child in child.children:
# this isn't arithmetic rather than treating it such as a string
count.amount = str(numerical_child) + str(count.amount).strip()
else:
print "WARNING: unknown child: " + str(child) + ':'+str(child.dep_)
count.item = item
count.unit = item
return count
def decodeInventoryEntry_level2(document):
"""
Entry level 2, a more complicated parsing scheme that covers examples such as
'i have 80 boxes of freshly baked pies'
@document @param document : NLP Doc object
:return: Status if decoded correctly (true, false), and Inventory object-
"""
count = Inventory(str(document))
for token in document:
# Look for a preposition object that is a noun (this is the item we are counting).
# If found, look at its' dependency (if a preposition that is not indicative of
# inventory location, the dependency of the preposition must be a noun
if token.dep_ == (u'pobj' or u'meta') and token.pos_ == (u'NOUN' or u'NNS' or u'NN'):
item = ''
# Go through all the token's children, these are possible adjectives and other add-ons
# this deals with cases such as 'hollow rounded waffle pancakes"
for i in token.children:
item += ' ' + str(i)
item += ' ' + str(token)
count.item = item
# Get the head of the item:
if token.head.dep_ != u'prep':
# Break out of the loop, this is a confusing entry
break
else:
amountUnit = token.head.head
count.unit = str(amountUnit)
for inner in amountUnit.children:
if inner.pos_ == u'NUM':
count.amount += str(inner)
return count

View File

@ -0,0 +1,31 @@
import inventoryCount as mainModule
import os
from spacy.en import English, LOCAL_DATA_DIR
data_dir = os.environ.get('SPACY_DATA', LOCAL_DATA_DIR)
if __name__ == '__main__':
"""
Main module for this example - loads the English main NLP class,
and keeps it in RAM while waiting for the user to re-run it. Allows the
developer to re-edit their module under testing without having
to wait as long to load the English class
"""
# Set the NLP object here for the parameters you want to see,
# or just leave it blank and get all the opts
print "Loading English module... this will take a while."
nlp = English()
print "Done loading English module."
while True:
try:
reload(mainModule)
mainModule.runTest(nlp)
raw_input('================ To reload main module, press Enter ================')
except Exception, e:
print "Unexpected error: " + str(e)
continue