mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 05:01:02 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			93 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from inventory import Inventory
 | |
| 
 | |
| 
 | |
def runTest(nlp):
    """
    Parse a fixed set of sample inventory phrases and print each decoded entry.

    Every phrase is first run through ``nlp``. Each resulting document is
    decoded with decodeInventoryEntry_level1; when that result reports itself
    as not valid, the document is decoded again with
    decodeInventoryEntry_level2. ``printInfo()`` is then called on every entry.

    @param nlp : callable that turns a unicode string into a parsed document
    """
    phrases = (
        u'6 lobster cakes',
        u'6 avacados',
        u'fifty five carrots',
        u'i have 55 carrots',
        u'i got me some 9 cabbages',
        u'i got 65 kgs of carrots',
    )
    # Parse every phrase up front, before any decoding starts.
    documents = [nlp(phrase) for phrase in phrases]

    decoded = []
    for document in documents:
        entry = decodeInventoryEntry_level1(document)
        if not entry.isValid():
            # The simple decoder did not yield a valid entry; try the second one.
            entry = decodeInventoryEntry_level2(document)
        decoded.append(entry)

    for entry in decoded:
        entry.printInfo()
 | |
| 
 | |
| 
 | |
def decodeInventoryEntry_level1(document):
    """
    Decodes a basic entry such as: '6 lobster cakes' or '6 cakes'

    For every noun token in the document, the token's children are inspected:
    'compound'/'ad' children are prefixed to the item name, a 'nummod' child
    (together with its own children) becomes the amount, and any other child
    is reported with a warning on stdout. If several nouns are present, the
    values from the last one win.

    @param document : NLP Doc object
    :return: Inventory object built from the document text. No separate status
             flag is returned; the result object itself carries the outcome.
    """
    # The original test was `pos_ == (u'NOUN' or u'NNS' or u'NN')`, which only
    # ever compared against u'NOUN'; a membership test expresses the intent.
    noun_labels = (u'NOUN', u'NNS', u'NN')

    count = Inventory(str(document))
    for token in document:
        if token.pos_ not in noun_labels:
            continue

        item = str(token)
        for child in token.children:
            if child.dep_ in (u'compound', u'ad'):
                # NOTE(review): modifier and noun are joined without a
                # separator ('lobster' + 'cakes' -> 'lobstercakes'); kept
                # as-is, confirm whether a space is wanted.
                item = str(child) + str(item)
            elif child.dep_ == u'nummod':
                count.amount = str(child).strip()
                for numerical_child in child.children:
                    # String concatenation, not arithmetic: the pieces of a
                    # spelled-out number are glued together as text.
                    count.amount = str(numerical_child) + str(count.amount).strip()
            else:
                # Single-argument print() behaves the same on Python 2 and 3.
                print("WARNING: unknown child: " + str(child) + ':' + str(child.dep_))

        count.item = item
        # The simple form has no separate unit, so the item doubles as the unit.
        count.unit = item

    return count
 | |
| 
 | |
| 
 | |
def decodeInventoryEntry_level2(document):
    """
    Entry level 2, a more complicated parsing scheme that covers examples such as
    'i have 80 boxes of freshly baked pies'

    Looks for a noun that is the object of a preposition (the item being
    counted). The head of that preposition is taken as the unit, and the
    numeric children of the unit are appended to the amount.

    @param document : NLP Doc object
    :return: Inventory object built from the document text. No separate status
             flag is returned; the result object itself carries the outcome.
    """
    # The original tests were `dep_ == (u'pobj' or u'meta')` and
    # `pos_ == (u'NOUN' or u'NNS' or u'NN')`, each of which only ever compared
    # against the first literal; membership tests express the intent.
    object_deps = (u'pobj', u'meta')
    noun_labels = (u'NOUN', u'NNS', u'NN')

    count = Inventory(str(document))

    for token in document:
        # Look for a preposition object that is a noun (this is the item we
        # are counting).
        if token.dep_ not in object_deps or token.pos_ not in noun_labels:
            continue

        # The token's children are possible adjectives and other add-ons;
        # this deals with cases such as 'hollow rounded waffle pancakes'.
        # Every word, including the first, is preceded by a single space.
        words = [str(child) for child in token.children]
        words.append(str(token))
        count.item = ' ' + ' '.join(words)

        # The item's head must be a preposition; anything else is a confusing
        # entry, so stop scanning the document.
        if token.head.dep_ != u'prep':
            break

        # The preposition's own head is the unit word ('boxes' in the example).
        amount_unit = token.head.head
        count.unit = str(amount_unit)

        for inner in amount_unit.children:
            if inner.pos_ == u'NUM':
                # NOTE(review): relies on Inventory.amount starting out as a
                # string so that += concatenates; confirm against Inventory.
                count.amount += str(inner)

    return count
 | |
| 
 | |
| 
 |