mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-26 05:31:15 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			62 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			62 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | ||
| # coding: utf8
 | ||
| """A simple example of extracting relations between phrases and entities using
 | ||
| spaCy's named entity recognizer and the dependency parse. Here, we extract
 | ||
| money and currency values (entities labelled as MONEY) and then check the
 | ||
| dependency tree to find the noun phrase they are referring to – for example:
 | ||
| $9.4 million --> Net income.
 | ||
| 
 | ||
| Compatible with: spaCy v2.0.0+
 | ||
| """
 | ||
| from __future__ import unicode_literals, print_function
 | ||
| 
 | ||
| import plac
 | ||
| import spacy
 | ||
| 
 | ||
| 
 | ||
# Sample sentences the example runs on; each mentions at least one amount of
# money.
TEXTS = [
    "Net income was $9.4 million compared to the prior year of $2.7 million.",
    "Revenue exceeded twelve billion dollars, with a loss of $1b.",
]
 | ||
| 
 | ||
| 
 | ||
@plac.annotations(
    model=("Model to load (needs parser and NER)", "positional", None, str))
def main(model='en_core_web_sm'):
    """Load a spaCy model and print the money relations found in TEXTS.

    model (str): Name passed to ``spacy.load``.

    For every text, one tab-separated row is printed per relation: the text
    of the related phrase (left-aligned, padded to 10 characters), the entity
    type of the money token and the text of the money token.
    """
    nlp = spacy.load(model)
    print("Loaded model '%s'" % model)
    print("Processing %d texts" % len(TEXTS))

    row = '{:<10}\t{}\t{}'
    for text in TEXTS:
        # Parse one text at a time and report its relations straight away.
        for phrase, amount in extract_currency_relations(nlp(text)):
            print(row.format(phrase.text, amount.ent_type_, amount.text))
 | ||
| 
 | ||
| 
 | ||
def extract_currency_relations(doc):
    """Pair every MONEY token in ``doc`` with the phrase it is attached to.

    Entities and noun chunks are merged into single tokens first, so ``doc``
    is modified in place by this call.

    Two dependency patterns are recognised for a token whose entity type is
    ``MONEY``:

    * the token is an ``attr`` or ``dobj``: it is paired with the first
      ``nsubj`` found among the left children of its head (nothing is
      recorded when the head has no such child);
    * the token is a ``pobj`` whose head is a ``prep``: it is paired with the
      head of that preposition.

    doc: Parsed document exposing ``ents`` and ``noun_chunks`` and yielding
        tokens when iterated.
    RETURNS (list): ``(phrase, money)`` tuples, in the order the money tokens
        are encountered while iterating ``doc``.
    """
    # Snapshot both span sequences before merging anything. Plain list
    # concatenation (instead of ``[*a, *b]`` unpacking, which is a syntax
    # error before Python 3.5) keeps this module importable on Python 2,
    # which the ``__future__`` import at the top of the file targets.
    spans = list(doc.ents) + list(doc.noun_chunks)
    for span in spans:
        # NOTE(review): Span.merge() appears to be deprecated in later spaCy
        # releases in favour of Doc.retokenize() -- confirm against the
        # installed version before upgrading.
        span.merge()

    relations = []
    for money in doc:
        if money.ent_type_ != 'MONEY':
            continue
        if money.dep_ in ('attr', 'dobj'):
            subjects = [w for w in money.head.lefts if w.dep_ == 'nsubj']
            if subjects:
                relations.append((subjects[0], money))
        elif money.dep_ == 'pobj' and money.head.dep_ == 'prep':
            relations.append((money.head.head, money))
    return relations
 | ||
| 
 | ||
| 
 | ||
if __name__ == "__main__":
    # Run main() through plac; its optional positional ``model`` argument is
    # declared by the @plac.annotations decorator.
    plac.call(main)

    # Expected output:
    # Net income      MONEY   $9.4 million
    # the prior year  MONEY   $2.7 million
    # Revenue         MONEY   twelve billion dollars
    # a loss          MONEY   1b
 |