mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Merge branch 'master' of https://github.com/explosion/spaCy
This commit is contained in:
		
						commit
						16ce7409e4
					
				|  | @ -9,6 +9,12 @@ from spacy.gold import GoldParse | |||
| from spacy.tagger import Tagger | ||||
| 
 | ||||
|   | ||||
| try: | ||||
|     unicode | ||||
| except: | ||||
|     unicode = str | ||||
| 
 | ||||
| 
 | ||||
| def train_ner(nlp, train_data, entity_types): | ||||
|     # Add new words to vocab. | ||||
|     for raw_text, _ in train_data: | ||||
|  | @ -24,7 +30,6 @@ def train_ner(nlp, train_data, entity_types): | |||
|             doc = nlp.make_doc(raw_text) | ||||
|             gold = GoldParse(doc, entities=entity_offsets) | ||||
|             ner.update(doc, gold) | ||||
|     ner.model.end_training() | ||||
|     return ner | ||||
| 
 | ||||
| def save_model(ner, model_dir): | ||||
|  | @ -33,8 +38,11 @@ def save_model(ner, model_dir): | |||
|         model_dir.mkdir() | ||||
|     assert model_dir.is_dir() | ||||
| 
 | ||||
|     with (model_dir / 'config.json').open('w') as file_: | ||||
|         json.dump(ner.cfg, file_) | ||||
|     with (model_dir / 'config.json').open('wb') as file_: | ||||
|         data = json.dumps(ner.cfg) | ||||
|         if isinstance(data, unicode): | ||||
|             data = data.encode('utf8') | ||||
|         file_.write(data) | ||||
|     ner.model.dump(str(model_dir / 'model')) | ||||
|     if not (model_dir / 'vocab').exists(): | ||||
|         (model_dir / 'vocab').mkdir() | ||||
|  |  | |||
|  | @ -7,6 +7,6 @@ thinc>=6.2.0,<6.3.0 | |||
| murmurhash>=0.26,<0.27 | ||||
| plac<0.9.3 | ||||
| six | ||||
| ujson | ||||
| ujson>=1.35 | ||||
| cloudpickle | ||||
| sputnik>=0.9.2,<0.10.0 | ||||
|  |  | |||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							|  | @ -234,7 +234,7 @@ def setup_package(): | |||
|                 'cymem>=1.30,<1.32', | ||||
|                 'preshed>=0.46.0,<0.47.0', | ||||
|                 'thinc>=6.2.0,<6.3.0', | ||||
|                 'plac', | ||||
|                 'plac<0.9.3', | ||||
|                 'six', | ||||
|                 'cloudpickle', | ||||
|                 'pathlib', | ||||
|  |  | |||
							
								
								
									
										12
									
								
								spacy/tests/regression/test_issue792.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										12
									
								
								spacy/tests/regression/test_issue792.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,12 @@ | |||
| # coding: utf-8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import pytest | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail | ||||
| @pytest.mark.parametrize('text', ["This is a string ", "This is a string\u0020"]) | ||||
| def test_issue792(en_tokenizer, text): | ||||
|     """Test for Issue #792: Trailing whitespace is removed after parsing.""" | ||||
|     doc = en_tokenizer(text) | ||||
|     assert doc.text_with_ws == text | ||||
|  | @ -500,7 +500,8 @@ cdef class Doc: | |||
|         by the values of the given attribute ID. | ||||
| 
 | ||||
|         Example: | ||||
|             from spacy.en import English, attrs | ||||
|             from spacy.en import English | ||||
|             from spacy import attrs | ||||
|             nlp = English() | ||||
|             tokens = nlp(u'apple apple orange banana') | ||||
|             tokens.count_by(attrs.ORTH) | ||||
|  | @ -585,9 +586,6 @@ cdef class Doc: | |||
|             elif attr_id == POS: | ||||
|                 for i in range(length): | ||||
|                     tokens[i].pos = <univ_pos_t>values[i] | ||||
|             elif attr_id == TAG: | ||||
|                 for i in range(length): | ||||
|                     tokens[i].tag = <univ_pos_t>values[i] | ||||
|             elif attr_id == DEP: | ||||
|                 for i in range(length): | ||||
|                     tokens[i].dep = values[i] | ||||
|  |  | |||
|  | @ -12,10 +12,10 @@ | |||
|         "COMPANY_URL": "https://explosion.ai", | ||||
|         "DEMOS_URL": "https://demos.explosion.ai", | ||||
| 
 | ||||
|         "SPACY_VERSION": "1.5", | ||||
|         "SPACY_VERSION": "1.6", | ||||
|         "LATEST_NEWS": { | ||||
|             "url": "https://explosion.ai/blog/spacy-user-survey", | ||||
|             "title": "The results of the spaCy user survey" | ||||
|             "url": "https://explosion.ai/blog/deep-learning-formula-nlp", | ||||
|             "title": "The new deep learning formula for state-of-the-art NLP models" | ||||
|         }, | ||||
| 
 | ||||
|         "SOCIAL": { | ||||
|  |  | |||
|  | @ -232,7 +232,7 @@ | |||
|             "NLP with spaCy in 10 lines of code": { | ||||
|                 "url": "https://github.com/cytora/pycon-nlp-in-10-lines", | ||||
|                 "author": "Andraz Hribernik et al. (Cytora)", | ||||
|                 "tags": [ "jupyter" ] | ||||
|                 "tags": ["jupyter"] | ||||
|             }, | ||||
|             "Intro to NLP with spaCy": { | ||||
|                 "url": "https://nicschrading.com/project/Intro-to-NLP-with-spaCy/", | ||||
|  | @ -241,7 +241,7 @@ | |||
|             "NLP with spaCy and IPython Notebook": { | ||||
|                 "url": "http://blog.sharepointexperience.com/2016/01/nlp-and-sharepoint-part-1/", | ||||
|                 "author": "Dustin Miller (SharePoint)", | ||||
|                 "tags": [ "jupyter" ] | ||||
|                 "tags": ["jupyter"] | ||||
|             }, | ||||
|             "Getting Started with spaCy": { | ||||
|                 "url": "http://textminingonline.com/getting-started-with-spacy", | ||||
|  | @ -254,7 +254,7 @@ | |||
|             "NLP (almost) From Scratch - POS Network with spaCy": { | ||||
|                 "url": "http://sujitpal.blogspot.de/2016/07/nlp-almost-from-scratch-implementing.html", | ||||
|                 "author": "Sujit Pal", | ||||
|                 "tags": [ "gensim", "keras" ] | ||||
|                 "tags": ["gensim", "keras"] | ||||
|             }, | ||||
|             "NLP tasks with various libraries": { | ||||
|                 "url": "http://clarkgrubb.com/nlp", | ||||
|  | @ -270,44 +270,48 @@ | |||
|             "Modern NLP in Python – What you can learn about food by analyzing a million Yelp reviews": { | ||||
|                 "url": "http://nbviewer.jupyter.org/github/skipgram/modern-nlp-in-python/blob/master/executable/Modern_NLP_in_Python.ipynb", | ||||
|                 "author": "Patrick Harrison (S&P Global)", | ||||
|                 "tags": [ "jupyter", "gensim" ] | ||||
|                 "tags": ["jupyter", "gensim"] | ||||
|             }, | ||||
| 
 | ||||
|             "Deep Learning with custom pipelines and Keras": { | ||||
|                 "url": "https://explosion.ai/blog/spacy-deep-learning-keras", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "keras", "sentiment" ] | ||||
|                 "tags": ["keras", "sentiment"] | ||||
|             }, | ||||
|             "A decomposable attention model for Natural Language Inference": { | ||||
|                 "url": "https://github.com/explosion/spaCy/tree/master/examples/keras_parikh_entailment", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "keras", "similarity" ] | ||||
|                 "tags": ["keras", "similarity"] | ||||
|             }, | ||||
| 
 | ||||
|             "Using the German model": { | ||||
|                 "url": "https://explosion.ai/blog/german-model", | ||||
|                 "author": "Wolfgang Seeker", | ||||
|                 "tags": [ "multi-lingual" ] | ||||
|                 "tags": ["multi-lingual"] | ||||
|             }, | ||||
|             "Sense2vec with spaCy and Gensim": { | ||||
|                 "url": "https://explosion.ai/blog/sense2vec-with-spacy", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "big data", "gensim" ] | ||||
|                 "tags": ["big data", "gensim"] | ||||
|             }, | ||||
|             "Building your bot's brain with Node.js and spaCy": { | ||||
|                 "url": "https://explosion.ai/blog/chatbot-node-js-spacy", | ||||
|                 "author": "Wah Loon Keng", | ||||
|                 "tags": [ "bots", "node.js" ] | ||||
|                 "tags": ["bots", "node.js"] | ||||
|             }, | ||||
|             "An intent classifier with spaCy": { | ||||
|                 "url": "http://blog.themusio.com/2016/07/18/musios-intent-classifier-2/", | ||||
|                 "author": "Musio", | ||||
|                 "tags": [ "bots", "keras" ] | ||||
|                 "tags": ["bots", "keras"] | ||||
|             }, | ||||
|             "Visual Question Answering with spaCy": { | ||||
|                 "url": "http://iamaaditya.github.io/2016/04/visual_question_answering_demo_notebook", | ||||
|                 "author": "Aaditya Prakash", | ||||
|                 "tags": [ "vqa", "keras" ] | ||||
|                 "tags": ["vqa", "keras"] | ||||
|             }, | ||||
|             "Extracting time suggestions from emails with spaCy": { | ||||
|                 "url": "https://medium.com/redsift-outbox/what-time-cc9ce0c2aed2", | ||||
|                 "author": "Chris Savvopoulos", | ||||
|                 "tags": ["ner"] | ||||
|             } | ||||
|         }, | ||||
| 
 | ||||
|  | @ -315,22 +319,22 @@ | |||
|             "Information extraction": { | ||||
|                 "url": "https://github.com/explosion/spaCy/blob/master/examples/information_extraction.py", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "snippet" ] | ||||
|                 "tags": ["snippet"] | ||||
|             }, | ||||
|             "Neural bag of words": { | ||||
|                 "url": "https://github.com/explosion/spaCy/blob/master/examples/nn_text_class.py", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "sentiment" ] | ||||
|                 "tags": ["sentiment"] | ||||
|             }, | ||||
|             "Part-of-speech tagging": { | ||||
|                 "url": "https://github.com/explosion/spaCy/blob/master/examples/pos_tag.py", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "pos" ] | ||||
|                 "tags": ["pos"] | ||||
|             }, | ||||
|             "Parallel parse": { | ||||
|                 "url": "https://github.com/explosion/spaCy/blob/master/examples/parallel_parse.py", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "big data" ] | ||||
|                 "tags": ["big data"] | ||||
|             }, | ||||
|             "Inventory count": { | ||||
|                 "url": "https://github.com/explosion/spaCy/tree/master/examples/inventory_count", | ||||
|  | @ -339,7 +343,7 @@ | |||
|             "Multi-word matches": { | ||||
|                 "url": "https://github.com/explosion/spaCy/blob/master/examples/multi_word_matches.py", | ||||
|                 "author": "Matthew Honnibal", | ||||
|                 "tags": [ "matcher", "out of date" ] | ||||
|                 "tags": ["matcher", "out of date"] | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user