mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
		
						commit
						4f42bcdd13
					
				| 
						 | 
					@ -1,6 +1,6 @@
 | 
				
			||||||
# fmt: off
 | 
					# fmt: off
 | 
				
			||||||
__title__ = "spacy"
 | 
					__title__ = "spacy"
 | 
				
			||||||
__version__ = "3.0.0.dev13"
 | 
					__version__ = "3.0.0.dev14"
 | 
				
			||||||
__release__ = True
 | 
					__release__ = True
 | 
				
			||||||
__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 | 
					__download_url__ = "https://github.com/explosion/spacy-models/releases/download"
 | 
				
			||||||
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 | 
					__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -332,13 +332,14 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        n_words = sum(len(ex.predicted) for ex in dev_examples)
 | 
					        n_words = sum(len(ex.predicted) for ex in dev_examples)
 | 
				
			||||||
 | 
					        batch_size = cfg.get("evaluation_batch_size", 128)
 | 
				
			||||||
        start_time = timer()
 | 
					        start_time = timer()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if optimizer.averages:
 | 
					        if optimizer.averages:
 | 
				
			||||||
            with nlp.use_params(optimizer.averages):
 | 
					            with nlp.use_params(optimizer.averages):
 | 
				
			||||||
                scorer = nlp.evaluate(dev_examples, batch_size=32)
 | 
					                scorer = nlp.evaluate(dev_examples, batch_size=batch_size)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            scorer = nlp.evaluate(dev_examples, batch_size=32)
 | 
					            scorer = nlp.evaluate(dev_examples, batch_size=batch_size)
 | 
				
			||||||
        end_time = timer()
 | 
					        end_time = timer()
 | 
				
			||||||
        wps = n_words / (end_time - start_time)
 | 
					        wps = n_words / (end_time - start_time)
 | 
				
			||||||
        scores = scorer.scores
 | 
					        scores = scorer.scores
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -45,18 +45,22 @@ class Corpus:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def make_examples(self, nlp, reference_docs, max_length=0):
 | 
					    def make_examples(self, nlp, reference_docs, max_length=0):
 | 
				
			||||||
        for reference in reference_docs:
 | 
					        for reference in reference_docs:
 | 
				
			||||||
            if len(reference) >= max_length >= 1:
 | 
					            if len(reference) == 0:
 | 
				
			||||||
                if reference.is_sentenced:
 | 
					                continue
 | 
				
			||||||
                    for ref_sent in reference.sents:
 | 
					            elif max_length == 0 or len(reference) < max_length:
 | 
				
			||||||
                        yield Example(
 | 
					 | 
				
			||||||
                            nlp.make_doc(ref_sent.text),
 | 
					 | 
				
			||||||
                            ref_sent.as_doc()
 | 
					 | 
				
			||||||
                        )
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                yield Example(
 | 
					                yield Example(
 | 
				
			||||||
                    nlp.make_doc(reference.text),
 | 
					                    nlp.make_doc(reference.text),
 | 
				
			||||||
                    reference
 | 
					                    reference
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
 | 
					            elif reference.is_sentenced:
 | 
				
			||||||
 | 
					                for ref_sent in reference.sents:
 | 
				
			||||||
 | 
					                    if len(ref_sent) == 0:
 | 
				
			||||||
 | 
					                        continue
 | 
				
			||||||
 | 
					                    elif max_length == 0 or len(ref_sent) < max_length:
 | 
				
			||||||
 | 
					                        yield Example(
 | 
				
			||||||
 | 
					                            nlp.make_doc(ref_sent.text),
 | 
				
			||||||
 | 
					                            ref_sent.as_doc()
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    def make_examples_gold_preproc(self, nlp, reference_docs):
 | 
					    def make_examples_gold_preproc(self, nlp, reference_docs):
 | 
				
			||||||
        for reference in reference_docs:
 | 
					        for reference in reference_docs:
 | 
				
			||||||
| 
						 | 
					@ -65,7 +69,7 @@ class Corpus:
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                ref_sents = [reference]
 | 
					                ref_sents = [reference]
 | 
				
			||||||
            for ref_sent in ref_sents:
 | 
					            for ref_sent in ref_sents:
 | 
				
			||||||
                yield Example(
 | 
					                eg = Example(
 | 
				
			||||||
                    Doc(
 | 
					                    Doc(
 | 
				
			||||||
                        nlp.vocab, 
 | 
					                        nlp.vocab, 
 | 
				
			||||||
                        words=[w.text for w in ref_sent],
 | 
					                        words=[w.text for w in ref_sent],
 | 
				
			||||||
| 
						 | 
					@ -73,6 +77,8 @@ class Corpus:
 | 
				
			||||||
                    ),
 | 
					                    ),
 | 
				
			||||||
                    ref_sent
 | 
					                    ref_sent
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
 | 
					                if len(eg.x):
 | 
				
			||||||
 | 
					                    yield eg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def read_docbin(self, vocab, locs):
 | 
					    def read_docbin(self, vocab, locs):
 | 
				
			||||||
        """ Yield training examples as example dicts """
 | 
					        """ Yield training examples as example dicts """
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -449,7 +449,7 @@ cdef class Parser:
 | 
				
			||||||
                if component is self:
 | 
					                if component is self:
 | 
				
			||||||
                    break
 | 
					                    break
 | 
				
			||||||
                if hasattr(component, "pipe"):
 | 
					                if hasattr(component, "pipe"):
 | 
				
			||||||
                    doc_sample = list(component.pipe(doc_sample))
 | 
					                    doc_sample = list(component.pipe(doc_sample, batch_size=8))
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
                    doc_sample = [component(doc) for doc in doc_sample]
 | 
					                    doc_sample = [component(doc) for doc in doc_sample]
 | 
				
			||||||
        if doc_sample:
 | 
					        if doc_sample:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user