Merge branch 'develop' into spacy.io-develop

This commit is contained in:
Ines Montani 2020-07-01 15:37:03 +02:00
commit 85e816738f
12 changed files with 69 additions and 41 deletions

View File

@ -1,6 +1,6 @@
# fmt: off # fmt: off
__title__ = "spacy" __title__ = "spacy_nightly"
__version__ = "3.0.0.dev13" __version__ = "3.0.0a0"
__release__ = True __release__ = True
__download_url__ = "https://github.com/explosion/spacy-models/releases/download" __download_url__ = "https://github.com/explosion/spacy-models/releases/download"
__compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json" __compatibility__ = "https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json"

View File

@ -332,13 +332,14 @@ def create_evaluation_callback(nlp, optimizer, corpus, cfg):
) )
n_words = sum(len(ex.predicted) for ex in dev_examples) n_words = sum(len(ex.predicted) for ex in dev_examples)
batch_size = cfg.get("evaluation_batch_size", 128)
start_time = timer() start_time = timer()
if optimizer.averages: if optimizer.averages:
with nlp.use_params(optimizer.averages): with nlp.use_params(optimizer.averages):
scorer = nlp.evaluate(dev_examples, batch_size=32) scorer = nlp.evaluate(dev_examples, batch_size=batch_size)
else: else:
scorer = nlp.evaluate(dev_examples, batch_size=32) scorer = nlp.evaluate(dev_examples, batch_size=batch_size)
end_time = timer() end_time = timer()
wps = n_words / (end_time - start_time) wps = n_words / (end_time - start_time)
scores = scorer.scores scores = scorer.scores

View File

@ -45,18 +45,22 @@ class Corpus:
def make_examples(self, nlp, reference_docs, max_length=0): def make_examples(self, nlp, reference_docs, max_length=0):
for reference in reference_docs: for reference in reference_docs:
if len(reference) >= max_length >= 1: if len(reference) == 0:
if reference.is_sentenced: continue
for ref_sent in reference.sents: elif max_length == 0 or len(reference) < max_length:
yield Example(
nlp.make_doc(ref_sent.text),
ref_sent.as_doc()
)
else:
yield Example( yield Example(
nlp.make_doc(reference.text), nlp.make_doc(reference.text),
reference reference
) )
elif reference.is_sentenced:
for ref_sent in reference.sents:
if len(ref_sent) == 0:
continue
elif max_length == 0 or len(ref_sent) < max_length:
yield Example(
nlp.make_doc(ref_sent.text),
ref_sent.as_doc()
)
def make_examples_gold_preproc(self, nlp, reference_docs): def make_examples_gold_preproc(self, nlp, reference_docs):
for reference in reference_docs: for reference in reference_docs:
@ -65,7 +69,7 @@ class Corpus:
else: else:
ref_sents = [reference] ref_sents = [reference]
for ref_sent in ref_sents: for ref_sent in ref_sents:
yield Example( eg = Example(
Doc( Doc(
nlp.vocab, nlp.vocab,
words=[w.text for w in ref_sent], words=[w.text for w in ref_sent],
@ -73,6 +77,8 @@ class Corpus:
), ),
ref_sent ref_sent
) )
if len(eg.x):
yield eg
def read_docbin(self, vocab, locs): def read_docbin(self, vocab, locs):
""" Yield training examples as example dicts """ """ Yield training examples as example dicts """

View File

@ -449,7 +449,7 @@ cdef class Parser:
if component is self: if component is self:
break break
if hasattr(component, "pipe"): if hasattr(component, "pipe"):
doc_sample = list(component.pipe(doc_sample)) doc_sample = list(component.pipe(doc_sample, batch_size=8))
else: else:
doc_sample = [component(doc) for doc in doc_sample] doc_sample = [component(doc) for doc in doc_sample]
if doc_sample: if doc_sample:

View File

@ -27,7 +27,7 @@ Button.defaultProps = {
} }
Button.propTypes = { Button.propTypes = {
to: PropTypes.string.isRequired, to: PropTypes.string,
variant: PropTypes.oneOf(['primary', 'secondary', 'tertiary']), variant: PropTypes.oneOf(['primary', 'secondary', 'tertiary']),
large: PropTypes.bool, large: PropTypes.bool,
icon: PropTypes.string, icon: PropTypes.string,

Binary file not shown.

Before

Width:  |  Height:  |  Size: 29 KiB

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 134 KiB

After

Width:  |  Height:  |  Size: 126 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 170 KiB

After

Width:  |  Height:  |  Size: 157 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 384 KiB

After

Width:  |  Height:  |  Size: 354 KiB

47
website/src/pages/404.js Normal file
View File

@ -0,0 +1,47 @@
import React from 'react'
import { window } from 'browser-monads'
import { graphql } from 'gatsby'
import Template from '../templates/index'
import { LandingHeader, LandingTitle } from '../components/landing'
import Button from '../components/button'
export default ({ data, location }) => {
const { nightly } = data.site.siteMetadata
const pageContext = { title: '404 Error', searchExclude: true, isIndex: false }
return (
<Template data={data} pageContext={pageContext} location={location}>
<LandingHeader style={{ minHeight: 400 }} nightly={nightly}>
<LandingTitle>
Ooops, this page
<br />
does not exist!
</LandingTitle>
<br />
<Button onClick={() => window.history.go(-1)} variant="tertiary">
Click here to go back
</Button>
</LandingHeader>
</Template>
)
}
// Page query for the 404 page: selects the `nightly`, `title`, `description`,
// `navigation { text, url }` and `docSearch { apiKey, indexName }` fields of
// `site.siteMetadata`. The component in this module reads `nightly` from the
// result and passes the whole `data` object on to the template.
export const pageQuery = graphql`
query {
site {
siteMetadata {
nightly
title
description
navigation {
text
url
}
docSearch {
apiKey
indexName
}
}
}
}
`

View File

@ -1,7 +0,0 @@
---
title: 404 Error
---
import Error from 'widgets/404.js'
<Error />

View File

@ -1,19 +0,0 @@
import React from 'react'
import { window } from 'browser-monads'
import { LandingHeader, LandingTitle } from '../components/landing'
import Button from '../components/button'
export default () => (
<LandingHeader style={{ minHeight: 400 }}>
<LandingTitle>
Ooops, this page
<br />
does not exist!
</LandingTitle>
<br />
<Button onClick={() => window.history.go(-1)} variant="tertiary">
Click here to go back
</Button>
</LandingHeader>
)