From e6dc0958c4e17c9d361aca35793214646e509336 Mon Sep 17 00:00:00 2001
From: "mr.Shu"
Date: Sun, 25 Jan 2015 17:51:18 +0100
Subject: [PATCH 1/4] docs: Added missing cosine declaration

 * Added missing cosine declaration to code listing that already included
   imports for `dot` and `norm` function it uses.

Signed-off-by: mr.Shu
---
 docs/source/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index c2c4e44d0..18ce24ee8 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -207,6 +207,7 @@ problematic, given our starting assumptions:
     >>> from numpy.linalg import norm
     >>> import spacy.en
     >>> from spacy.postags import ADVERB, VERB
+    >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1), norm(v2))
     >>> def is_bad_adverb(token, target_verb, tol):
     ...   if token.pos != ADVERB
     ...     return False

From 97c45187050346bcddafd71637b7712158170b60 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 26 Jan 2015 04:55:41 +1100
Subject: [PATCH 2/4] * Fix code in examples.

---
 docs/source/index.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 18ce24ee8..7d87eafc1 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -184,7 +184,7 @@ adverbs to. Recall that our previous adverb highlighting function looked like
 this:

     >>> import spacy.en
-    >>> from spacy.postags import ADVERB
+    >>> from spacy.parts_of_speech import ADV
     >>> # Load the pipeline, and call it with some text.
     >>> nlp = spacy.en.English()
     >>> tokens = nlp("‘Give it back,’ he pleaded abjectly, ‘it’s mine.’",
@@ -206,7 +206,7 @@ problematic, given our starting assumptions:
     >>> from numpy import dot
     >>> from numpy.linalg import norm
     >>> import spacy.en
-    >>> from spacy.postags import ADVERB, VERB
+    >>> from spacy.parts_of_speech import ADV, VERB
     >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1), norm(v2))
     >>> def is_bad_adverb(token, target_verb, tol):
     ...   if token.pos != ADVERB

From 86d7e151e9afde75f49623b6000108f57cf4eb49 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 26 Jan 2015 05:10:04 +1100
Subject: [PATCH 3/4] * Explain acronym

---
 docs/source/index.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7d87eafc1..bc56581a4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -8,7 +8,8 @@ spaCy: Industrial-strength NLP
 ==============================

 `spaCy`_ is a new library for text processing in Python and Cython.
-I wrote it because I think small companies are terrible at NLP. Or rather:
+I wrote it because I think small companies are terrible at
+natural language processing (NLP). Or rather:
 small companies are using terrible NLP technology.

 .. _spaCy: https://github.com/honnibal/spaCy/
@@ -311,6 +312,7 @@ on the standard evaluation from the Wall Street Journal, given gold-standard
 sentence boundaries and tokenization. I'm in the process of completing a more
 realistic evaluation on web text.

+spaCy's parser offers a better speed/accuracy trade-off than any published
 system: its accuracy is within 1% of the current state-of-the-art, and it's
 seven times faster than the 2014 CoreNLP neural network parser, which is the

From e237b69247d06dfdba5b9cc73b60f91b88d7e42e Mon Sep 17 00:00:00 2001
From: MatthieuBizien
Date: Sun, 25 Jan 2015 19:30:50 +0100
Subject: [PATCH 4/4] Bug fix on the documentation

---
 docs/source/index.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/source/index.rst b/docs/source/index.rst
index bc56581a4..9d26ac0ab 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -78,7 +78,7 @@ particularly egregious:
     >>> nlp = spacy.en.English()
     >>> tokens = nlp("‘Give it back,’ he pleaded abjectly, ‘it’s mine.’",
                      tag=True, parse=False)
-    >>> print(''.join(tok.string.upper() if tok.pos == ADV else tok.string) for t in tokens)
+    >>> print(''.join(tok.string.upper() if tok.pos == ADV else tok.string for tok in tokens))
     ‘Give it BACK,’ he pleaded ABJECTLY, ‘it’s mine.’


@@ -144,7 +144,7 @@ cosine metric:
     >>> from numpy import dot
     >>> from numpy.linalg import norm
     >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1), norm(v2))
-    >>> words = [w for w in nlp.vocab if w.is_lower]
+    >>> words = [w for w in nlp.vocab if w.lower]
    >>> words.sort(key=lambda w: cosine(w, pleaded))
    >>> words.reverse()
    >>> print('1-20', ', '.join(w.orth_ for w in words[0:20]))
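
A note on the cosine helper that PATCH 1/4 introduces and PATCH 4/4 leaves in
place: as written, the lambda divides the dot product by the tuple
(norm(v1), norm(v2)) rather than by the product of the two norms, so it yields
a two-element array instead of a scalar similarity. A minimal corrected
sketch, assuming only the numpy `dot` and `norm` imports already present in
the listing:

    >>> from numpy import dot
    >>> from numpy.linalg import norm
    >>> # divide by the product of the norms, not by a tuple of them
    >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2))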
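
A related note on the surrounding context lines: after PATCH 2/4 renames the
import to `from spacy.parts_of_speech import ADV`, the `is_bad_adverb` snippet
still compares against `ADVERB` and is missing the colon on the `if` line.
One way the visible lines could read under the new import (a sketch only; the
rest of the function body is outside the hunks in this series):

    >>> from spacy.parts_of_speech import ADV, VERB
    >>> def is_bad_adverb(token, target_verb, tol):
    ...     if token.pos != ADV:
    ...         return False
    ...     # remaining body not shown in the hunks above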