From ffedff9e6c544b4becb455e3b483bed0fea32ff6 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 3 Nov 2015 18:54:05 +1100 Subject: [PATCH 1/3] * Remove the archive after download, to save disk space --- spacy/en/download.py | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/en/download.py b/spacy/en/download.py index 748e0542d..1cc029c09 100644 --- a/spacy/en/download.py +++ b/spacy/en/download.py @@ -38,6 +38,7 @@ def install_data(url, extract_path, download_path): assert tmp == download_path t = tarfile.open(download_path) t.extractall(extract_path) + os.unlink(download_path) @plac.annotations( From dde9e1357cf9599a63d3dec806f96faddbae9df1 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 3 Nov 2015 18:54:35 +1100 Subject: [PATCH 2/3] * Add todo to morphology.lemmatize --- spacy/morphology.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index 442aebd68..dff9d39e2 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -88,6 +88,8 @@ cdef class Morphology: return orth cdef unicode py_string = self.strings[orth] if pos != NOUN and pos != VERB and pos != ADJ and pos != PUNCT: + # TODO: This should lower-case + # return self.strings[py_string.lower()] return orth cdef set lemma_strings cdef unicode lemma_string From 9e37437ba82995c91d0037ca01e70cd50a571d5f Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Tue, 3 Nov 2015 19:07:02 +1100 Subject: [PATCH 3/3] * Fix assign_tag in doc.merge --- spacy/tokens/doc.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 6b14d761c..927c01147 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -473,7 +473,7 @@ cdef class Doc: token.lex = lex token.spacy = self.data[end-1].spacy if tag in self.vocab.morphology.tag_map: - self.vocab.morphology.assign_tag(token, self.vocab.strings[tag]) + self.vocab.morphology.assign_tag(token, tag) else: token.tag = self.vocab.strings[tag] token.tag = self.vocab.strings[tag]