From 07c09a0e1ba9d4c949c185176b6887b68a6a29e9 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 9 Sep 2015 14:29:22 +0200 Subject: [PATCH 1/4] * Fix attribute getters and setters in Lexeme --- spacy/lexeme.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 8ec238e32..e0fa854cb 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -60,19 +60,19 @@ cdef class Lexeme: def __set__(self, int x): self.c.suffix = x property cluster: - def __get__(self): return self.c.suffix - def __set__(self, int x): self.c.suffix = x + def __get__(self): return self.c.cluster + def __set__(self, int x): self.c.cluster = x property prob: - def __get__(self): return self.c.suffix - def __set__(self, int x): self.c.suffix = x + def __get__(self): return self.c.prob + def __set__(self, float x): self.c.prob = x property lower_: def __get__(self): return self.vocab.strings[self.c.lower] def __set__(self, unicode x): self.c.lower = self.vocab.strings[x] property norm_: - def __get__(self): return self.c.norm + def __get__(self): return self.vocab.strings[self.c.norm] def __set__(self, unicode x): self.c.norm = self.vocab.strings[x] property shape_: @@ -80,11 +80,11 @@ cdef class Lexeme: def __set__(self, unicode x): self.c.shape = self.vocab.strings[x] property prefix_: - def __get__(self): return self.c.prefix + def __get__(self): return self.vocab.strings[self.c.prefix] def __set__(self, unicode x): self.c.prefix = self.vocab.strings[x] property suffix_: - def __get__(self): return self.c.suffix + def __get__(self): return self.vocab.strings[self.c.suffix] def __set__(self, unicode x): self.c.suffix = self.vocab.strings[x] property flags: From 0b527fbdc8d2623c76ac0a747f2251458ad3f6e2 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 9 Sep 2015 14:30:24 +0200 Subject: [PATCH 2/4] * Set POS tag in morphology --- spacy/morphology.pyx | 1 + 1 file changed, 1 insertion(+) diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index fc6a4936b..2ce484d7b 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -24,6 +24,7 @@ cdef class Morphology: self.rich_tags[i].id = i self.rich_tags[i].name = self.strings[tag_str] self.rich_tags[i].morph = 0 + self.rich_tags[i].pos = UNIV_POS_NAMES[props['pos'].upper()] self.reverse_index[self.rich_tags[i].name] = i self._cache = PreshMapArray(self.n_tags) From d9f1fc211202a0c4aff75db665e8a74060d31e81 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 9 Sep 2015 14:31:09 +0200 Subject: [PATCH 3/4] * Add deprecation warning for unused load_vectors argument. --- spacy/language.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spacy/language.py b/spacy/language.py index 881df7d1a..f32756a4d 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -1,4 +1,5 @@ from os import path +from warnings import warn try: import ujson as json @@ -184,7 +185,10 @@ class Language(object): return None def __init__(self, data_dir=None, vocab=None, tokenizer=None, tagger=None, - parser=None, entity=None, matcher=None, serializer=None): + parser=None, entity=None, matcher=None, serializer=None, + load_vectors=True): + if load_vectors is not True: + warn("load_vectors is deprecated", DeprecationWarning) if data_dir is None: data_dir = self.default_data_dir() if vocab is None: From 07686470a96bd37e7405f94dc1d8fb4766717383 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 9 Sep 2015 14:32:28 +0200 Subject: [PATCH 4/4] * Don't consider a coordinated NP a base chunk --- spacy/tokens/doc.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 41d24d8ac..1f687b046 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -186,7 +186,7 @@ cdef class Doc: def noun_chunks(self): """Yield spans for base noun phrases.""" cdef const TokenC* word - labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr'] + labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'conj'] np_deps = [self.vocab.strings[label] for label in labels] np_label = self.vocab.strings['NP'] for i in range(self.length):