Merge branch 'develop' of https://github.com/honnibal/spaCy into develop

2025-06-29 17:33:10 +03:00 · 2015-09-09 14:33:38 +02:00 · 2015-09-09 14:33:38 +02:00 · 31ccf494e6
commit 31ccf494e6
parent a7f4b26c8c 07686470a9
4 changed files with 14 additions and 9 deletions
--- a/spacy/language.py
+++ b/spacy/language.py
@ -1,4 +1,5 @@
 from os import path
 from warnings import warn
 try:
    import ujson as json
@ -184,7 +185,10 @@ class Language(object):
            return None
    def __init__(self, data_dir=None, vocab=None, tokenizer=None, tagger=None,
-                 parser=None, entity=None, matcher=None, serializer=None):
+                 parser=None, entity=None, matcher=None, serializer=None,
                 load_vectors=True):
        if load_vectors is not True:
            warn("load_vectors is deprecated", DeprecationWarning)
        if data_dir is None:
            data_dir = self.default_data_dir()
        if vocab is None:
--- a/spacy/lexeme.pyx
+++ b/spacy/lexeme.pyx
@ -60,19 +60,19 @@ cdef class Lexeme:
        def __set__(self, int x): self.c.suffix = x
    property cluster:
-        def __get__(self): return self.c.suffix
+        def __get__(self): return self.c.cluster
-        def __set__(self, int x): self.c.suffix = x
+        def __set__(self, int x): self.c.cluster = x
    property prob:
-        def __get__(self): return self.c.suffix
+        def __get__(self): return self.c.prob
-        def __set__(self, int x): self.c.suffix = x
+        def __set__(self, float x): self.c.prob = x
    property lower_:
        def __get__(self): return self.vocab.strings[self.c.lower]
        def __set__(self, unicode x): self.c.lower = self.vocab.strings[x]
    property norm_:
-        def __get__(self): return self.c.norm
+        def __get__(self): return self.vocab.strings[self.c.norm]
        def __set__(self, unicode x): self.c.norm = self.vocab.strings[x]
    property shape_:
@ -80,11 +80,11 @@ cdef class Lexeme:
        def __set__(self, unicode x): self.c.shape = self.vocab.strings[x]
    property prefix_:
-        def __get__(self): return self.c.prefix
+        def __get__(self): return self.vocab.strings[self.c.prefix]
        def __set__(self, unicode x): self.c.prefix = self.vocab.strings[x]
    property suffix_:
-        def __get__(self): return self.c.suffix
+        def __get__(self): return self.vocab.strings[self.c.suffix]
        def __set__(self, unicode x): self.c.suffix = self.vocab.strings[x]
    property flags:
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@ -24,6 +24,7 @@ cdef class Morphology:
            self.rich_tags[i].id = i
            self.rich_tags[i].name = self.strings[tag_str]
            self.rich_tags[i].morph = 0
            self.rich_tags[i].pos = UNIV_POS_NAMES[props['pos'].upper()]
            self.reverse_index[self.rich_tags[i].name] = i
        self._cache = PreshMapArray(self.n_tags)
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -188,7 +188,7 @@ cdef class Doc:
    def noun_chunks(self):
        """Yield spans for base noun phrases."""
        cdef const TokenC* word
-        labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr']
+        labels = ['nsubj', 'dobj', 'nsubjpass', 'pcomp', 'pobj', 'attr', 'conj']
        np_deps = [self.vocab.strings[label] for label in labels]
        np_label = self.vocab.strings['NP']
        for i in range(self.length):