From 1786331cd82674a5d4ec14cce74d135278dae84d Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Tue, 3 May 2016 12:51:47 +0200 Subject: [PATCH 1/7] add model sanity test --- spacy/tests/conftest.py | 14 ++--- spacy/tests/integration/__init__.py | 0 spacy/tests/integration/test_model_sanity.py | 62 ++++++++++++++++++++ 3 files changed, 68 insertions(+), 8 deletions(-) create mode 100644 spacy/tests/integration/__init__.py create mode 100644 spacy/tests/integration/test_model_sanity.py diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index 83a39a03a..cf7fd223a 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -1,17 +1,15 @@ -from spacy.en import English - import pytest import os +import spacy @pytest.fixture(scope="session") def EN(): - if os.environ.get('SPACY_DATA'): - data_dir = os.environ.get('SPACY_DATA') - else: - data_dir = None - print("Load EN from %s" % data_dir) - return English(data_dir=data_dir) + return spacy.load("en") + +@pytest.fixture(score="session") +def DE(): + return spacy.load("de") def pytest_addoption(parser): diff --git a/spacy/tests/integration/__init__.py b/spacy/tests/integration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/tests/integration/test_model_sanity.py b/spacy/tests/integration/test_model_sanity.py new file mode 100644 index 000000000..0cddb85dd --- /dev/null +++ b/spacy/tests/integration/test_model_sanity.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- + +import pytest +import numpy + +@pytest.mark.models +class TestModelSanity: + """ + This is to make sure the model works as expected. The tests make sure that values are properly set. + Tests are not meant to evaluate the content of the output, only make sure the output is formally okay. + """ + + @pytest.fixture(scope='class', params=['en','de']) + def example(self, request, EN, DE): + if request.param == 'en': + return EN(u'There was a stranger standing at the big street talking to herself.') + elif request.param == 'de': + return DE(u'An der großen Straße stand eine merkwürdige Gestalt und führte Selbstgespräche.') + + def test_tokenization(self, example): + # tokenization should split the document into tokens + assert len(example) > 1 + + def test_tagging(self, example): + # if tagging was done properly, pos tags shouldn't be empty + assert example.is_tagged + assert all( t.pos != 0 for t in example ) + assert all( t.tag != 0 for t in example ) + + def test_parsing(self, example): + # if parsing was done properly + # - dependency labels shouldn't be empty + # - the head of some tokens should not be root + assert example.is_parsed + assert all( t.dep != 0 for t in example ) + assert any( t.dep != i for i,t in enumerate(example) ) + + def test_ner(self, example): + # if ner was done properly, ent_iob shouldn't be empty + assert all( t.ent_iob != 0 for t in example ) + + def test_vectors(self, example): + # if vectors are available, they should differ on different words + # this isn't a perfect test since this could in principle fail in a sane model as well, + # but that's very unlikely and a good indicator if something is wrong + vector0 = example[0].vector + vector1 = example[1].vector + vector2 = example[2].vector + assert not numpy.array_equal(vector0,vector1) + assert not numpy.array_equal(vector0,vector2) + assert not numpy.array_equal(vector1,vector2) + + def test_probs(self, example): + # if frequencies/probabilities are okay, they should differ for different words + # this isn't a perfect test since this could in principle 
fail in a sane model as well, + # but that's very unlikely and a good indicator if something is wrong + prob0 = example[0].prob + prob1 = example[1].prob + prob2 = example[2].prob + assert not prob0 == prob1 + assert not prob0 == prob2 + assert not prob1 == prob2 From 7b246c13cbe58946cf75b8d860db80bf22963a93 Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Tue, 3 May 2016 14:24:35 +0200 Subject: [PATCH 2/7] reformulate noun chunk tests for English --- spacy/symbols.pxd | 1 + spacy/symbols.pyx | 1 + spacy/syntax/parser.pyx | 5 ++ spacy/tests/conftest.py | 2 +- spacy/tests/parser/test_base_nps.py | 40 +++++++------- spacy/tests/unit/__init__.py | 0 spacy/tests/unit/test_parser.py | 83 +++++++++++++++++++++++++++++ 7 files changed, 111 insertions(+), 21 deletions(-) create mode 100644 spacy/tests/unit/__init__.py create mode 100644 spacy/tests/unit/test_parser.py diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd index 942d8aa9c..d577eaf6d 100644 --- a/spacy/symbols.pxd +++ b/spacy/symbols.pxd @@ -382,6 +382,7 @@ cpdef enum symbol_t: cc ccomp complm + compound conj csubj csubjpass diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index 712bef9a3..0e8dcda13 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -381,6 +381,7 @@ IDS = { "cc": cc, "ccomp": ccomp, "complm": complm, + "compound": compound, "conj": conj, "csubj": csubj, "csubjpass": csubjpass, diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index 04f9d5f22..22f37127a 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ -225,6 +225,11 @@ cdef class Parser: def step_through(self, Doc doc): return StepwiseState(self, doc) + def from_transition_sequence(self, Doc doc, sequence): + with self.step_through(doc) as stepwise: + for transition in sequence: + stepwise.transition(transition) + def add_label(self, label): for action in self.moves.action_types: self.moves.add_action(action, label) diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py index cf7fd223a..cc64ee46f 100644 --- a/spacy/tests/conftest.py +++ b/spacy/tests/conftest.py @@ -7,7 +7,7 @@ import spacy def EN(): return spacy.load("en") -@pytest.fixture(score="session") +@pytest.fixture(scope="session") def DE(): return spacy.load("de") diff --git a/spacy/tests/parser/test_base_nps.py b/spacy/tests/parser/test_base_nps.py index 8d308bc8d..b598158d0 100644 --- a/spacy/tests/parser/test_base_nps.py +++ b/spacy/tests/parser/test_base_nps.py @@ -2,30 +2,30 @@ from __future__ import unicode_literals import pytest -@pytest.mark.models -def test_nsubj(EN): - sent = EN(u'A base phrase should be recognized.') - base_nps = list(sent.noun_chunks) - assert len(base_nps) == 1 - assert base_nps[0].string == 'A base phrase ' +# @pytest.mark.models +# def test_nsubj(EN): +# sent = EN(u'A base phrase should be recognized.') +# base_nps = list(sent.noun_chunks) +# assert len(base_nps) == 1 +# assert base_nps[0].string == 'A base phrase ' -@pytest.mark.models -def test_coord(EN): - sent = EN(u'A base phrase and a good phrase are often the same.') - base_nps = list(sent.noun_chunks) - assert len(base_nps) == 2 - assert base_nps[0].string == 'A base phrase ' - assert base_nps[1].string == 'a good phrase ' +# @pytest.mark.models +# def test_coord(EN): +# sent = EN(u'A base phrase and a good phrase are often the same.') +# base_nps = list(sent.noun_chunks) +# assert len(base_nps) == 2 +# assert base_nps[0].string == 'A base phrase ' +# assert base_nps[1].string == 'a good phrase ' -@pytest.mark.models -def test_pp(EN): - sent = EN(u'A phrase with 
another phrase occurs') - base_nps = list(sent.noun_chunks) - assert len(base_nps) == 2 - assert base_nps[0].string == 'A phrase ' - assert base_nps[1].string == 'another phrase ' +# @pytest.mark.models +# def test_pp(EN): +# sent = EN(u'A phrase with another phrase occurs') +# base_nps = list(sent.noun_chunks) +# assert len(base_nps) == 2 +# assert base_nps[0].string == 'A phrase ' +# assert base_nps[1].string == 'another phrase ' @pytest.mark.models diff --git a/spacy/tests/unit/__init__.py b/spacy/tests/unit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/spacy/tests/unit/test_parser.py b/spacy/tests/unit/test_parser.py new file mode 100644 index 000000000..7fdb663ed --- /dev/null +++ b/spacy/tests/unit/test_parser.py @@ -0,0 +1,83 @@ +from __future__ import unicode_literals + +import pytest +import numpy + +from spacy.attrs import HEAD, DEP +from spacy.symbols import root, det, compound, nsubjpass, aux, auxpass, punct, nsubj, cc, amod, conj, advmod, attr, prep, pobj + + +@pytest.mark.models +class TestNounChunks: + @pytest.fixture(scope="class") + def ex1_en(self, EN): + example = EN.tokenizer.tokens_from_list('A base phrase should be recognized .'.split(' ')) + EN.tagger.tag_from_strings(example, 'DT NN NN MD VB VBN .'.split(' ')) + example.from_array([HEAD, DEP], + numpy.asarray( + [ + [2, det], + [1, compound], + [3, nsubjpass], + [2, aux], + [1, auxpass], + [0, root], + [-1, punct] + ], dtype='int32')) + return example + + @pytest.fixture(scope="class") + def ex2_en(self, EN): + example = EN.tokenizer.tokens_from_list('A base phrase and a good phrase are often the same .'.split(' ')) + EN.tagger.tag_from_strings(example, 'DT NN NN CC DT JJ NN VBP RB DT JJ .'.split(' ')) + example.from_array([HEAD, DEP], + numpy.asarray( + [ + [2, det], + [1, compound], + [5, nsubj], + [-1, cc], + [1, det], + [1, amod], + [-4, conj], + [0, root], + [-1, advmod], + [1, det], + [-3, attr], + [-4, punct] + ], dtype='int32')) + return example + + @pytest.fixture(scope="class") + def ex3_en(self, EN): + example = EN.tokenizer.tokens_from_list('A phrase with another phrase occurs .'.split(' ')) + EN.tagger.tag_from_strings(example, 'DT NN IN DT NN VBZ .'.split(' ')) + example.from_array([HEAD, DEP], + numpy.asarray( + [ + [1, det], + [4, nsubj], + [-1, prep], + [1, det], + [-2, pobj], + [0, root], + [-1, punct] + ], dtype='int32')) + return example + + def test_standard_chunk(self, ex1_en): + chunks = list(ex1_en.noun_chunks) + assert len(chunks) == 1 + assert chunks[0].string == 'A base phrase ' + + def test_coordinated_chunks(self, ex2_en): + chunks = list(ex2_en.noun_chunks) + assert len(chunks) == 2 + assert chunks[0].string == 'A base phrase ' + assert chunks[1].string == 'a good phrase ' + + def test_pp_chunks(self, ex3_en): + chunks = list(ex3_en.noun_chunks) + assert len(chunks) == 2 + assert chunks[0].string == 'A phrase ' + assert chunks[1].string == 'another phrase ' From 7825b7554813c83e0423cec48ba293959d006840 Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Tue, 3 May 2016 15:01:28 +0200 Subject: [PATCH 3/7] add tests for German noun chunker --- spacy/tests/unit/test_parser.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/spacy/tests/unit/test_parser.py b/spacy/tests/unit/test_parser.py index 7fdb663ed..ba224b9ec 100644 --- a/spacy/tests/unit/test_parser.py +++ b/spacy/tests/unit/test_parser.py @@ -65,18 +65,35 @@ class TestNounChunks: ], dtype='int32')) return example - def test_standard_chunk(self, ex1_en): + # 
@pytest.fixture(score="class") + # def ex1_de(self, DE): + # example = EN.tokenizer.tokens_from_list('Eine Tasse steht auf dem Tisch .'.split(' ')) + # EN.tagger.tag_from_strings(example, 'ART NN VVFIN APPR ART NN $.'.split(' ')) + # example.from_array([HEAD, DEP], + # numpy.asarray( + # [ + # [1, det], + # [4, nsubj], + # [-1, prep], + # [1, det], + # [-2, pobj], + # [0, root], + # [-1, punct] + # ], dtype='int32')) + # return example + + def test_en_standard_chunk(self, ex1_en): chunks = list(ex1_en.noun_chunks) assert len(chunks) == 1 assert chunks[0].string == 'A base phrase ' - def test_coordinated_chunks(self, ex2_en): + def test_en_coordinated_chunks(self, ex2_en): chunks = list(ex2_en.noun_chunks) assert len(chunks) == 2 assert chunks[0].string == 'A base phrase ' assert chunks[1].string == 'a good phrase ' - def test_pp_chunks(self, ex3_en): + def test_en_pp_chunks(self, ex3_en): chunks = list(ex3_en.noun_chunks) assert len(chunks) == 2 assert chunks[0].string == 'A phrase ' From fd8019ec92952f38aa65dcdf07dd5003eb0bebef Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Tue, 3 May 2016 15:53:30 +0200 Subject: [PATCH 4/7] fix typo in german_noun_chunks --- spacy/syntax/iterators.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index b8b810d36..653c89d8f 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -40,7 +40,7 @@ def german_noun_chunks(doc): for rdep in doc[word.i].rights: if rdep.pos == NOUN and rdep.dep == close_app: rbracket = rdep.i+1 - yield word.l_edge, rbracket, np_label + yield word.left_edge.i, rbracket, np_label CHUNKERS = {'en': english_noun_chunks, 'de': german_noun_chunks} From a06fca9fdf48353aa671a84c090f4d21d53b6ec6 Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Tue, 3 May 2016 16:58:59 +0200 Subject: [PATCH 5/7] German noun chunk iterator now doesn't return tokens more than once --- spacy/symbols.pxd | 1 - spacy/symbols.pyx | 1 - spacy/syntax/iterators.pyx | 10 +++-- spacy/tests/unit/test_parser.py | 72 +++++++++++++++++++++++++-------- 4 files changed, 62 insertions(+), 22 deletions(-) diff --git a/spacy/symbols.pxd b/spacy/symbols.pxd index d577eaf6d..942d8aa9c 100644 --- a/spacy/symbols.pxd +++ b/spacy/symbols.pxd @@ -382,7 +382,6 @@ cpdef enum symbol_t: cc ccomp complm - compound conj csubj csubjpass diff --git a/spacy/symbols.pyx b/spacy/symbols.pyx index 0e8dcda13..712bef9a3 100644 --- a/spacy/symbols.pyx +++ b/spacy/symbols.pyx @@ -381,7 +381,6 @@ IDS = { "cc": cc, "ccomp": ccomp, "complm": complm, - "compound": compound, "conj": conj, "csubj": csubj, "csubjpass": csubjpass, diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index b8b810d36..395f772ce 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -32,7 +32,9 @@ def german_noun_chunks(doc): np_deps = set(doc.vocab.strings[label] for label in labels) close_app = doc.vocab.strings['nk'] - for word in doc: + i = 0 + while i < len(doc): + word = doc[i] if word.pos == NOUN and word.dep in np_deps: rbracket = word.i+1 # try to extend the span to the right @@ -40,7 +42,9 @@ def german_noun_chunks(doc): for rdep in doc[word.i].rights: if rdep.pos == NOUN and rdep.dep == close_app: rbracket = rdep.i+1 - yield word.l_edge, rbracket, np_label - + yield word.left_edge.i, rbracket, np_label + i = rbracket + continue + i += 1 CHUNKERS = {'en': english_noun_chunks, 'de': german_noun_chunks} diff --git a/spacy/tests/unit/test_parser.py 
b/spacy/tests/unit/test_parser.py index ba224b9ec..78bfad293 100644 --- a/spacy/tests/unit/test_parser.py +++ b/spacy/tests/unit/test_parser.py @@ -1,10 +1,11 @@ +# -*- coding: utf-8 -*- + from __future__ import unicode_literals import pytest import numpy from spacy.attrs import HEAD, DEP -from spacy.symbols import root, det, compound, nsubjpass, aux, auxpass, punct, nsubj, cc, amod, conj, advmod, attr, prep, pobj @pytest.mark.models @@ -13,6 +14,7 @@ class TestNounChunks: def ex1_en(self, EN): example = EN.tokenizer.tokens_from_list('A base phrase should be recognized .'.split(' ')) EN.tagger.tag_from_strings(example, 'DT NN NN MD VB VBN .'.split(' ')) + det,compound,nsubjpass,aux,auxpass,root,punct = tuple( EN.vocab.strings[l] for l in ['det','compound','nsubjpass','aux','auxpass','root','punct'] ) example.from_array([HEAD, DEP], numpy.asarray( [ @@ -30,6 +32,7 @@ class TestNounChunks: def ex2_en(self, EN): example = EN.tokenizer.tokens_from_list('A base phrase and a good phrase are often the same .'.split(' ')) EN.tagger.tag_from_strings(example, 'DT NN NN CC DT JJ NN VBP RB DT JJ .'.split(' ')) + det,compound,nsubj,cc,amod,conj,root,advmod,attr,punct = tuple( EN.vocab.strings[l] for l in ['det','compound','nsubj','cc','amod','conj','root','advmod','attr','punct'] ) example.from_array([HEAD, DEP], numpy.asarray( [ @@ -52,6 +55,7 @@ class TestNounChunks: def ex3_en(self, EN): example = EN.tokenizer.tokens_from_list('A phrase with another phrase occurs .'.split(' ')) EN.tagger.tag_from_strings(example, 'DT NN IN DT NN VBZ .'.split(' ')) + det,nsubj,prep,pobj,root,punct = tuple( EN.vocab.strings[l] for l in ['det','nsubj','prep','pobj','root','punct'] ) example.from_array([HEAD, DEP], numpy.asarray( [ @@ -65,22 +69,43 @@ class TestNounChunks: ], dtype='int32')) return example - # @pytest.fixture(score="class") - # def ex1_de(self, DE): - # example = EN.tokenizer.tokens_from_list('Eine Tasse steht auf dem Tisch .'.split(' ')) - # EN.tagger.tag_from_strings(example, 'ART NN VVFIN APPR ART NN $.'.split(' ')) - # example.from_array([HEAD, DEP], - # numpy.asarray( - # [ - # [1, det], - # [4, nsubj], - # [-1, prep], - # [1, det], - # [-2, pobj], - # [0, root], - # [-1, punct] - # ], dtype='int32')) - # return example + @pytest.fixture(scope="class") + def ex1_de(self, DE): + example = DE.tokenizer.tokens_from_list('Eine Tasse steht auf dem Tisch .'.split(' ')) + DE.tagger.tag_from_strings(example, 'ART NN VVFIN APPR ART NN $.'.split(' ')) + nk,sb,root,mo,punct = tuple( DE.vocab.strings[l] for l in ['nk','sb','root','mo','punct']) + example.from_array([HEAD, DEP], + numpy.asarray( + [ + [1, nk], + [1, sb], + [0, root], + [-1, mo], + [1, nk], + [-2, nk], + [-3, punct] + ], dtype='int32')) + return example + + @pytest.fixture(scope="class") + def ex2_de(self, DE): + example = DE.tokenizer.tokens_from_list('Die Sängerin singt mit einer Tasse Kaffee Arien .'.split(' ')) + DE.tagger.tag_from_strings(example, 'ART NN VVFIN APPR ART NN NN NN $.'.split(' ')) + nk,sb,root,mo,punct,oa = tuple( DE.vocab.strings[l] for l in ['nk','sb','root','mo','punct','oa']) + example.from_array([HEAD, DEP], + numpy.asarray( + [ + [1, nk], + [1, sb], + [0, root], + [-1, mo], + [1, nk], + [-2, nk], + [-1, nk], + [-5, oa], + [-6, punct] + ], dtype='int32')) + return example def test_en_standard_chunk(self, ex1_en): chunks = list(ex1_en.noun_chunks) @@ -98,3 +123,16 @@ class TestNounChunks: assert len(chunks) == 2 assert chunks[0].string == 'A phrase ' assert chunks[1].string == 'another phrase ' + + def 
test_de_standard_chunk(self, ex1_de): + chunks = list(ex1_de.noun_chunks) + assert len(chunks) == 2 + assert chunks[0].string == 'Eine Tasse ' + assert chunks[1].string == 'dem Tisch ' + + def test_de_extended_chunk(self, ex2_de): + chunks = list(ex2_de.noun_chunks) + assert len(chunks) == 3 + assert chunks[0].string == 'Die Sängerin ' + assert chunks[1].string == 'einer Tasse Kaffee ' + assert chunks[2].string == 'Arien ' From 5bf2fd1f788ccb6e9691b6d137edfd65120ee4ef Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Tue, 3 May 2016 17:19:05 +0200 Subject: [PATCH 6/7] make the code less cryptic --- spacy/syntax/iterators.pyx | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index 395f772ce..a02dce0b7 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -32,9 +32,10 @@ def german_noun_chunks(doc): np_deps = set(doc.vocab.strings[label] for label in labels) close_app = doc.vocab.strings['nk'] - i = 0 - while i < len(doc): - word = doc[i] + rbracket = 0 + for i, word in enumerate(doc): + if i < rbracket: + continue if word.pos == NOUN and word.dep in np_deps: rbracket = word.i+1 # try to extend the span to the right @@ -42,9 +43,6 @@ def german_noun_chunks(doc): for rdep in doc[word.i].rights: if rdep.pos == NOUN and rdep.dep == close_app: rbracket = rdep.i+1 - yield word.left_edge.i, rbracket, np_label - i = rbracket - continue - i += 1 + yield word.left_edge.i, rbracket, np_label CHUNKERS = {'en': english_noun_chunks, 'de': german_noun_chunks} From e4ea2bea01b991ddde345b2b6026cd063765c1a7 Mon Sep 17 00:00:00 2001 From: Wolfgang Seeker Date: Wed, 4 May 2016 07:40:38 +0200 Subject: [PATCH 7/7] fix whitespace --- spacy/syntax/iterators.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spacy/syntax/iterators.pyx b/spacy/syntax/iterators.pyx index a02dce0b7..516c2e41b 100644 --- a/spacy/syntax/iterators.pyx +++ b/spacy/syntax/iterators.pyx @@ -43,6 +43,7 @@ def german_noun_chunks(doc): for rdep in doc[word.i].rights: if rdep.pos == NOUN and rdep.dep == close_app: rbracket = rdep.i+1 - yield word.left_edge.i, rbracket, np_label + yield word.left_edge.i, rbracket, np_label + CHUNKERS = {'en': english_noun_chunks, 'de': german_noun_chunks}
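
A note on reading the fixtures introduced in patches 2 and 3: the HEAD column passed to from_array holds relative offsets, not absolute indices. Row i stores head(i) - i, so an offset of 0 marks the root token. A quick decoder over the ex3_en fixture, plain Python with no spaCy dependency:

    # HEAD values are relative: absolute head index = token index + offset.
    offsets = [1, 4, -1, 1, -2, 0, -1]  # HEAD column of ex3_en: 'A phrase with another phrase occurs .'
    heads = [i + off for i, off in enumerate(offsets)]
    print(heads)
    # [1, 5, 1, 4, 2, 5, 5]: 'A' attaches to 'phrase', 'phrase' to 'occurs',
    # 'occurs' heads itself (the root), and '.' attaches back to 'occurs'.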
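
For reference, here is a self-contained sketch of the scanning pattern that patches 5 through 7 converge on for german_noun_chunks: walk the document left to right, skip any token already covered by the last chunk yielded (tracked via rbracket), and extend a noun head rightward over a close apposition before yielding. The Tok class and the label set below are toy stand-ins assumed for illustration only; the real iterator compares integer IDs obtained from doc.vocab.strings.

    class Tok(object):
        """Toy token carrying just the fields the chunker consults."""
        def __init__(self, i, text, pos, dep, left_edge_i=None):
            self.i = i
            self.text = text
            self.pos = pos
            self.dep = dep
            self.left_edge_i = i if left_edge_i is None else left_edge_i
            self.rights = []  # right dependents, filled in after construction

    NP_DEPS = {'sb', 'oa', 'nk'}  # assumed subset of the German NP-internal labels
    CLOSE_APP = 'nk'              # close apposition, as in 'einer Tasse Kaffee'

    def german_noun_chunks(tokens):
        rbracket = 0
        for i, word in enumerate(tokens):
            if i < rbracket:
                continue  # token already inside the chunk just yielded
            if word.pos == 'NOUN' and word.dep in NP_DEPS:
                rbracket = word.i + 1
                # try to extend the chunk over close appositions to the right
                for rdep in word.rights:
                    if rdep.pos == 'NOUN' and rdep.dep == CLOSE_APP:
                        rbracket = rdep.i + 1
                yield word.left_edge_i, rbracket, 'NP'

    # 'Die Saengerin singt mit einer Tasse Kaffee Arien .' (ex2_de, umlaut ASCII-folded)
    toks = [Tok(0, 'Die', 'DET', 'nk'),
            Tok(1, 'Saengerin', 'NOUN', 'sb', left_edge_i=0),
            Tok(2, 'singt', 'VERB', 'root'),
            Tok(3, 'mit', 'ADP', 'mo'),
            Tok(4, 'einer', 'DET', 'nk'),
            Tok(5, 'Tasse', 'NOUN', 'nk', left_edge_i=4),
            Tok(6, 'Kaffee', 'NOUN', 'nk'),
            Tok(7, 'Arien', 'NOUN', 'oa'),
            Tok(8, '.', 'PUNCT', 'punct')]
    toks[5].rights = [toks[6]]  # 'Kaffee' is a right dependent of 'Tasse'

    print(list(german_noun_chunks(toks)))
    # [(0, 2, 'NP'), (4, 7, 'NP'), (7, 8, 'NP')] -- matching ex2_de's three chunks.

Without the i < rbracket guard, 'Kaffee' (a NOUN whose dep 'nk' is both a chunk-internal label and the close-apposition label) would be yielded a second time as its own overlapping chunk; that duplicate is exactly what patch 5 removes.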