From 135062d23c94e5b9a6f68e7bfb280b1af8b570e7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Mon, 19 Oct 2015 15:47:04 +1100
Subject: [PATCH] * Fix error with merged text when merged region did not have
 trailing whitespace

---
 spacy/tokens/doc.pyx            |  3 ++-
 tests/tokens/test_tokens_api.py | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx
index 55a83913b..c0cc6803b 100644
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@@ -448,7 +448,8 @@ cdef class Doc:
         cdef Span span = self[start:end]
         # Get LexemeC for newly merged token
         new_orth = ''.join([t.text_with_ws for t in span])
-        new_orth = new_orth[:-len(span[-1].whitespace_)]
+        if span[-1].whitespace_:
+            new_orth = new_orth[:-len(span[-1].whitespace_)]
         cdef const LexemeC* lex = self.vocab.get(self.mem, new_orth)
         # House the new merged token where it starts
         cdef TokenC* token = &self.data[start]
diff --git a/tests/tokens/test_tokens_api.py b/tests/tokens/test_tokens_api.py
index b40513b02..794a29bcb 100644
--- a/tests/tokens/test_tokens_api.py
+++ b/tests/tokens/test_tokens_api.py
@@ -124,6 +124,18 @@ def test_merge(EN):
     assert doc[4].tag_ == 'NAMED'
 
 
+def test_merge_end_string(EN):
+    """Test merging a span that ends the text and has no trailing whitespace."""
+    doc = EN('WKRO played songs by the beach boys all night')
+    assert len(doc) == 9
+    # merge 'all night': the last two tokens, so nothing follows the merged span
+    doc.merge(doc[7].idx, doc[8].idx + len(doc[8]), 'NAMED', 'LEMMA', 'TYPE')
+    assert len(doc) == 8
+
+    assert doc[7].text == 'all night'
+    assert doc[7].text_with_ws == 'all night'
+
+
 @pytest.mark.models
 def test_merge_children(EN):
     """Test that attachments work correctly after merging."""