From 8c0b4b850ef472d27ed36b952ed33ea25de1e152 Mon Sep 17 00:00:00 2001
From: Gyorgy Orosz <oroszgy@gmail.com>
Date: Tue, 30 May 2017 21:34:46 +0200
Subject: [PATCH 1/2] Fixed emoji handling for Hungarian

---
 spacy/lang/hu/punctuation.py             | 13 +++++--------
 spacy/tests/tokenizer/test_exceptions.py |  4 +---
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/spacy/lang/hu/punctuation.py b/spacy/lang/hu/punctuation.py
index 27a2912e2..b758e0104 100644
--- a/spacy/lang/hu/punctuation.py
+++ b/spacy/lang/hu/punctuation.py
@@ -1,18 +1,17 @@
 # coding: utf8
 from __future__ import unicode_literals
 
-from ..punctuation import TOKENIZER_INFIXES
-from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CURRENCY
+from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES
 from ..char_classes import QUOTES, UNITS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
 
+LIST_ICONS = [r'[\p{So}--[°]]']
 
 _currency = r'\$|¢|£|€|¥|฿'
 _quotes = QUOTES.replace("'", '')
 
+_prefixes = ([r'\+'] + LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES + LIST_ICONS)
 
-_prefixes = ([r'\+'] + LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES)
-
-_suffixes = (LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES +
+_suffixes = (LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES + LIST_ICONS +
              [r'(?<=[0-9])\+',
               r'(?<=°[FfCcKk])\.',
               r'(?<=[0-9])(?:{})'.format(_currency),
@@ -20,8 +19,7 @@ _suffixes = (LIST_PUNCT + LIST_ELLIPSES + LIST_QUOTES +
               r'(?<=[{}{}{}(?:{})])\.'.format(ALPHA_LOWER, r'%²\-\)\]\+', QUOTES, _currency),
               r'(?<=[{})])-e'.format(ALPHA_LOWER)])
 
-
-_infixes = (LIST_ELLIPSES +
+_infixes = (LIST_ELLIPSES + LIST_ICONS +
             [r'(?<=[{}])\.(?=[{}])'.format(ALPHA_LOWER, ALPHA_UPPER),
              r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
              r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA),
@@ -29,7 +27,6 @@ _infixes = (LIST_ELLIPSES +
              r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
              r'(?<=[{a}])([{q}\)\]\(\[])(?=[\-{a}])'.format(a=ALPHA, q=_quotes)])
 
-
 TOKENIZER_PREFIXES = _prefixes
 TOKENIZER_SUFFIXES = _suffixes
 TOKENIZER_INFIXES = _infixes
diff --git a/spacy/tests/tokenizer/test_exceptions.py b/spacy/tests/tokenizer/test_exceptions.py
index 70fb103dc..57281b998 100644
--- a/spacy/tests/tokenizer/test_exceptions.py
+++ b/spacy/tests/tokenizer/test_exceptions.py
@@ -41,7 +41,5 @@ def test_tokenizer_excludes_false_pos_emoticons(tokenizer, text, length):
 @pytest.mark.parametrize('text,length', [('can you still dunk?🍕🍔😵LOL', 8),
                                          ('i💙you', 3), ('🤘🤘yay!', 4)])
 def test_tokenizer_handles_emoji(tokenizer, text, length):
-    exceptions = ["hu"]
     tokens = tokenizer(text)
-    if tokens[0].lang_ not in exceptions:
-        assert len(tokens) == length
+    assert len(tokens) == length

From 981196c181cb12ddde69ab6ef4878f14243c194f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 31 May 2017 11:34:31 +0200
Subject: [PATCH 2/2] Fix typo in rule-based matching docs

---
 website/docs/usage/rule-based-matching.jade | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade
index 8588729b6..71400ea55 100644
--- a/website/docs/usage/rule-based-matching.jade
+++ b/website/docs/usage/rule-based-matching.jade
@@ -408,7 +408,7 @@ p
     |  To label the hashtags, we first need to add a new custom flag.
     |  #[code IS_HASHTAG] will be the flag's ID, which you can use to assign it
     |  to the hashtag's span, and check its value via a token's
-    |  #[+api("token#check_flag") #[code code check_flag()]] method. On each
+    |  #[+api("token#check_flag") #[code check_flag()]] method. On each
     |  match, we merge the hashtag and assign the flag.
 
 +code.