mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Update base punctuation
This commit is contained in:
parent
9d6c8eaa49
commit
e85e1d571b
|
@@ -33,7 +33,7 @@ _currency = r'\$ £ € ¥ ฿ US\$ C\$ A\$'
|
||||||
# These expressions contain various unicode variations, including characters
|
# These expressions contain various unicode variations, including characters
|
||||||
# used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
|
# used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
|
||||||
# conflicts, spaCy's base tokenizer should handle all of those by default
|
# conflicts, spaCy's base tokenizer should handle all of those by default
|
||||||
_punct = r'… …… , : ; \! \? ¿ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ ·'
|
_punct = r'… …… , : ; \! \? ¿ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · ।'
|
||||||
_quotes = r'\' \'\' " ” “ `` ` ‘ ´ ‘‘ ’’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉'
|
_quotes = r'\' \'\' " ” “ `` ` ‘ ´ ‘‘ ’’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉'
|
||||||
_hyphens = '- – — -- --- —— ~'
|
_hyphens = '- – — -- --- —— ~'
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user