mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Update base norm exceptions with more unicode characters
e.g. unicode variations of punctuation used in Chinese
This commit is contained in:
parent
3516aa0cea
commit
9d6c8eaa49
|
@@ -31,11 +31,21 @@ BASE_NORMS = {
|
|||
"„": '"',
|
||||
"»": '"',
|
||||
"«": '"',
|
||||
"‘‘": '"',
|
||||
"’’": '"',
|
||||
"?": "?",
|
||||
"!": "!",
|
||||
",": ",",
|
||||
";": ";",
|
||||
":": ":",
|
||||
"。": ".",
|
||||
"।": ".",
|
||||
"…": "...",
|
||||
"—": "-",
|
||||
"–": "-",
|
||||
"--": "-",
|
||||
"---": "-",
|
||||
"——": "-",
|
||||
"€": "$",
|
||||
"£": "$",
|
||||
"¥": "$",
|
||||
|
|
Loading…
Reference in New Issue
Block a user