mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 04:40:20 +03:00
Update punctuation.py
Add mathematical left and right angle brackets (〈 as a prefix, 〉 as a suffix) as punctuation for Ancient Greek for better tokenization.
This commit is contained in:
parent
8933b37021
commit
e8c23a04f3
|
@ -6,6 +6,7 @@ _prefixes = (
|
||||||
[
|
[
|
||||||
"†",
|
"†",
|
||||||
"⸏",
|
"⸏",
|
||||||
|
"〈",
|
||||||
]
|
]
|
||||||
+ LIST_PUNCT
|
+ LIST_PUNCT
|
||||||
+ LIST_ELLIPSES
|
+ LIST_ELLIPSES
|
||||||
|
@ -22,6 +23,7 @@ _suffixes = (
|
||||||
+ [
|
+ [
|
||||||
"†",
|
"†",
|
||||||
"⸎",
|
"⸎",
|
||||||
|
"〉",
|
||||||
r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]",
|
r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user