mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-04 20:30:24 +03:00
Update punctuation.py
Add mathematical left and right angle brackets as punctuation for Ancient Greek for better tokenization.
This commit is contained in:
parent
8933b37021
commit
e8c23a04f3
|
@ -6,6 +6,7 @@ _prefixes = (
|
|||
[
|
||||
"†",
|
||||
"⸏",
|
||||
"〈",
|
||||
]
|
||||
+ LIST_PUNCT
|
||||
+ LIST_ELLIPSES
|
||||
|
@ -22,6 +23,7 @@ _suffixes = (
|
|||
+ [
|
||||
"†",
|
||||
"⸎",
|
||||
"〉",
|
||||
r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]",
|
||||
]
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user