mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
* Remove em-dash from lemma rules. Handle instead in specials.
This commit is contained in:
commit
d341443282
|
@ -1,3 +1,4 @@
|
||||||
|
# -#- coding: utf-8 -*-
|
||||||
import json
|
import json
|
||||||
|
|
||||||
contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
|
contractions = {"n't", "'nt", "not", "'ve", "'d", "'ll", "'s", "'m", "'ma", "'re"}
|
||||||
|
@ -133,6 +134,8 @@ hardcoded_specials = {
|
||||||
|
|
||||||
"''": [{"F": "''"}],
|
"''": [{"F": "''"}],
|
||||||
|
|
||||||
|
"—": [{"F": "—", "L": "--", "P": ":"}],
|
||||||
|
|
||||||
"Corp.": [{"F": "Corp."}],
|
"Corp.": [{"F": "Corp."}],
|
||||||
"Inc.": [{"F": "Inc."}],
|
"Inc.": [{"F": "Inc."}],
|
||||||
"Co.": [{"F": "Co."}],
|
"Co.": [{"F": "Co."}],
|
||||||
|
@ -412,6 +415,6 @@ def generate_specials():
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
specials = generate_specials()
|
specials = generate_specials()
|
||||||
with open("specials.json", "w") as f:
|
with open("specials.json", "w") as file_:
|
||||||
json.dump(specials, f)
|
file_.write(json.dumps(specials, indent=2))
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,5 @@
|
||||||
"punct": [
|
"punct": [
|
||||||
["“", "\""],
|
["“", "\""],
|
||||||
["”", "\""],
|
["”", "\""],
|
||||||
["—", "--"]
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user