Add tokenizer exception for 'Ph.D.' to fix issue #592.

This commit is contained in:
Matthew Honnibal 2016-11-03 00:03:34 +01:00
parent 532318e80b
commit 41a90a7fbb

View File

@ -112,6 +112,10 @@ TOKENIZER_INFIXES = (r'''\.\.\.+ (?<=[a-z])\.(?=[A-Z]) (?<=[a-zA-Z])-(?=[a-zA-z]
TOKENIZER_EXCEPTIONS = {
"Ph.D.": [
{
"F": "Ph.D."
}],
"d.": [
{
"F": "d."