From 6fcc3df9895ee1d4a712327d2204a8eae294c598 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Thu, 6 Aug 2015 17:11:00 +0200 Subject: [PATCH] * Expand gazetteer with some of the errors from the reddit parse --- lang_data/en/gazetteer.json | 108 +++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/lang_data/en/gazetteer.json b/lang_data/en/gazetteer.json index 44cb0e780..2726d75af 100644 --- a/lang_data/en/gazetteer.json +++ b/lang_data/en/gazetteer.json @@ -70,5 +70,111 @@ [{"lower": "html5"}] ] ] - + "Python": [ + "PRODUCT", + {}, + [ + [{"orth": "Python"}] + ] + ], + "Ruby": [ + "PRODUCT", + {}, + [ + [{"orth": "Ruby"}] + ] + ], + "Digg": [ + "PRODUCT", + {}, + [ + [{"lower": "digg"}] + ] + ], + "Fox": [ + "ORG", + {}, + [ + [{"orth": "Fox"}] + ] + ], + "Google": [ + "ORG", + {}, + [ + [{"lower": "google"}] + ] + ], + "Mac": [ + "PRODUCT", + {}, + [ + [{"lower": "mac"}] + ] + ], + "Wikipedia": [ + "PRODUCT", + {}, + [ + [{"lower": "wikipedia"}] + ] + ], + "Windows": [ + "PRODUCT", + {}, + [ + [{"orth": "Windows"}] + ] + ], + "Dell": [ + "ORG", + {}, + [ + [{"lower": "dell"}] + ] + ], + "Facebook": [ + "ORG", + {}, + [ + [{"lower": "facebook"}] + ] + ], + "Blizzard": [ + "ORG", + {}, + [ + [{"orth": "Blizzard"}] + ] + ], + "Ubuntu": [ + "ORG", + {}, + [ + [{"orth": "Ubuntu"}] + ] + ], + "Youtube": [ + "PRODUCT", + {}, + [ + [{"lower": "youtube"}] + ] + ], + "false_positives": [ + null, + {}, + [{"orth": "Shit"}], + [{"orth": "Weed"}], + [{"orth": "Cool"}], + [{"orth": "Btw"}], + [{"orth": "Bah"}], + [{"orth": "Bullshit"}], + [{"orth": "Lol"}], + [{"orth": "Yo"}, {"orth": "dawg"}], + [{"orth": "Yay"}], + [{"orth": "Ahh"}], + [{"orth": "Yea"}], + [{"orth": "Bah"}] + ] }