From e8248b4b8be0097f106eb51660e693b734741ba4 Mon Sep 17 00:00:00 2001 From: Lonami Exo Date: Mon, 6 Nov 2017 09:36:54 +0100 Subject: [PATCH] Remove now unused Emoji ranges generator --- telethon_generator/emoji_ranges.py | 101 ----------------------------- 1 file changed, 101 deletions(-) delete mode 100644 telethon_generator/emoji_ranges.py diff --git a/telethon_generator/emoji_ranges.py b/telethon_generator/emoji_ranges.py deleted file mode 100644 index 90597cf6..00000000 --- a/telethon_generator/emoji_ranges.py +++ /dev/null @@ -1,101 +0,0 @@ -""" -Simple module to allow fetching unicode.org emoji lists and printing a -Python-like tuple out of them. - -May not be accurate 100%, and is definitely not as efficient as it could be, -but it should only be ran whenever the Unicode consortium decides to add -new emojies to the list. -""" -import os -import sys -import re -import urllib.error -import urllib.request - - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) - - -def get(url, enc='utf-8'): - try: - with urllib.request.urlopen(url) as f: - return f.read().decode(enc, errors='replace') - except urllib.error.HTTPError as e: - eprint('Caught', e, 'for', url, '; returning empty') - return '' - - -PREFIX_URL = 'http://unicode.org/Public/emoji/' -SUFFIX_URL = '/emoji-data.txt', '/emoji-sequences.txt' -VERSION_RE = re.compile(r'>(\d+.\d+)/<') -OUTPUT_TXT = 'emojies.txt' -CODEPOINT_RE = re.compile(r'([\da-fA-F]{3,}(?:[\s.]+[\da-fA-F]{3,}))') -EMOJI_START = 0x20e3 # emoji data has many more ranges, falling outside this -EMOJI_END = 200000 # from some tests those outside the range aren't emojies - - -versions = VERSION_RE.findall(get(PREFIX_URL)) -lines = [] -if not os.path.isfile(OUTPUT_TXT): - with open(OUTPUT_TXT, 'w') as f: - for version in versions: - for s in SUFFIX_URL: - url = PREFIX_URL + version + s - for line in get(url).split('\n'): - line = line.strip() - if not line or line.startswith('#'): - continue - m = CODEPOINT_RE.search(line) - if m and m.start() == 0: - f.write(m.group(1) + '\n') - - -points = set() -with open(OUTPUT_TXT) as f: - for line in f: - line = line.strip() - if ' ' in line: - for p in line.split(): - i = int(p, 16) - if i > 255: - points.add(i) - elif '.' in line: - s, e = line.split('..') - for i in range(int(s, 16), int(e, 16) + 1): - if i > 255: - points.add(i) - else: - i = int(line, 16) - if i > 255: - points.add(int(line, 16)) - - -ranges = [] -points = tuple(sorted(points)) -start = points[0] -last = start -for point in points: - if point - last > 1: - if start == last or not (EMOJI_START < start < EMOJI_END): - eprint( - 'Dropping', last - start + 1, - 'character(s) from', hex(start), ':', chr(start) - ) - else: - ranges.append((start, last)) - start = point - - last = point - - -if start == last or not (EMOJI_START < start < EMOJI_END): - eprint( - 'Dropping', last - start + 1, - 'character(s) from', hex(start), ':', chr(start) - ) -else: - ranges.append((start, last)) - - -print('EMOJI_RANGES = ({})'.format(', '.join(repr(r) for r in ranges)))