mirror of
https://github.com/LonamiWebs/Telethon.git
synced 2025-01-24 00:04:14 +03:00
Remove now unused Emoji ranges generator
This commit is contained in:
parent
49eb281251
commit
e8248b4b8b
|
@ -1,101 +0,0 @@
|
|||
"""
|
||||
Simple module to allow fetching unicode.org emoji lists and printing a
|
||||
Python-like tuple out of them.
|
||||
|
||||
May not be 100% accurate, and is definitely not as efficient as it could be,
|
||||
but it should only be run whenever the Unicode consortium decides to add
|
||||
new emojis to the list.
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    """Behave like the built-in ``print`` but write to standard error.

    All positional and keyword arguments are forwarded unchanged to
    ``print``. ``file`` is already supplied here, so passing it again
    raises ``TypeError``.
    """
    stderr = sys.stderr
    print(*args, file=stderr, **kwargs)
|
||||
|
||||
|
||||
def get(url, enc='utf-8'):
    """Download ``url`` and return its body decoded as text.

    The body is decoded with ``enc``; undecodable bytes are replaced rather
    than raising. If the request fails with ``urllib.error.HTTPError`` the
    error is reported on stderr and an empty string is returned. Any other
    exception propagates to the caller.
    """
    try:
        with urllib.request.urlopen(url) as response:
            payload = response.read()
        text = payload.decode(enc, errors='replace')
    except urllib.error.HTTPError as err:
        eprint('Caught', err, 'for', url, '; returning empty')
        text = ''
    return text
|
||||
|
||||
|
||||
# Directory listing that contains one sub-directory per emoji version.
PREFIX_URL = 'http://unicode.org/Public/emoji/'

# Data files fetched from every version directory.
SUFFIX_URL = '/emoji-data.txt', '/emoji-sequences.txt'

# Pulls version directory names such as "5.0" out of the listing's link text
# (">5.0/<"). The dot is escaped so that only a literal "." separates the two
# digit runs; unescaped it matched any character (e.g. ">12x0/<").
VERSION_RE = re.compile(r'>(\d+\.\d+)/<')

# Local cache of the extracted code point entries, one entry per line.
OUTPUT_TXT = 'emojies.txt'

# Matches two hexadecimal code points (3+ digits each) separated by
# whitespace and/or dots, e.g. "1F600..1F64F" or "0023 FE0F".
# NOTE(review): the trailing group is not optional, so a line holding a
# single code point never matches and a longer sequence is cut after its
# second code point. Confirm whether that is intended before relaxing it.
CODEPOINT_RE = re.compile(r'([\da-fA-F]{3,}(?:[\s.]+[\da-fA-F]{3,}))')

# Only ranges whose first code point lies strictly between these two bounds
# are kept by the range-building step.
EMOJI_START = 0x20e3  # emoji data has many more ranges, falling outside this
# NOTE(review): this bound is written in decimal (200000 == 0x30d40), unlike
# EMOJI_START above.
EMOJI_END = 200000  # from some tests those outside the range aren't emojies
|
||||
|
||||
|
||||
# Download step: build the local cache of code point entries, but only when
# it does not exist yet. The version listing is likewise fetched only in that
# case, so a run that already has the cache needs no network access.
versions = []
lines = []
if not os.path.isfile(OUTPUT_TXT):
    versions = VERSION_RE.findall(get(PREFIX_URL))

    # Collect everything in memory first and write the file in one go at the
    # end. Opening the file before downloading would leave a truncated cache
    # behind if any request raised, and the isfile() check above would then
    # accept that partial file on every later run.
    for version in versions:
        for s in SUFFIX_URL:
            url = PREFIX_URL + version + s
            for line in get(url).split('\n'):
                line = line.strip()
                if not line or line.startswith('#'):
                    continue

                # Keep only entries that begin at the very start of the line.
                m = CODEPOINT_RE.search(line)
                if m and m.start() == 0:
                    lines.append(m.group(1) + '\n')

    if not lines:
        # An empty cache file would be reused forever and gives the later
        # steps nothing to work with, so stop here without creating it.
        sys.exit(
            'No emoji code points could be downloaded from ' + PREFIX_URL
            + '; not writing ' + OUTPUT_TXT
        )

    with open(OUTPUT_TXT, 'w') as f:
        f.writelines(lines)
|
||||
|
||||
|
||||
# Parse step: turn every cached entry into integer code points.
# An entry has one of three shapes:
#   "AAAA BBBB"   -> whitespace separated code points
#   "AAAA..BBBB"  -> inclusive range of code points
#   "AAAA"        -> a single code point
# Code points up to 255 are always ignored.
points = set()
with open(OUTPUT_TXT) as f:
    for raw in f:
        entry = raw.strip()
        if ' ' in entry:
            candidates = [int(token, 16) for token in entry.split()]
        elif '.' in entry:
            first, final = entry.split('..')
            candidates = range(int(first, 16), int(final, 16) + 1)
        else:
            candidates = (int(entry, 16),)

        points.update(code for code in candidates if code > 255)
|
||||
|
||||
|
||||
def _keep_or_drop(first, final, kept):
    """Append the inclusive range ``(first, final)`` to ``kept`` if it qualifies.

    A range is discarded, with a note on stderr, when it holds a single code
    point or when ``first`` is not strictly between ``EMOJI_START`` and
    ``EMOJI_END``.
    """
    if first == final or not (EMOJI_START < first < EMOJI_END):
        eprint(
            'Dropping', final - first + 1,
            'character(s) from', hex(first), ':', chr(first)
        )
    else:
        kept.append((first, final))


# Grouping step: collapse the sorted code points into inclusive
# (start, last) runs of consecutive values.
ranges = []
points = tuple(sorted(points))
if points:
    start = last = points[0]
    for point in points:
        # A gap of more than one closes the current run and opens a new one.
        if point - last > 1:
            _keep_or_drop(start, last, ranges)
            start = point

        last = point

    # The final run is never closed inside the loop.
    _keep_or_drop(start, last, ranges)
else:
    # Without this guard points[0] above would raise IndexError.
    eprint('No code points were collected; EMOJI_RANGES will be empty')

# repr() of a tuple gives the same text as joining the items by hand for
# zero or for two and more ranges. For exactly one range it adds the trailing
# comma, so the printed value stays a tuple of ranges.
print('EMOJI_RANGES = {!r}'.format(tuple(ranges)))
|
Loading…
Reference in New Issue
Block a user