# encoding: utf8
"""Punctuation tables used to split prefixes, suffixes and infixes off tokens.

Each table is written as a raw multi-line string holding exactly one entry
per line, which is then turned into a list of strings.  Entries must not be
indented, must not contain blank lines, and cannot carry inline comments:
every line between the triple quotes becomes a list item verbatim.
"""
from __future__ import unicode_literals


# Strings split off the *front* of a token.
# NOTE(review): entries such as "(" and "[" are not regex-escaped, so these
# look like plain literals that the consumer is expected to escape before
# building a pattern -- confirm against the code that compiles this table.
TOKENIZER_PREFIXES = r'''
,
"
(
[
{
*
<
>
$
£
¡
¿
„
“
'
``
`
#
‘
....
...
…
‚
»
§
US$
C$
A$
a-
'''.strip().split('\n')


# Regex fragments split off the *end* of a token.  The two lookbehind rules
# only detach a final period when it follows a lower-case letter (ASCII or
# with umlauts), a digit, or closing punctuation.  The unit rules only detach
# a unit when it directly follows a digit.
TOKENIZER_SUFFIXES = r'''
,
\"
\)
\]
\}
\*
\!
\?
%
\$
>
:
;
'
”
“
«
_
''
's
'S
’s
’S
’
‘
°
€
…
\.\.
\.\.\.
\.\.\.\.
(?<=[a-z0-9)\]”"'%\)])\.
(?<=[a-zäöüßÖÄÜ)\]"'´«‘’%\)²“”])\.
\-\-
´
(?<=[0-9])km²
(?<=[0-9])m²
(?<=[0-9])cm²
(?<=[0-9])mm²
(?<=[0-9])km³
(?<=[0-9])m³
(?<=[0-9])cm³
(?<=[0-9])mm³
(?<=[0-9])ha
(?<=[0-9])km
(?<=[0-9])m
(?<=[0-9])cm
(?<=[0-9])mm
(?<=[0-9])µm
(?<=[0-9])nm
(?<=[0-9])yd
(?<=[0-9])in
(?<=[0-9])ft
(?<=[0-9])kg
(?<=[0-9])g
(?<=[0-9])mg
(?<=[0-9])µg
(?<=[0-9])t
(?<=[0-9])lb
(?<=[0-9])oz
(?<=[0-9])m/s
(?<=[0-9])km/h
(?<=[0-9])mph
(?<=[0-9])°C
(?<=[0-9])°K
(?<=[0-9])°F
(?<=[0-9])hPa
(?<=[0-9])Pa
(?<=[0-9])mbar
(?<=[0-9])mb
(?<=[0-9])T
(?<=[0-9])G
(?<=[0-9])M
(?<=[0-9])K
(?<=[0-9])kb
'''.strip().split('\n')


# Regex fragments matched *inside* a token.  Apart from the ellipsis rules,
# every rule is guarded by a lookbehind and a lookahead so the separator only
# matches between two letters or between two digits.
# The hyphen rules use [a-zA-Z] on both sides: a range written as "A-z" would
# also admit the ASCII characters [ \ ] ^ _ and the backtick.
TOKENIZER_INFIXES = r'''
…
\.\.\.+
(?<=[a-z])\.(?=[A-Z])
(?<=[a-zA-Z])-(?=[a-zA-Z])
(?<=[a-zA-Z])--(?=[a-zA-Z])
(?<=[0-9])-(?=[0-9])
(?<=[A-Za-z]),(?=[A-Za-z])
(?<=[a-zöäüßA-ZÖÄÜ"]):(?=[a-zöäüßA-ZÖÄÜ])
(?<=[a-zöäüßA-ZÖÄÜ"])>(?=[a-zöäüßA-ZÖÄÜ])
(?<=[a-zöäüßA-ZÖÄÜ"])<(?=[a-zöäüßA-ZÖÄÜ])
(?<=[a-zöäüßA-ZÖÄÜ"])=(?=[a-zöäüßA-ZÖÄÜ])
'''.strip().split('\n')


__all__ = ["TOKENIZER_PREFIXES", "TOKENIZER_SUFFIXES", "TOKENIZER_INFIXES"]