# encoding: utf8
"""Punctuation tables for a tokenizer: prefix, suffix and infix patterns.

Each table is written as a raw triple-quoted string holding one pattern per
line and is turned into a ``list`` of strings with ``.strip().split('\n')``.
Most entries are regular-expression fragments (several use look-behind /
look-ahead assertions), so the line structure of the raw strings is
significant: every physical line becomes exactly one list entry.

The letter classes spell out ``a-z`` plus the accented letters
``ü ó ő ú é á ű í`` (and their upper-case forms in the infix table).

NOTE(review): the code that compiles these fragments is not part of this
module; how the entries are anchored or escaped should be confirmed against
that consumer.
"""
from __future__ import unicode_literals


# Patterns split off the *front* of a token.
# NOTE(review): a lone ``+`` is not a valid regular expression on its own, so
# the consumer presumably escapes prefix entries or treats them as literals
# -- confirm before changing this entry.
TOKENIZER_PREFIXES = r'''
+
'''.strip().split('\n')


# Patterns split off the *end* of a token: closing punctuation and quotes,
# runs of dots, a sentence-final period after a lower-case letter / closing
# bracket / quote, and unit abbreviations that directly follow a digit
# (the ``(?<=[0-9])`` look-behind keeps the digit out of the match).
TOKENIZER_SUFFIXES = r'''
,
\"
\)
\]
\}
\*
\!
\?
\$
>
:
;
'
”
“
«
_
''
’
‘
€
\.\.
\.\.\.
\.\.\.\.
(?<=[a-züóőúéáűí)\]"'´«‘’%\)²“”+-])\.
(?<=[a-züóőúéáűí)])-e
\-\-
´
(?<=[0-9])\+
(?<=[a-z0-9üóőúéáűí][\)\]”"'%\)§/])\.
(?<=[0-9])km²
(?<=[0-9])m²
(?<=[0-9])cm²
(?<=[0-9])mm²
(?<=[0-9])km³
(?<=[0-9])m³
(?<=[0-9])cm³
(?<=[0-9])mm³
(?<=[0-9])ha
(?<=[0-9])km
(?<=[0-9])m
(?<=[0-9])cm
(?<=[0-9])mm
(?<=[0-9])µm
(?<=[0-9])nm
(?<=[0-9])yd
(?<=[0-9])in
(?<=[0-9])ft
(?<=[0-9])kg
(?<=[0-9])g
(?<=[0-9])mg
(?<=[0-9])µg
(?<=[0-9])t
(?<=[0-9])lb
(?<=[0-9])oz
(?<=[0-9])m/s
(?<=[0-9])km/h
(?<=[0-9])mph
(?<=°[FCK])\.
(?<=[0-9])hPa
(?<=[0-9])Pa
(?<=[0-9])mbar
(?<=[0-9])mb
(?<=[0-9])T
(?<=[0-9])G
(?<=[0-9])M
(?<=[0-9])K
(?<=[0-9])kb
'''.strip().split('\n')


# Patterns that split a token in the *middle*: an ellipsis character, runs of
# two or more dots, a period between a lower-case and an upper-case letter,
# a double quote or double hyphen between word characters, an arithmetic
# operator between two digits, and a comma between two letters.
TOKENIZER_INFIXES = r'''
…
\.\.+
(?<=[a-züóőúéáűí])\.(?=[A-ZÜÓŐÚÉÁŰÍ])
(?<=[a-zA-ZüóőúéáűíÜÓŐÚÉÁŰÍ0-9])"(?=[\-a-zA-ZüóőúéáűíÜÓŐÚÉÁŰÍ])
(?<=[a-zA-ZüóőúéáűíÜÓŐÚÉÁŰÍ])--(?=[a-zA-ZüóőúéáűíÜÓŐÚÉÁŰÍ])
(?<=[0-9])[+\-\*/^](?=[0-9])
(?<=[a-zA-ZüóőúéáűíÜÓŐÚÉÁŰÍ]),(?=[a-zA-ZüóőúéáűíÜÓŐÚÉÁŰÍ])
'''.strip().split('\n')


# Public names exported by ``from <module> import *``.
__all__ = ["TOKENIZER_PREFIXES", "TOKENIZER_SUFFIXES", "TOKENIZER_INFIXES"]