Minor refactoring together with a wider support for html entities

This commit is contained in:
Miroslav Stampar 2012-07-30 11:21:32 +02:00
parent 20a66567a3
commit a86f9798b2
3 changed files with 282 additions and 1 deletions

View File

@ -1,5 +1,16 @@
# Reference: http://code.activestate.com/recipes/325205-cache-decorator-in-python-24/ #!/usr/bin/env python
"""
Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""
def cachedmethod(f, cache={}): def cachedmethod(f, cache={}):
"""
Method with a cached content
Reference: http://code.activestate.com/recipes/325205-cache-decorator-in-python-24/
"""
def _(*args, **kwargs): def _(*args, **kwargs):
key = (f, tuple(args), frozenset(kwargs.items())) key = (f, tuple(args), frozenset(kwargs.items()))
if key not in cache: if key not in cache:

263
lib/core/htmlentities.py Normal file
View File

@ -0,0 +1,263 @@
#!/usr/bin/env python
"""
Copyright (c) 2006-2012 sqlmap developers (http://sqlmap.org/)
See the file 'doc/COPYING' for copying permission
"""
# Reference: http://www.w3.org/TR/1999/REC-html401-19991224/sgml/entities.html
htmlEntities = {
'quot': 34,
'amp': 38,
'lt': 60,
'gt': 62,
'nbsp': 160,
'iexcl': 161,
'cent': 162,
'pound': 163,
'curren': 164,
'yen': 165,
'brvbar': 166,
'sect': 167,
'uml': 168,
'copy': 169,
'ordf': 170,
'laquo': 171,
'not': 172,
'shy': 173,
'reg': 174,
'macr': 175,
'deg': 176,
'plusmn': 177,
'sup2': 178,
'sup3': 179,
'acute': 180,
'micro': 181,
'para': 182,
'middot': 183,
'cedil': 184,
'sup1': 185,
'ordm': 186,
'raquo': 187,
'frac14': 188,
'frac12': 189,
'frac34': 190,
'iquest': 191,
'Agrave': 192,
'Aacute': 193,
'Acirc': 194,
'Atilde': 195,
'Auml': 196,
'Aring': 197,
'AElig': 198,
'Ccedil': 199,
'Egrave': 200,
'Eacute': 201,
'Ecirc': 202,
'Euml': 203,
'Igrave': 204,
'Iacute': 205,
'Icirc': 206,
'Iuml': 207,
'ETH': 208,
'Ntilde': 209,
'Ograve': 210,
'Oacute': 211,
'Ocirc': 212,
'Otilde': 213,
'Ouml': 214,
'times': 215,
'Oslash': 216,
'Ugrave': 217,
'Uacute': 218,
'Ucirc': 219,
'Uuml': 220,
'Yacute': 221,
'THORN': 222,
'szlig': 223,
'agrave': 224,
'aacute': 225,
'acirc': 226,
'atilde': 227,
'auml': 228,
'aring': 229,
'aelig': 230,
'ccedil': 231,
'egrave': 232,
'eacute': 233,
'ecirc': 234,
'euml': 235,
'igrave': 236,
'iacute': 237,
'icirc': 238,
'iuml': 239,
'eth': 240,
'ntilde': 241,
'ograve': 242,
'oacute': 243,
'ocirc': 244,
'otilde': 245,
'ouml': 246,
'divide': 247,
'oslash': 248,
'ugrave': 249,
'uacute': 250,
'ucirc': 251,
'uuml': 252,
'yacute': 253,
'thorn': 254,
'yuml': 255,
'OElig': 338,
'oelig': 339,
'Scaron': 352,
'fnof': 402,
'scaron': 353,
'Yuml': 376,
'circ': 710,
'tilde': 732,
'Alpha': 913,
'Beta': 914,
'Gamma': 915,
'Delta': 916,
'Epsilon': 917,
'Zeta': 918,
'Eta': 919,
'Theta': 920,
'Iota': 921,
'Kappa': 922,
'Lambda': 923,
'Mu': 924,
'Nu': 925,
'Xi': 926,
'Omicron': 927,
'Pi': 928,
'Rho': 929,
'Sigma': 931,
'Tau': 932,
'Upsilon': 933,
'Phi': 934,
'Chi': 935,
'Psi': 936,
'Omega': 937,
'alpha': 945,
'beta': 946,
'gamma': 947,
'delta': 948,
'epsilon': 949,
'zeta': 950,
'eta': 951,
'theta': 952,
'iota': 953,
'kappa': 954,
'lambda': 955,
'mu': 956,
'nu': 957,
'xi': 958,
'omicron': 959,
'pi': 960,
'rho': 961,
'sigmaf': 962,
'sigma': 963,
'tau': 964,
'upsilon': 965,
'phi': 966,
'chi': 967,
'psi': 968,
'omega': 969,
'thetasym': 977,
'upsih': 978,
'piv': 982,
'bull': 8226,
'hellip': 8230,
'prime': 8242,
'Prime': 8243,
'oline': 8254,
'frasl': 8260,
'ensp': 8194,
'emsp': 8195,
'thinsp': 8201,
'zwnj': 8204,
'zwj': 8205,
'lrm': 8206,
'rlm': 8207,
'ndash': 8211,
'mdash': 8212,
'lsquo': 8216,
'rsquo': 8217,
'sbquo': 8218,
'ldquo': 8220,
'rdquo': 8221,
'bdquo': 8222,
'dagger': 8224,
'Dagger': 8225,
'permil': 8240,
'lsaquo': 8249,
'rsaquo': 8250,
'euro': 8364,
'weierp': 8472,
'image': 8465,
'real': 8476,
'trade': 8482,
'alefsym': 8501,
'larr': 8592,
'uarr': 8593,
'rarr': 8594,
'darr': 8595,
'harr': 8596,
'crarr': 8629,
'lArr': 8656,
'uArr': 8657,
'rArr': 8658,
'dArr': 8659,
'hArr': 8660,
'forall': 8704,
'part': 8706,
'exist': 8707,
'empty': 8709,
'nabla': 8711,
'isin': 8712,
'notin': 8713,
'ni': 8715,
'prod': 8719,
'sum': 8721,
'minus': 8722,
'lowast': 8727,
'radic': 8730,
'prop': 8733,
'infin': 8734,
'ang': 8736,
'and': 8743,
'or': 8744,
'cap': 8745,
'cup': 8746,
'int': 8747,
'there4': 8756,
'sim': 8764,
'cong': 8773,
'asymp': 8776,
'ne': 8800,
'equiv': 8801,
'le': 8804,
'ge': 8805,
'sub': 8834,
'sup': 8835,
'nsub': 8836,
'sube': 8838,
'supe': 8839,
'oplus': 8853,
'otimes': 8855,
'perp': 8869,
'sdot': 8901,
'lceil': 8968,
'rceil': 8969,
'lfloor': 8970,
'rfloor': 8971,
'lang': 9001,
'rang': 9002,
'loz': 9674,
'spades': 9824,
'clubs': 9827,
'hearts': 9829,
'diams': 9830,
}

View File

@ -23,6 +23,7 @@ from lib.core.data import kb
from lib.core.data import logger from lib.core.data import logger
from lib.core.enums import HTTPHEADER from lib.core.enums import HTTPHEADER
from lib.core.enums import PLACE from lib.core.enums import PLACE
from lib.core.htmlentities import htmlEntities
from lib.core.settings import DEFAULT_COOKIE_DELIMITER from lib.core.settings import DEFAULT_COOKIE_DELIMITER
from lib.core.settings import ML from lib.core.settings import ML
from lib.core.settings import META_CHARSET_REGEX from lib.core.settings import META_CHARSET_REGEX
@ -215,12 +216,18 @@ def decodePage(page, contentEncoding, contentType):
if "&#" in page: if "&#" in page:
page = re.sub('&#(\d{1,3});', lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page) page = re.sub('&#(\d{1,3});', lambda _: chr(int(_.group(1))) if int(_.group(1)) < 256 else _.group(0), page)
# e.g. &amp;
page = re.sub('&([^;]+);', lambda _: chr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 256) < 256 else _.group(0), page)
kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page)) kb.pageEncoding = kb.pageEncoding or checkCharEncoding(getHeuristicCharEncoding(page))
page = getUnicode(page, kb.pageEncoding) page = getUnicode(page, kb.pageEncoding)
# e.g. &#8217;&#8230;&#8482; # e.g. &#8217;&#8230;&#8482;
if "&#" in page: if "&#" in page:
page = re.sub('&#(\d+);', lambda _: unichr(int(_.group(1))), page) page = re.sub('&#(\d+);', lambda _: unichr(int(_.group(1))), page)
# e.g. &zeta;
page = re.sub('&([^;]+);', lambda _: unichr(htmlEntities[_.group(1)]) if htmlEntities.get(_.group(1), 0) > 255 else _.group(0), page)
return page return page