mirror of
				https://github.com/sqlmapproject/sqlmap.git
				synced 2025-10-26 13:41:10 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			3417 lines
		
	
	
		
			124 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			3417 lines
		
	
	
		
			124 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """HTML form handling for web clients.
 | |
| 
 | |
| ClientForm is a Python module for handling HTML forms on the client
 | |
| side, useful for parsing HTML forms, filling them in and returning the
 | |
| completed forms to the server.  It has developed from a port of Gisle
 | |
| Aas' Perl module HTML::Form, from the libwww-perl library, but the
 | |
| interface is not the same.
 | |
| 
 | |
| The most useful docstring is the one for HTMLForm.
 | |
| 
 | |
| RFC 1866: HTML 2.0
 | |
| RFC 1867: Form-based File Upload in HTML
 | |
| RFC 2388: Returning Values from Forms: multipart/form-data
 | |
| HTML 3.2 Specification, W3C Recommendation 14 January 1997 (for ISINDEX)
 | |
| HTML 4.01 Specification, W3C Recommendation 24 December 1999
 | |
| 
 | |
| 
 | |
| Copyright 2002-2007 John J. Lee <jjl@pobox.com>
 | |
| Copyright 2005 Gary Poster
 | |
| Copyright 2005 Zope Corporation
 | |
| Copyright 1998-2000 Gisle Aas.
 | |
| 
 | |
| This code is free software; you can redistribute it and/or modify it
 | |
| under the terms of the BSD or ZPL 2.1 licenses (see the file
 | |
| COPYING.txt included with the distribution).
 | |
| 
 | |
| """
 | |
| 
 | |
| # XXX
 | |
| # Remove parser testing hack
 | |
| # safeUrl()-ize action
 | |
| # Switch to unicode throughout (would be 0.3.x)
 | |
| #  See Wichert Akkerman's 2004-01-22 message to c.l.py.
 | |
| # Add charset parameter to Content-type headers?  How to find value??
 | |
| # Add some more functional tests
 | |
| #  Especially single and multiple file upload on the internet.
 | |
| #  Does file upload work when name is missing?  Sourceforge tracker form
 | |
| #   doesn't like it.  Check standards, and test with Apache.  Test
 | |
| #   binary upload with Apache.
 | |
| # mailto submission & enctype text/plain
 | |
| # I'm not going to fix this unless somebody tells me what real servers
 | |
| #  that want this encoding actually expect: If enctype is
 | |
| #  application/x-www-form-urlencoded and there's a FILE control present.
 | |
| #  Strictly, it should be 'name=data' (see HTML 4.01 spec., section
 | |
| #  17.13.2), but I send "name=" ATM.  What about multiple file upload??
 | |
| 
 | |
| # Would be nice, but I'm not going to do it myself:
 | |
| # -------------------------------------------------
 | |
| # Maybe a 0.4.x?
 | |
| #   Replace by_label etc. with moniker / selector concept. Allows, eg.,
 | |
| #    a choice between selection by value / id / label / element
 | |
| #    contents.  Or choice between matching labels exactly or by
 | |
| #    substring.  Etc.
 | |
| #   Remove deprecated methods.
 | |
| #   ...what else?
 | |
| # Work on DOMForm.
 | |
| # XForms?  Don't know if there's a need here.
 | |
| 
 | |
| __all__ = ['AmbiguityError', 'CheckboxControl', 'Control',
 | |
|            'ControlNotFoundError', 'FileControl', 'FormParser', 'HTMLForm',
 | |
|            'HiddenControl', 'IgnoreControl', 'ImageControl', 'IsindexControl',
 | |
|            'Item', 'ItemCountError', 'ItemNotFoundError', 'Label',
 | |
|            'ListControl', 'LocateError', 'Missing', 'ParseError', 'ParseFile',
 | |
|            'ParseFileEx', 'ParseResponse', 'ParseResponseEx','PasswordControl',
 | |
|            'RadioControl', 'ScalarControl', 'SelectControl',
 | |
|            'SubmitButtonControl', 'SubmitControl', 'TextControl',
 | |
|            'TextareaControl', 'XHTMLCompatibleFormParser']
 | |
| 
 | |
try:
    import logging
    import inspect
except ImportError:
    def debug(msg, *args, **kwds):
        """No-op replacement used when logging/inspect cannot be imported."""
        pass
else:
    _logger = logging.getLogger("ClientForm")
    # While true, debug() returns immediately so the (costly) stack
    # inspection is skipped; _show_debug_messages() switches it off.
    OPTIMIZATION_HACK = True

    def debug(msg, *args, **kwds):
        """Log msg on the "ClientForm" logger, prefixed by the caller's name.

        msg is a %-style format string and args its arguments; kwds are
        passed through to Logger.debug().  Does nothing while
        OPTIMIZATION_HACK is true.  Always returns None.
        """
        if OPTIMIZATION_HACK:
            return

        # frame 1 is whoever called debug(); index 3 is its function name
        caller_name = inspect.stack()[1][3]
        # "%%s" survives this first formatting pass as "%s", which the
        # logger later fills in with caller_name
        extended_msg = '%%s %s' % msg
        extended_args = (caller_name,)+args
        _logger.debug(extended_msg, *extended_args, **kwds)

    def _show_debug_messages():
        """Enable debug() output and send it to sys.stdout."""
        global OPTIMIZATION_HACK
        # imported locally so this function does not depend on the
        # module-level "import sys" that appears further down the file
        import sys
        OPTIMIZATION_HACK = False
        _logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler(sys.stdout)
        handler.setLevel(logging.DEBUG)
        _logger.addHandler(handler)
 | |
| 
 | |
| try:
 | |
|     from thirdparty import six
 | |
|     from thirdparty.six import unichr as _unichr
 | |
|     from thirdparty.six.moves import cStringIO as _cStringIO
 | |
|     from thirdparty.six.moves import html_entities as _html_entities
 | |
|     from thirdparty.six.moves import urllib as _urllib
 | |
| except ImportError:
 | |
|     import six
 | |
|     from six import unichr as _unichr
 | |
|     from six.moves import cStringIO as _cStringIO
 | |
|     from six.moves import html_entities as _html_entities
 | |
|     from six.moves import urllib as _urllib
 | |
| 
 | |
| try:
 | |
|     import sgmllib
 | |
| except ImportError:
 | |
|     from lib.utils import sgmllib
 | |
| 
 | |
| import sys, re, random
 | |
| 
 | |
| if sys.version_info >= (3, 0):
 | |
|     xrange = range
 | |
| 
 | |
| # monkeypatch to fix http://www.python.org/sf/803422 :-(
 | |
| sgmllib.charref = re.compile("&#(x?[0-9a-fA-F]+)[^0-9a-fA-F]")
 | |
| 
 | |
| # HTMLParser.HTMLParser is recent, so live without it if it's not available
 | |
| # (also, sgmllib.SGMLParser is much more tolerant of bad HTML)
 | |
| try:
 | |
|     import HTMLParser
 | |
| except ImportError:
 | |
|     HAVE_MODULE_HTMLPARSER = False
 | |
| else:
 | |
|     HAVE_MODULE_HTMLPARSER = True
 | |
| 
 | |
try:
    import warnings
except ImportError:
    def deprecation(message, stack_offset=0):
        """Do nothing: the warnings module is not available."""
        return None
else:
    def deprecation(message, stack_offset=0):
        """Emit a DeprecationWarning carrying message.

        The warning is issued with stacklevel 3 + stack_offset, so a larger
        stack_offset attributes it to a frame further up the call stack.
        """
        level = 3 + stack_offset
        warnings.warn(message, DeprecationWarning, stacklevel=level)
 | |
| 
 | |
| VERSION = "0.2.10"
 | |
| 
 | |
| CHUNK = 1024  # size of chunks fed to parser, in bytes
 | |
| 
 | |
| DEFAULT_ENCODING = "latin-1"
 | |
| 
 | |
class Missing:
    """Empty marker class; listed in __all__ as part of the public API."""
 | |
| 
 | |
_compress_re = re.compile(r"\s+")


def compress_text(text):
    """Strip text, then replace every run of whitespace by one space."""
    stripped = text.strip()
    return _compress_re.sub(" ", stripped)
 | |
| 
 | |
def normalize_line_endings(text):
    """Return text with every lone "\\n" or lone "\\r" turned into "\\r\\n".

    Existing "\\r\\n" pairs are left untouched.
    """
    # first alternative: "\n" not preceded by "\r";
    # second alternative: "\r" not followed by "\n"
    lone_terminator = r"(?:(?<!\r)\n)|(?:\r(?!\n))"
    return re.sub(lone_terminator, "\r\n", text)
 | |
| 
 | |
def _quote_plus(value):
    """URL-quote value with quote_plus, coercing it to UTF-8 bytes first.

    Anything that is not a string is converted with six.text_type(); text
    is then encoded as UTF-8 before being handed to quote_plus().
    """
    if isinstance(value, six.string_types):
        text = value
    else:
        text = six.text_type(value)

    if isinstance(text, six.text_type):
        text = text.encode("utf8")

    return _urllib.parse.quote_plus(text)
 | |
| 
 | |
| # This version of urlencode is from my Python 1.5.2 back-port of the
 | |
| # Python 2.1 CVS maintenance branch of urllib.  It will accept a sequence
 | |
| # of pairs instead of a mapping -- the 2.0 version only accepts a mapping.
 | |
def urlencode(query, doseq=False):
    """Encode a mapping or a sequence of two-element tuples as a query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Keys and values are quoted with _quote_plus(); the "key=value" pairs are
    joined with "&".

    Raises TypeError if query is neither a mapping nor a sequence whose
    first element is a tuple (plain strings are rejected this way).
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # len() fails for non-sequences; a non-empty string fails the
            # tuple check.  Zero-length sequences of any type are accepted,
            # mirroring the fact that empty dicts are accepted.
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError()
        except TypeError:
            # The message is the only argument: the traceback object must
            # not end up in the exception's args.
            raise TypeError("not a valid non-string sequence or mapping "
                            "object")

    pairs = []
    for k, v in query:
        k = _quote_plus(k)
        if doseq and not isinstance(v, six.string_types):
            try:
                # is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence: fall through and encode it as a scalar
                pass
            else:
                # one "key=element" pair per sequence element
                for elt in v:
                    pairs.append(k + '=' + _quote_plus(elt))
                continue
        pairs.append(k + '=' + _quote_plus(v))
    return '&'.join(pairs)
 | |
| 
 | |
def unescape(data, entities, encoding=DEFAULT_ENCODING):
    """Replace entity and character references found in data.

    data is returned unchanged when it is None or contains no "&".
    entities maps complete references such as "&amp;" to their replacement.
    encoding is only used to decode replacements that have a decode()
    method, and is ignored (set to None) when data is a string type.
    References with no entry in entities are left as they are.
    """
    if data is None or "&" not in data:
        return data

    if isinstance(data, six.string_types):
        encoding = None

    def substitute(match):
        token = match.group()
        # "&#...;" is a numeric character reference
        if token[1] == "#":
            return unescape_charref(token[2:-1], encoding)

        replacement = entities.get(token)
        if replacement is None:
            return token
        if encoding is not None and hasattr(replacement, "decode"):
            try:
                return replacement.decode(encoding)
            except UnicodeError:
                # undecodable replacement: keep the original reference
                return token
        return replacement

    return re.sub(r"&#?[A-Za-z0-9]+?;", substitute, data)
 | |
| 
 | |
def unescape_charref(data, encoding):
    """Return the character named by a numeric character reference.

    data is the reference without its leading "&#" and trailing ";".  A
    leading "x" selects hexadecimal; a value that is not all digits is also
    tried as hexadecimal; anything else is read as decimal.  If the value
    cannot be parsed or is not a valid code point, data is returned
    unchanged.  encoding is accepted for interface compatibility and is not
    used.
    """
    name, base = data, 10
    if name.startswith("x"):
        name, base = name[1:], 16
    elif not name.isdigit():
        base = 16

    try:
        return _unichr(int(name, base))
    except (ValueError, OverflowError):
        # malformed number or code point out of range: best effort, hand
        # the reference text back untouched
        return data
 | |
| 
 | |
def get_entitydefs():
    """Build a dict mapping "&name;" to the character it stands for.

    Uses html_entities.name2codepoint when that attribute exists; otherwise
    falls back to html_entities.entitydefs, decoding each value as latin-1
    and resolving values that are themselves "&#...;" references.
    """
    from codecs import latin_1_decode
    table = {}
    if hasattr(_html_entities, "name2codepoint"):
        for name, codepoint in _html_entities.name2codepoint.items():
            table["&%s;" % name] = _unichr(codepoint)
    else:
        for name, char in _html_entities.entitydefs.items():
            decoded = latin_1_decode(char)[0]
            if decoded.startswith("&#") and decoded.endswith(";"):
                decoded = unescape_charref(decoded[2:-1], None)
            table["&%s;" % name] = decoded
    return table
 | |
| 
 | |
def issequence(x):
    """Return True if x supports indexing with an integer.

    An IndexError from x[0] (e.g. an empty list) still counts as a
    sequence; TypeError or KeyError means it is not one.
    """
    try:
        x[0]
    except IndexError:
        return True
    except (TypeError, KeyError):
        return False
    else:
        return True
 | |
| 
 | |
def isstringlike(x):
    """Return True if x can be concatenated with a string (x + "")."""
    try:
        x + ""
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not swallowed
        return False
    return True
 | |
| 
 | |
| 
 | |
def choose_boundary():
    """Return a string usable as a multipart boundary."""
    # follow IE and firefox: 27 dashes followed by random decimal digits
    upper = sys.maxsize - 1
    digits = [str(random.randint(0, upper)) for _ in (0, 1, 2)]
    return "-" * 27 + "".join(digits)
 | |
| 
 | |
# This MimeWriter, cut-and-pasted from the standard library, lives here so
# that it can add to the HTTP headers rather than the message body when
# appropriate.  It also uses \r\n in place of \n.  This is a bit nasty.
 | |
class MimeWriter:

    """Generic MIME writer.

    Methods:

    __init__()
    addheader()
    flushheaders()
    startbody()
    startmultipartbody()
    nextpart()
    lastpart()

    A MIME writer is much more primitive than a MIME parser.  It
    doesn't seek around on the output file, and it doesn't use large
    amounts of buffer space, so you have to write the parts in the
    order they should occur on the output file.  It does buffer the
    headers you add, allowing you to rearrange their order.

    General usage is:

    f = <open the output file>
    w = MimeWriter(f)
    ...call w.addheader(key, value) 0 or more times...

    followed by either:

    f = w.startbody(content_type)
    ...call f.write(data) for body data...

    or:

    w.startmultipartbody(subtype)
    for each part:
        subwriter = w.nextpart()
        ...use the subwriter's methods to create the subpart...
    w.lastpart()

    The subwriter is another MimeWriter instance, and should be
    treated in the same way as the toplevel MimeWriter.  This way,
    writing recursive body parts is easy.

    Warning: don't forget to call lastpart()!

    XXX There should be more state so calls made in the wrong order
    are detected.

    Some special cases:

    - startbody() just returns the file passed to the constructor;
      but don't use this knowledge, as it may be changed.

    - startmultipartbody() actually returns a file as well;
      this can be used to write the initial 'if you can read this your
      mailer is not MIME-aware' message.

    - If you call flushheaders(), the headers accumulated so far are
      written out (and forgotten); this is useful if you don't need a
      body part at all, e.g. for a subpart of type message/rfc822
      that's (mis)used to store some header-like information.

    - Passing a keyword argument 'prefix=<flag>' to addheader(),
      start*body() affects where the header is inserted; 0 means
      append at the end, 1 means insert at the start; default is
      append for addheader(), but insert for start*body(), which use
      it to determine where the Content-type header goes.

    - Passing 'add_to_http_hdrs=<true>' appends the header as a
      (name, value) tuple to the http_hdrs list given to the constructor
      instead of writing it to the output file.

    """

    def __init__(self, fp, http_hdrs=None):
        """fp is the output file; http_hdrs is an optional list that
        collects (name, value) tuples for headers added with
        add_to_http_hdrs true."""
        self._http_hdrs = http_hdrs
        self._fp = fp
        self._headers = []
        self._boundary = []
        self._first_part = True

    def addheader(self, key, value, prefix=0,
                  add_to_http_hdrs=0):
        """Buffer a header line, or append it to the HTTP header list.

        prefix is ignored if add_to_http_hdrs is true.
        """
        lines = value.split("\r\n")
        # drop empty leading and trailing lines
        while lines and not lines[-1]: del lines[-1]
        while lines and not lines[0]: del lines[0]
        if add_to_http_hdrs:
            # HTTP header values are folded onto a single line
            value = "".join(lines)
            # 2.2 urllib2 doesn't normalize header case
            self._http_hdrs.append((key.capitalize(), value))
        else:
            # continuation lines are re-indented by four spaces
            lines[1:] = ["    " + cont.strip() for cont in lines[1:]]
            value = "\r\n".join(lines) + "\r\n"
            line = key.title() + ": " + value
            if prefix:
                self._headers.insert(0, line)
            else:
                self._headers.append(line)

    def flushheaders(self):
        """Write the buffered headers to the output file and forget them."""
        self._fp.writelines(self._headers)
        self._headers = []

    def startbody(self, ctype=None, plist=None, prefix=1,
                  add_to_http_hdrs=0, content_type=1):
        """Emit the headers and return the file to write the body to.

        When both content_type and ctype are true, a Content-Type header is
        added first, built from ctype and the (name, value) pairs in plist.

        prefix is ignored if add_to_http_hdrs is true.
        """
        if content_type and ctype:
            for name, value in (plist or []):
                ctype = ctype + ';\r\n %s=%s' % (name, value)
            self.addheader("Content-Type", ctype, prefix=prefix,
                           add_to_http_hdrs=add_to_http_hdrs)
        self.flushheaders()
        # the blank line ending the header section is only needed when the
        # headers went into the body
        if not add_to_http_hdrs: self._fp.write("\r\n")
        self._first_part = True
        return self._fp

    def startmultipartbody(self, subtype, boundary=None, plist=None, prefix=1,
                           add_to_http_hdrs=0, content_type=1):
        """Start a multipart/<subtype> body and return the output file.

        A boundary is generated with choose_boundary() when none is given.
        """
        boundary = boundary or choose_boundary()
        self._boundary.append(boundary)
        return self.startbody("multipart/" + subtype,
                              [("boundary", boundary)] + list(plist or []),
                              prefix=prefix,
                              add_to_http_hdrs=add_to_http_hdrs,
                              content_type=content_type)

    def nextpart(self):
        """Write the boundary line for a new part and return its writer."""
        boundary = self._boundary[-1]
        if self._first_part:
            self._first_part = False
        else:
            self._fp.write("\r\n")
        self._fp.write("--" + boundary + "\r\n")
        return self.__class__(self._fp)

    def lastpart(self):
        """Write the closing boundary of the current multipart body."""
        if self._first_part:
            # no part was written yet: emit one opening boundary first
            self.nextpart()
        boundary = self._boundary.pop()
        self._fp.write("\r\n--" + boundary + "--\r\n")
 | |
| 
 | |
| 
 | |
class LocateError(ValueError):
    """ValueError subclass; base of the three lookup errors below."""


class AmbiguityError(LocateError):
    """A LocateError subclass."""


class ControlNotFoundError(LocateError):
    """A LocateError subclass."""


class ItemNotFoundError(LocateError):
    """A LocateError subclass."""


class ItemCountError(ValueError):
    """A ValueError subclass (not a LocateError)."""
 | |
| 
 | |
| # for backwards compatibility, ParseError derives from exceptions that were
 | |
| # raised by versions of ClientForm <= 0.2.5
 | |
if HAVE_MODULE_HTMLPARSER:
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError

    class ParseError(sgmllib.SGMLParseError, HTMLParser.HTMLParseError):
        """Parse failure; derives from both parser libraries' error types."""
elif hasattr(sgmllib, "SGMLParseError"):
    SGMLLIB_PARSEERROR = sgmllib.SGMLParseError

    class ParseError(sgmllib.SGMLParseError):
        """Parse failure; derives from sgmllib's own error type."""
else:
    # this sgmllib has no SGMLParseError, so RuntimeError stands in for it
    SGMLLIB_PARSEERROR = RuntimeError

    class ParseError(RuntimeError):
        """Parse failure; derives from RuntimeError."""
 | |
| 
 | |
| 
 | |
class _AbstractFormParser:
    """forms attribute contains HTMLForm instances on completion.

    While parsing, each entry of self.forms is a 3-tuple
    ((name, action, method, enctype), attrs_dict, controls), where controls
    is a list of (type, name, attrs_dict) tuples.  forms[0] collects the
    controls found outside any FORM element.

    This class calls self.unescape_attr_if_required(), which it does not
    define itself; presumably the concrete parser subclasses provide it.
    """
    # thanks to Moshe Zadka for an example of sgmllib/htmllib usage
    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        """entitydefs maps "&name;" to replacement text (defaults to
        get_entitydefs()); encoding is passed to the unescape helpers."""
        if entitydefs is None:
            entitydefs = get_entitydefs()
        self._entitydefs = entitydefs
        self._encoding = encoding

        self.base = None
        self.forms = []
        self.labels = []
        self._current_label = None
        self._current_form = None
        self._select = None
        self._optgroup = None
        self._option = None
        self._textarea = None

        # forms[0] will contain all controls that are outside of any form
        # self._global_form is an alias for self.forms[0]
        self._global_form = None
        self.start_form([])
        self.end_form()
        self._current_form = self._global_form = self.forms[0]

    def _attrs_to_dict(self, attrs):
        """Return a new dict of the (key, value) pairs in attrs, with each
        value passed through unescape_attr_if_required()."""
        d = {}
        for key, val in attrs:
            d[key] = self.unescape_attr_if_required(val)
        return d

    def do_base(self, attrs):
        """Record the href of a BASE element in self.base."""
        debug("%s", attrs)
        for key, value in attrs:
            if key == "href":
                self.base = self.unescape_attr_if_required(value)

    def end_body(self):
        """Close any label or form still open at the end of BODY."""
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is not self._global_form:
            self.end_form()

    def start_form(self, attrs):
        """Begin a new form; raises ParseError if one is already open."""
        debug("%s", attrs)
        if self._current_form is not self._global_form:
            raise ParseError("nested FORMs")
        name = None
        action = None
        enctype = "application/x-www-form-urlencoded"
        method = "GET"
        d = {}
        for key, value in attrs:
            if key == "name":
                name = self.unescape_attr_if_required(value)
            elif key == "action":
                action = self.unescape_attr_if_required(value)
            elif key == "method":
                method = self.unescape_attr_if_required(value.upper())
            elif key == "enctype":
                enctype = self.unescape_attr_if_required(value.lower())
            d[key] = self.unescape_attr_if_required(value)
        controls = []
        self._current_form = (name, action, method, enctype), d, controls

    def end_form(self):
        """Store the open form in self.forms and return to the global form.

        Raises ParseError if no form is open.
        """
        debug("")
        if self._current_label is not None:
            self.end_label()
        if self._current_form is self._global_form:
            raise ParseError("end of FORM before start")
        self.forms.append(self._current_form)
        self._current_form = self._global_form

    def start_select(self, attrs):
        """Begin a SELECT; raises ParseError when nested in SELECT/TEXTAREA."""
        debug("%s", attrs)
        if self._select is not None:
            raise ParseError("nested SELECTs")
        if self._textarea is not None:
            raise ParseError("SELECT inside TEXTAREA")
        d = self._attrs_to_dict(attrs)

        self._select = d
        self._add_label(d)

        # record the SELECT itself, so it is present even without OPTIONs
        self._append_select_control({"__select": d})

    def end_select(self):
        """Finish the open SELECT, closing any OPTION still open."""
        debug("")
        if self._select is None:
            raise ParseError("end of SELECT before start")

        if self._option is not None:
            self._end_option()

        self._select = None

    def start_optgroup(self, attrs):
        """Begin an OPTGROUP; raises ParseError outside of a SELECT."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTGROUP outside of SELECT")
        self._optgroup = self._attrs_to_dict(attrs)

    def end_optgroup(self):
        """Finish the open OPTGROUP; raises ParseError if none is open."""
        debug("")
        if self._optgroup is None:
            raise ParseError("end of OPTGROUP before start")
        self._optgroup = None

    def _start_option(self, attrs):
        """Begin an OPTION, implicitly closing a previous unclosed one."""
        debug("%s", attrs)
        if self._select is None:
            raise ParseError("OPTION outside of SELECT")
        if self._option is not None:
            self._end_option()

        self._option = self._attrs_to_dict(attrs)
        # an option inherits "disabled" from its enclosing OPTGROUP
        if (self._optgroup and "disabled" in self._optgroup and
            "disabled" not in self._option):
            self._option["disabled"] = None

    def _end_option(self):
        """Finish the open OPTION and append it to the form's controls.

        The stripped element text becomes "contents" and is the fallback for
        missing "value" and "label" attributes.
        """
        debug("")
        if self._option is None:
            raise ParseError("end of OPTION before start")

        contents = self._option.get("contents", "").strip()
        self._option["contents"] = contents
        if "value" not in self._option:
            self._option["value"] = contents
        if "label" not in self._option:
            self._option["label"] = contents
        # stuff dict of SELECT HTML attrs into a special private key
        #  (gets deleted again later)
        self._option["__select"] = self._select
        self._append_select_control(self._option)
        self._option = None

    def _append_select_control(self, attrs):
        """Append a ("select", name, attrs) entry to the current form."""
        debug("%s", attrs)
        controls = self._current_form[2]
        name = self._select.get("name")
        controls.append(("select", name, attrs))

    def start_textarea(self, attrs):
        """Begin a TEXTAREA; raises ParseError when nested in
        TEXTAREA/SELECT."""
        debug("%s", attrs)
        if self._textarea is not None:
            raise ParseError("nested TEXTAREAs")
        if self._select is not None:
            raise ParseError("TEXTAREA inside SELECT")
        d = self._attrs_to_dict(attrs)
        self._add_label(d)

        self._textarea = d

    def end_textarea(self):
        """Finish the open TEXTAREA and append it to the form's controls."""
        debug("")
        if self._textarea is None:
            raise ParseError("end of TEXTAREA before start")
        controls = self._current_form[2]
        name = self._textarea.get("name")
        controls.append(("textarea", name, self._textarea))
        self._textarea = None

    def start_label(self, attrs):
        """Begin a LABEL, closing a previous unclosed one.

        A label with a non-empty "for" attribute is stored in self.labels
        straight away and marked as taken.
        """
        debug("%s", attrs)
        if self._current_label:
            self.end_label()
        d = self._attrs_to_dict(attrs)
        taken = bool(d.get("for"))  # empty id is invalid
        d["__text"] = ""
        d["__taken"] = taken
        if taken:
            self.labels.append(d)
        self._current_label = d

    def end_label(self):
        """Finish the open LABEL; a stray end tag is silently ignored."""
        debug("")
        label = self._current_label
        if label is None:
            # something is ugly in the HTML, but we're ignoring it
            return
        self._current_label = None
        # if it is staying around, it is True in all cases
        del label["__taken"]

    def _add_label(self, d):
        """Attach the open, not yet taken label to control attrs d."""
        #debug("%s", d)
        if self._current_label is not None:
            if not self._current_label["__taken"]:
                self._current_label["__taken"] = True
                d["__label"] = self._current_label

    def handle_data(self, data):
        """Accumulate character data for the open OPTION, TEXTAREA or LABEL.

        Data outside all three is discarded.
        """
        debug("%s", data)

        if self._option is not None:
            # self._option is a dictionary of the OPTION element's HTML
            # attributes, but it has two special keys, one of which is the
            # special "contents" key contains text between OPTION tags (the
            # other is the "__select" key: see the end_option method)
            target = self._option
            key = "contents"
        elif self._textarea is not None:
            target = self._textarea
            key = "value"
            data = normalize_line_endings(data)
        # not if within option or textarea
        elif self._current_label is not None:
            target = self._current_label
            key = "__text"
        else:
            return

        if key not in target:
            if not data:
                # Nothing stored yet and nothing to store: leave the key
                # absent rather than indexing it (which raised KeyError).
                return
            # according to
            # http://www.w3.org/TR/html4/appendix/notes.html#h-B.3.1 line break
            # immediately after start tags or immediately before end tags must
            # be ignored, but real browsers only ignore a line break after a
            # start tag, so we'll do that.
            if data[0:2] == "\r\n":
                data = data[2:]
            elif data[0:1] in ["\n", "\r"]:
                data = data[1:]
            target[key] = data
        else:
            existing = target[key]
            # an attribute value may still be bytes; decode before appending
            if isinstance(existing, six.binary_type):
                existing = existing.decode("utf8", "replace")
            target[key] = existing + data

    def do_button(self, attrs):
        """Append a BUTTON element to the current form's controls."""
        debug("%s", attrs)
        d = {"type": "submit"}  # default
        d.update(self._attrs_to_dict(attrs))
        controls = self._current_form[2]

        name = d.get("name")
        # we don't want to lose information, so use a type string that
        # doesn't clash with INPUT TYPE={SUBMIT,RESET,BUTTON}
        # e.g. type for BUTTON/RESET is "resetbutton"
        #     (type for INPUT/RESET is "reset")
        control_type = d["type"]+"button"
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_input(self, attrs):
        """Append an INPUT element to the current form's controls."""
        debug("%s", attrs)
        d = {"type": "text"}  # default
        d.update(self._attrs_to_dict(attrs))
        controls = self._current_form[2]

        control_type = d["type"]
        name = d.get("name")
        self._add_label(d)
        controls.append((control_type, name, d))

    def do_isindex(self, attrs):
        """Append an ISINDEX element to the current form's controls."""
        debug("%s", attrs)
        d = self._attrs_to_dict(attrs)
        controls = self._current_form[2]

        self._add_label(d)
        # isindex doesn't have type or name HTML attributes
        controls.append(("isindex", None, d))

    def handle_entityref(self, name):
        """Feed the replacement for entity reference &name; to
        handle_data()."""
        #debug("%s", name)
        self.handle_data(unescape(
            '&%s;' % name, self._entitydefs, self._encoding))

    def handle_charref(self, name):
        """Feed the character for reference &#name; to handle_data()."""
        #debug("%s", name)
        self.handle_data(unescape_charref(name, self._encoding))

    def unescape_attr(self, name):
        """Return name with entity and character references replaced."""
        #debug("%s", name)
        return unescape(name, self._entitydefs, self._encoding)

    def unescape_attrs(self, attrs):
        """Return a copy of dict attrs with every value unescaped.

        Values that are themselves mappings are processed recursively.
        """
        #debug("%s", attrs)
        escaped_attrs = {}
        for key, val in attrs.items():
            try:
                val.items
            except AttributeError:
                escaped_attrs[key] = self.unescape_attr(val)
            else:
                # e.g. "__select" -- yuck!
                escaped_attrs[key] = self.unescape_attrs(val)
        return escaped_attrs

    def unknown_entityref(self, ref):
        """Pass an unrecognised entity reference through as literal text."""
        self.handle_data("&%s;" % ref)

    def unknown_charref(self, ref):
        """Pass an unrecognised character reference through as literal text."""
        self.handle_data("&#%s;" % ref)
 | |
| 
 | |
| 
 | |
if not HAVE_MODULE_HTMLPARSER:
    class XHTMLCompatibleFormParser:
        """Stand-in defined when HAVE_MODULE_HTMLPARSER is false.

        Instantiating it always raises ValueError.

        """
        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
            raise ValueError("HTMLParser could not be imported")
else:
    class XHTMLCompatibleFormParser(_AbstractFormParser, HTMLParser.HTMLParser):
        """Good for XHTML, bad for tolerance of incorrect HTML."""
        # thanks to Michael Howitz for this!
        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
            HTMLParser.HTMLParser.__init__(self)
            _AbstractFormParser.__init__(self, entitydefs, encoding)

        def feed(self, data):
            """Feed data to HTMLParser, re-raising its parse errors as
            ParseError."""
            try:
                HTMLParser.HTMLParser.feed(self, data)
            except HTMLParser.HTMLParseError as parse_failure:
                raise ParseError(parse_failure)

        def start_option(self, attrs):
            _AbstractFormParser._start_option(self, attrs)

        def end_option(self):
            _AbstractFormParser._end_option(self)

        def handle_starttag(self, tag, attrs):
            """Dispatch to start_<tag> if defined, otherwise to do_<tag>.

            Tags with neither handler are ignored.

            """
            for prefix in ("start_", "do_"):
                try:
                    handler = getattr(self, prefix + tag)
                except AttributeError:
                    continue
                # called outside the try so that an AttributeError raised by
                # the handler itself is not mistaken for a missing handler
                handler(attrs)
                return
            # unknown tag: nothing to do

        def handle_endtag(self, tag):
            """Dispatch to end_<tag> if defined; unknown tags are ignored."""
            try:
                handler = getattr(self, "end_" + tag)
            except AttributeError:
                return  # unknown tag
            handler()

        def unescape(self, name):
            # Use the entitydefs passed into constructor, not
            # HTMLParser.HTMLParser's entitydefs.
            return self.unescape_attr(name)

        def unescape_attr_if_required(self, name):
            # HTMLParser.HTMLParser already did it
            return name

        def unescape_attrs_if_required(self, attrs):
            # ditto
            return attrs

        def close(self):
            HTMLParser.HTMLParser.close(self)
            self.end_body()
 | |
| 
 | |
| 
 | |
class _AbstractSgmllibParser(_AbstractFormParser):
    """Behaviour shared by the parser classes derived from this one.

    Which unescaping hooks are defined depends on the running Python
    version, as selected at class-creation time below.

    """

    def do_option(self, attrs):
        _AbstractFormParser._start_option(self, attrs)

    if sys.version_info[:2] < (2, 5):
        # attribute values still need unescaping here
        def unescape_attr_if_required(self, name):
            return self.unescape_attr(name)

        def unescape_attrs_if_required(self, attrs):
            return self.unescape_attrs(attrs)
    else:
        # we override this attr to decode hex charrefs
        entity_or_charref = re.compile(
            '&(?:([a-zA-Z][-.a-zA-Z0-9]*)|#(x?[0-9a-fA-F]+))(;?)')

        def convert_entityref(self, name):
            entity_text = "&%s;" % name
            return unescape(entity_text, self._entitydefs, self._encoding)

        def convert_charref(self, name):
            charref_text = "%s" % name
            return unescape_charref(charref_text, self._encoding)

        def unescape_attr_if_required(self, name):
            # sgmllib already did it
            return name

        def unescape_attrs_if_required(self, attrs):
            # ditto
            return attrs
 | |
| 
 | |
| 
 | |
class FormParser(_AbstractSgmllibParser, sgmllib.SGMLParser):
    """Good for tolerance of incorrect HTML, bad for XHTML."""

    def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
        sgmllib.SGMLParser.__init__(self)
        _AbstractFormParser.__init__(self, entitydefs, encoding)

    def feed(self, data):
        """Feed data to SGMLParser, re-raising SGMLLIB_PARSEERROR as
        ParseError."""
        try:
            sgmllib.SGMLParser.feed(self, data)
        except SGMLLIB_PARSEERROR as parse_failure:
            raise ParseError(parse_failure)

    def close(self):
        """Close the underlying SGMLParser, then call end_body."""
        sgmllib.SGMLParser.close(self)
        self.end_body()
 | |
| 
 | |
| 
 | |
| # sigh, must support mechanize by allowing dynamic creation of classes based on
 | |
| # its bundled copy of BeautifulSoup (which was necessary because of dependency
 | |
| # problems)
 | |
| 
 | |
def _create_bs_classes(bs, icbinbs):
    """Build the two BeautifulSoup-based form parser classes.

    bs: base class for RobustFormParser
    icbinbs: base class for NestingRobustFormParser

    Returns the tuple (RobustFormParser, NestingRobustFormParser).

    """
    class _AbstractBSFormParser(_AbstractSgmllibParser):
        # class to delegate to; each concrete subclass below sets this
        bs_base_class = None

        def __init__(self, entitydefs=None, encoding=DEFAULT_ENCODING):
            _AbstractFormParser.__init__(self, entitydefs, encoding)
            self.bs_base_class.__init__(self)

        def handle_data(self, data):
            # both the form parser and the delegate get to see the data
            _AbstractFormParser.handle_data(self, data)
            self.bs_base_class.handle_data(self, data)

        def feed(self, data):
            try:
                self.bs_base_class.feed(self, data)
            except SGMLLIB_PARSEERROR as parse_failure:
                raise ParseError(parse_failure)

        def close(self):
            self.bs_base_class.close(self)
            self.end_body()

    class RobustFormParser(_AbstractBSFormParser, bs):
        """Tries to be highly tolerant of incorrect HTML."""
        bs_base_class = bs

    class NestingRobustFormParser(_AbstractBSFormParser, icbinbs):
        """Tries to be highly tolerant of incorrect HTML.

        Different from RobustFormParser in that it more often guesses nesting
        above missing end tags (see BeautifulSoup docs).

        """
        bs_base_class = icbinbs

    return RobustFormParser, NestingRobustFormParser
 | |
| 
 | |
# Optional BeautifulSoup support: when the module can be imported, build the
# two BeautifulSoup-based parser classes and add them to the public names.
try:
    if sys.version_info[:2] < (2, 2):
        raise ImportError  # BeautifulSoup uses generators
    import BeautifulSoup
except ImportError:
    # BeautifulSoup is unavailable: this block leaves RobustFormParser and
    # NestingRobustFormParser undefined
    pass
else:
    RobustFormParser, NestingRobustFormParser = _create_bs_classes(
        BeautifulSoup.BeautifulSoup, BeautifulSoup.ICantBelieveItsBeautifulSoup
        )
    __all__ += ['RobustFormParser', 'NestingRobustFormParser']
 | |
| 
 | |
| 
 | |
| #FormParser = XHTMLCompatibleFormParser  # testing hack
 | |
| #FormParser = RobustFormParser  # testing hack
 | |
| 
 | |
| 
 | |
def ParseResponseEx(response,
                    select_default=False,
                    form_parser_class=FormParser,
                    request_class=_urllib.request.Request,
                    entitydefs=None,
                    encoding=DEFAULT_ENCODING,

                    # private
                    _urljoin=_urllib.parse.urljoin,
                    _urlparse=_urllib.parse.urlparse,
                    _urlunparse=_urllib.parse.urlunparse,
                    ):
    """Like ParseResponse, with these differences:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.
    """
    document_uri = response.geturl()
    return _ParseFileEx(
        response, document_uri,
        select_default=select_default,
        ignore_errors=False,
        form_parser_class=form_parser_class,
        request_class=request_class,
        entitydefs=entitydefs,
        backwards_compat=False,
        encoding=encoding,
        _urljoin=_urljoin,
        _urlparse=_urlparse,
        _urlunparse=_urlunparse,
        )
 | |
| 
 | |
def ParseFileEx(file, base_uri,
                select_default=False,
                form_parser_class=FormParser,
                request_class=_urllib.request.Request,
                entitydefs=None,
                encoding=DEFAULT_ENCODING,

                # private
                _urljoin=_urllib.parse.urljoin,
                _urlparse=_urllib.parse.urlparse,
                _urlunparse=_urllib.parse.urlunparse,
                ):
    """Like ParseFile, with these differences:

    1. The returned list contains an extra item.  The first form in the list
    contains all controls not contained in any FORM element.

    2. The arguments ignore_errors and backwards_compat have been removed.

    3. Backwards-compatibility mode (backwards_compat=True) is not available.
    """
    return _ParseFileEx(
        file, base_uri,
        select_default=select_default,
        ignore_errors=False,
        form_parser_class=form_parser_class,
        request_class=request_class,
        entitydefs=entitydefs,
        backwards_compat=False,
        encoding=encoding,
        _urljoin=_urljoin,
        _urlparse=_urlparse,
        _urlunparse=_urlunparse,
        )
 | |
| 
 | |
def ParseResponse(response, *args, **kwds):
    """Parse HTTP response and return a list of HTMLForm instances.

    The return value of urllib2.urlopen can be conveniently passed to this
    function as the response parameter.

    ClientForm.ParseError is raised on parse errors.

    response: file-like object (supporting read() method) with a method
     geturl(), returning the URI of the HTTP response

    The remaining arguments are handed on unchanged; in positional order they
    are select_default, ignore_errors, form_parser_class, request_class,
    entitydefs, backwards_compat and encoding:

    select_default: for multiple-selection SELECT controls and RADIO controls,
     pick the first item as the default if none are selected in the HTML
    form_parser_class: class to instantiate and use to parse the HTML
    request_class: class to return from .click() method (default is
     _urllib.request.Request)
    entitydefs: mapping like {"&amp;": "&", ...} containing HTML entity
     definitions (a sensible default is used)
    encoding: character encoding used for encoding numeric character references
     when matching link text.  ClientForm does not attempt to find the encoding
     in a META HTTP-EQUIV attribute in the document itself (mechanize, for
     example, does do that and will pass the correct value to ClientForm using
     this parameter).

    backwards_compat: boolean that determines whether the returned HTMLForm
     objects are backwards-compatible with old code.  If backwards_compat is
     true:

     - ClientForm 0.1 code will continue to work as before.

     - Label searches that do not specify a nr (number or count) will always
       get the first match, even if other controls match.  If
       backwards_compat is False, label searches that have ambiguous results
       will raise an AmbiguityError.

     - Item label matching is done by strict string comparison rather than
       substring matching.

     - De-selecting individual list items is allowed even if the Item is
       disabled.

    The backwards_compat argument will be deprecated in a future release.

    Pass a true value for select_default if you want the behaviour specified by
    RFC 1866 (the HTML 2.0 standard), which is to select the first item in a
    RADIO or multiple-selection SELECT control if none were selected in the
    HTML.  Most browsers (including Microsoft Internet Explorer (IE) and
    Netscape Navigator) instead leave all items unselected in these cases.  The
    W3C HTML 4.0 standard leaves this behaviour undefined in the case of
    multiple-selection SELECT controls, but insists that at least one RADIO
    button should be checked at all times, in contradiction to browser
    behaviour.

    There is a choice of parsers.  ClientForm.XHTMLCompatibleFormParser (uses
    HTMLParser.HTMLParser) works best for XHTML, ClientForm.FormParser (uses
    sgmllib.SGMLParser) (the default) works better for ordinary grubby HTML.
    Note that HTMLParser is only available in Python 2.2 and later.  You can
    pass your own class in here as a hack to work around bad HTML, but at your
    own risk: there is no well-defined interface.

    """
    document_uri = response.geturl()
    all_forms = _ParseFileEx(response, document_uri, *args, **kwds)
    # drop the leading extra form (see ParseResponseEx)
    return all_forms[1:]
 | |
| 
 | |
def ParseFile(file, base_uri, *args, **kwds):
    """Parse HTML and return a list of HTMLForm instances.

    ClientForm.ParseError is raised on parse errors.

    file: file-like object (supporting read() method) containing HTML with zero
     or more forms to be parsed
    base_uri: the URI of the document (note that the base URI used to submit
     the form will be that given in the BASE element if present, not that of
     the document)

    For the other arguments and further details, see ParseResponse.__doc__.

    """
    all_forms = _ParseFileEx(file, base_uri, *args, **kwds)
    # drop the leading extra form (see ParseFileEx)
    return all_forms[1:]
 | |
| 
 | |
def _ParseFileEx(file, base_uri,
                 select_default=False,
                 ignore_errors=False,
                 form_parser_class=FormParser,
                 request_class=_urllib.request.Request,
                 entitydefs=None,
                 backwards_compat=True,
                 encoding=DEFAULT_ENCODING,
                 _urljoin=_urllib.parse.urljoin,
                 _urlparse=_urllib.parse.urlparse,
                 _urlunparse=_urllib.parse.urlunparse,
                 ):
    """Parse HTML read from file and return a list of HTMLForm instances.

    This is the implementation behind ParseFile, ParseResponse and their
    *Ex variants; the full list is returned here, without any slicing.

    file: file-like object supporting read(size)
    base_uri: URI of the document; replaced by the parser's base attribute
     when that is not None
    select_default: passed on to HTMLForm.new_control for every control
    ignore_errors: accepted for interface compatibility; not used here
    form_parser_class: class instantiated as
     form_parser_class(entitydefs, encoding) to do the parsing
    request_class, backwards_compat: passed on to the HTMLForm constructor
    entitydefs, encoding: passed on to the form parser
    _urljoin, _urlparse, _urlunparse: private hooks for the URL functions

    Raises ParseError (with its base_uri attribute set) if the parser fails.

    """
    if backwards_compat:
        deprecation("operating in backwards-compatibility mode", 1)
    fp = form_parser_class(entitydefs, encoding)
    while True:
        data = file.read(CHUNK)
        # Only an empty read marks the end of input.  A file-like object may
        # return fewer than CHUNK bytes before the end, so stopping on the
        # first short read could silently truncate the document.
        if not data:
            break
        try:
            fp.feed(data)
        except ParseError as e:
            e.base_uri = base_uri
            raise
    fp.close()
    if fp.base is not None:
        # HTML BASE element takes precedence over document URI
        base_uri = fp.base

    labels = []
    id_to_labels = {}
    for label_attrs in fp.labels:
        label = Label(label_attrs)
        labels.append(label)
        # several labels may refer to the same control id
        id_to_labels.setdefault(label_attrs["for"], []).append(label)

    forms = []
    for (name, action, method, enctype), attrs, controls in fp.forms:
        if action is None:
            action = base_uri
        else:
            if action and isinstance(action, six.binary_type):
                action = six.text_type(action, "utf8")
            action = _urljoin(base_uri, action)
        # would be nice to make HTMLForm class (form builder) pluggable
        form = HTMLForm(
            action, method, enctype, name, attrs, request_class,
            forms, labels, id_to_labels, backwards_compat)
        form._urlparse = _urlparse
        form._urlunparse = _urlunparse
        for position, control in enumerate(controls):
            control_type, control_name, control_attrs = control
            # index=position*10 allows ImageControl to return multiple
            # ordered pairs
            form.new_control(
                control_type, control_name, control_attrs,
                select_default=select_default, index=position*10)
        forms.append(form)

    for form in forms:
        try:
            form.fixup()
        except AttributeError as ex:
            # errors about disabled or readonly controls are tolerated here;
            # any other AttributeError is a real problem
            if not any(_ in str(ex) for _ in ("is disabled", "is readonly")):
                raise
    return forms
 | |
| 
 | |
| 
 | |
class Label:
    """Text label attached to a control.

    The text attribute is read-only.  It gives the stripped label text when
    _backwards_compat is true, and the compress_text() form of it otherwise.

    """
    def __init__(self, attrs):
        """
        attrs: mapping of the label's attributes; "for" gives the id and
         "__text" the label text

        """
        self.id = attrs.get("for")
        stripped = attrs.get("__text").strip()
        self._text = stripped
        self._ctext = compress_text(stripped)
        self.attrs = attrs
        self._backwards_compat = False  # maintained by HTMLForm

    def __getattr__(self, name):
        # only reached when normal attribute lookup has failed
        if name != "text":
            return getattr(Label, name)
        if self._backwards_compat:
            return self._text
        return self._ctext

    def __setattr__(self, name, value):
        if name != "text":
            self.__dict__[name] = value
            return
        # don't see any need for this, so make it read-only
        raise AttributeError("text attribute is read-only")

    def __str__(self):
        details = (self.id, self.text)
        return "<Label(id=%r, text=%r)>" % details
 | |
| 
 | |
| 
 | |
| def _get_label(attrs):
 | |
|     text = attrs.get("__label")
 | |
|     if text is not None:
 | |
|         return Label(text)
 | |
|     else:
 | |
|         return None
 | |
| 
 | |
class Control:
    """An HTML form control.

    An HTMLForm contains a sequence of Controls.  The Controls in an HTMLForm
    are accessed using the HTMLForm.find_control method or the
    HTMLForm.controls attribute.

    Control instances are usually constructed using the ParseFile /
    ParseResponse functions.  If you use those functions, you can ignore the
    rest of this paragraph.  A Control is only properly initialised after the
    fixup method has been called.  In fact, this is only strictly necessary for
    ListControl instances.  This is necessary because ListControls are built up
    from ListControls each containing only a single item, and their initial
    value(s) can only be known after the sequence is complete.

    The types and values that are acceptable for assignment to the value
    attribute are defined by subclasses.

    If the disabled attribute is true, this represents the state typically
    represented by browsers by 'greying out' a control.  If the disabled
    attribute is true, the Control will raise AttributeError if an attempt is
    made to change its value.  In addition, the control will not be considered
    'successful' as defined by the W3C HTML 4 standard -- ie. it will
    contribute no data to the return value of the HTMLForm.click* methods.  To
    enable a control, set the disabled attribute to a false value.

    If the readonly attribute is true, the Control will raise AttributeError if
    an attempt is made to change its value.  To make a control writable, set
    the readonly attribute to a false value.

    All controls have the disabled and readonly attributes, not only those that
    may have the HTML attributes of the same names.

    On assignment to the value attribute, the following exceptions are raised:
    TypeError, AttributeError (if the value attribute should not be assigned
    to, because the control is disabled, for example) and ValueError.

    If the name or value attributes are None, or the value is an empty list, or
    if the control is disabled, the control is not successful.

    Public attributes:

    type: string describing type of control (see the keys of the
     HTMLForm.type2class dictionary for the allowable values) (readonly)
    name: name of control (readonly)
    value: current value of control (subclasses may allow a single value, a
     sequence of values, or either)
    disabled: disabled state
    readonly: readonly state
    id: value of id HTML attribute

    """
    def __init__(self, type, name, attrs, index=None):
        """
        type: string describing type of control (see the keys of the
         HTMLForm.type2class dictionary for the allowable values)
        name: control name
        attrs: HTML attributes of control's HTML element

        """
        raise NotImplementedError()

    def add_to_form(self, form):
        """Remember form as this control's form and append self to its
        controls."""
        self._form = form
        form.controls.append(self)

    def fixup(self):
        """Do nothing; a hook for subclasses."""
        pass

    def is_of_kind(self, kind):
        raise NotImplementedError()

    def clear(self):
        raise NotImplementedError()

    def __getattr__(self, name):
        raise NotImplementedError()

    def __setattr__(self, name, value):
        raise NotImplementedError()

    def pairs(self):
        """Return list of (key, value) pairs suitable for passing to urlencode.
        """
        ordered = self._totally_ordered_pairs()
        return [(key, value) for (_index, key, value) in ordered]

    def _totally_ordered_pairs(self):
        """Return list of (index, key, value) tuples.

        Like pairs, but allows preserving correct ordering even where several
        controls are involved.

        """
        raise NotImplementedError()

    def _write_mime_data(self, mw, name, value):
        """Write data for a subitem of this control to a MimeWriter."""
        # called by HTMLForm
        part = mw.nextpart()
        disposition = 'form-data; name="%s"' % name
        part.addheader("Content-Disposition", disposition, 1)
        body = part.startbody(prefix=0)
        body.write(value)

    def __str__(self):
        raise NotImplementedError()

    def get_labels(self):
        """Return all labels (Label instances) for this control.

        If the control was surrounded by a <label> tag, that will be the first
        label; all other labels, connected by 'for' and 'id', are in the order
        that appear in the HTML.

        """
        found = []
        if self._label:
            found.append(self._label)
        control_id = self.id
        if control_id:
            found.extend(self._form._id_to_labels.get(control_id, ()))
        return found
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
class ScalarControl(Control):
    """Control whose value is not restricted to one of a prescribed set.

    Some ScalarControls don't accept any value attribute.  Otherwise, takes a
    single value, which must be string-like.

    Additional read-only public attribute:

    attrs: dictionary mapping the names of original HTML attributes of the
     control to their values

    """
    def __init__(self, type, name, attrs, index=None):
        self._index = index
        self._label = _get_label(attrs)
        # type and name are read-only through __setattr__, so they are
        # stored in the instance dictionary directly
        self.__dict__.update(type=type.lower(), name=name)
        self._value = attrs.get("value")
        self.disabled = "disabled" in attrs
        self.readonly = "readonly" in attrs
        self.id = attrs.get("id")

        self.attrs = attrs.copy()

        self._clicked = False

        self._urlparse = _urllib.parse.urlparse
        self._urlunparse = _urllib.parse.urlunparse

    def __getattr__(self, name):
        # only reached when normal attribute lookup has failed
        if name != "value":
            raise AttributeError("%s instance has no attribute '%s'" %
                                 (self.__class__.__name__, name))
        return self.__dict__["_value"]

    def __setattr__(self, name, value):
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name != "value":
            self.__dict__[name] = value
            return
        # assignment to value: validate before storing
        if not isstringlike(value):
            raise TypeError("must assign a string")
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        if self.disabled:
            raise AttributeError("control '%s' is disabled" % self.name)
        self.__dict__["_value"] = value

    def _totally_ordered_pairs(self):
        name = self.name
        value = self.value
        if name is not None and value is not None and not self.disabled:
            return [(self._index, name, value)]
        # unnamed, valueless or disabled: contributes nothing
        return []

    def clear(self):
        """Reset the value to None; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self.__dict__["_value"] = None

    def __str__(self):
        name = self.name
        value = self.value
        if name is None:
            name = "<None>"
        if value is None:
            value = "<None>"

        flags = [flag for flag in ("disabled", "readonly")
                 if getattr(self, flag)]
        info = ", ".join(flags)
        if info:
            info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
class TextControl(ScalarControl):
    """Textual input control.

    Covers:

    INPUT/TEXT
    INPUT/PASSWORD
    INPUT/HIDDEN
    TEXTAREA

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # a missing value attribute is treated as the empty string
        if self._value is None:
            self._value = ""
        # hidden controls are made read-only
        if self.type == "hidden":
            self.readonly = True

    def is_of_kind(self, kind):
        return kind == "text"
 | |
| 
 | |
| #---------------------------------------------------
 | |
class FileControl(ScalarControl):
    """File upload with INPUT TYPE=FILE.

    The value attribute of a FileControl is always None.  Use add_file instead.

    Additional public method: add_file

    """

    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        self._value = None
        # list of (file_object, content_type, filename) tuples
        self._upload_data = []

    def is_of_kind(self, kind):
        return kind == "file"

    def clear(self):
        """Forget all added files; raises AttributeError if readonly."""
        if self.readonly:
            raise AttributeError("control '%s' is readonly" % self.name)
        self._upload_data = []

    def __setattr__(self, name, value):
        if name not in ("value", "name", "type"):
            self.__dict__[name] = value
            return
        raise AttributeError("%s attribute is readonly" % name)

    def add_file(self, file_object, content_type=None, filename=None):
        """Add a file to be uploaded with this control.

        file_object: object with a read method
        content_type: None or string-like; None means
         "application/octet-stream"
        filename: None or string-like

        Raises TypeError if any argument is of the wrong kind.

        """
        if not hasattr(file_object, "read"):
            raise TypeError("file-like object must have read method")
        optional_strings = (
            (content_type, "content type must be None or string-like"),
            (filename, "filename must be None or string-like"),
            )
        for candidate, complaint in optional_strings:
            if candidate is not None and not isstringlike(candidate):
                raise TypeError(complaint)
        if content_type is None:
            content_type = "application/octet-stream"
        self._upload_data.append((file_object, content_type, filename))

    def _totally_ordered_pairs(self):
        # XXX should it be successful even if unnamed?
        if self.name is not None and not self.disabled:
            return [(self._index, self.name, "")]
        return []

    def _write_mime_data(self, mw, _name, _value):
        # called by HTMLForm
        # assert _name == self.name and _value == ''
        uploads = self._upload_data
        if len(uploads) >= 2:
            # multiple files: one nested multipart/mixed body
            container = mw.nextpart()
            disposition = 'form-data; name="%s"' % self.name
            container.addheader("Content-Disposition", disposition, prefix=1)
            container.startmultipartbody("mixed", prefix=0)
            for file_object, content_type, filename in uploads:
                part = container.nextpart()
                if filename is None:
                    filename = ""
                fn_part = '; filename="%s"' % filename
                disposition = "file%s" % fn_part
                part.addheader("Content-Disposition", disposition, prefix=1)
                body = part.startbody(content_type, prefix=0)
                body.write(file_object.read())
            container.lastpart()
            return

        if uploads:
            file_object, content_type, filename = uploads[0]
            if filename is None:
                filename = ""
        else:
            # no file added: an empty upload is written
            file_object = _cStringIO()
            content_type = "application/octet-stream"
            filename = ""
        part = mw.nextpart()
        fn_part = '; filename="%s"' % filename
        disposition = 'form-data; name="%s"%s' % (self.name, fn_part)
        part.addheader("Content-Disposition", disposition, prefix=1)
        body = part.startbody(content_type, prefix=0)
        body.write(file_object.read())

    def __str__(self):
        name = self.name
        if name is None:
            name = "<None>"

        if self._upload_data:
            shown = []
            for _file_object, _content_type, filename in self._upload_data:
                if filename is None:
                    shown.append("<Unnamed file>")
                else:
                    shown.append(filename)
            value = ", ".join(shown)
        else:
            value = "<No files added>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        info = ", ".join(flags)
        if info:
            info = " (%s)" % info

        return "<%s(%s=%s)%s>" % (self.__class__.__name__, name, value, info)
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
class IsindexControl(ScalarControl):
    """ISINDEX control.

    ISINDEX is the odd one out among HTML form controls: it predates regular
    HTML forms and is not really part of them.  Only one ISINDEX is allowed
    per HTML document, and submitting it is mutually exclusive with
    submitting a form.

    Because ISINDEX elements can nevertheless appear inside forms (probably
    bad HTML), ParseFile / ParseResponse include them in the HTMLForm
    instances they return.  The value can be set as for any other control,
    but ISINDEX controls have no name, so use the type argument of
    set_value.  On form submission the ISINDEX is not successful (no data is
    returned to the server on its behalf) unless the ISINDEX itself is
    clicked, in which case it is submitted instead of the form:

    form.set_value("my isindex value", type="isindex")
    _urllib.request.urlopen(form.click(type="isindex"))

    ISINDEX elements outside of FORMs are ignored.  To submit one by hand:

    url = _urllib.parse.urljoin(page_uri, "?"+_urllib.parse.quote_plus("my isindex value"))
    result = _urllib.request.urlopen(url)

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # normalise a missing initial value to the empty string
        if self._value is None:
            self._value = ""

    def is_of_kind(self, kind):
        """Return True for the kinds "text" and "clickable" only."""
        return kind in ("text", "clickable")

    def _totally_ordered_pairs(self):
        # An ISINDEX never contributes name/value pairs to a form submission.
        return []

    def _click(self, form, coord, return_type, request_class=_urllib.request.Request):
        """Build the ISINDEX submission for form.

        The query part of form.action is replaced by the URL-quoted control
        value and the fragment is dropped.  Depending on return_type the
        result is an empty list ("pairs"), a (url, None, []) tuple
        ("request_data"), or request_class(url) for anything else.  coord is
        not used.

        """
        # Relative URL for ISINDEX submission: instead of "foo=bar+baz",
        # want "bar+baz".
        # The HTML 4.01 spec does not seem to say how to submit ISINDEX
        # (it is deprecated there, but it should still say so); the HTML 3.2
        # spec does explain it.
        parsed = self._urlparse(form.action)
        head, (_query, _fragment) = parsed[:-2], parsed[-2:]
        url = self._urlunparse(
            head + (_urllib.parse.quote_plus(self.value), None))

        if return_type == "pairs":
            return []
        if return_type == "request_data":
            return url, None, []
        return request_class(url)

    def __str__(self):
        """Return "<ClassName(value) (flags)>"; flags only when set."""
        shown = self.value
        if shown is None:
            shown = "<None>"

        flags = []
        if self.disabled:
            flags.append("disabled")
        if self.readonly:
            flags.append("readonly")
        suffix = ", ".join(flags)
        if suffix:
            suffix = " (%s)" % suffix

        return "<%s(%s)%s>" % (self.__class__.__name__, shown, suffix)
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
class IgnoreControl(ScalarControl):
    """Control that we're not interested in.

    Covers:

    INPUT/RESET
    BUTTON/RESET
    INPUT/BUTTON
    BUTTON/BUTTON

    In HTML 4 terminology these controls are always unsuccessful: they never
    require any information to be returned to the server.

    BUTTON/BUTTON is used to generate events for script embedded in HTML.

    The value attribute of IgnoreControl is always None.

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # "_value" is not one of the names guarded by __setattr__ below, so
        # this is stored directly in the instance dictionary
        self._value = None

    def is_of_kind(self, kind):
        """Return False: an ignored control belongs to no kind."""
        return False

    def __setattr__(self, name, value):
        """Reject writes to value, name and type; store anything else."""
        if name in ("name", "type"):
            raise AttributeError("%s attribute is readonly" % name)
        if name == "value":
            raise AttributeError(
                "control '%s' is ignored, hence read-only" % self.name)
        self.__dict__[name] = value
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # ListControls
 | |
| 
 | |
| # helpers and subsidiary classes
 | |
| 
 | |
class Item:
    """A single selectable entry of a list control.

    Instances register themselves with the owning control on construction
    (they are appended to control.items).

    Attributes written at construction time:

    name: the "value" entry of the HTML attributes
    attrs: the HTML attributes mapping the item was built from
    id: the "id" HTML attribute, or None
    disabled: True if a "disabled" HTML attribute is present

    The only attributes that may be assigned afterwards are `selected`
    (delegated to the owning control) and `disabled` (coerced to bool);
    assigning anything else raises AttributeError.

    """
    def __init__(self, control, attrs, index=None):
        label = _get_label(attrs)
        # Written through __dict__ because __setattr__ below refuses every
        # attribute other than "selected" and "disabled".
        self.__dict__.update({
            "name": attrs["value"],
            "_labels": [label] if label else [],
            "attrs": attrs,
            "_control": control,
            "disabled": "disabled" in attrs,
            "_selected": False,
            "id": attrs.get("id"),
            "_index": index,
            })
        control.items.append(self)

    def get_labels(self):
        """Return all labels (Label instances) for this item.

        For items that represent radio buttons or checkboxes, if the item was
        surrounded by a <label> tag, that will be the first label; all other
        labels, connected by 'for' and 'id', are in the order that appear in
        the HTML.

        For items that represent select options, if the option had a label
        attribute, that will be the first label.  If the option has contents
        (text within the option tags) and it is not the same as the label
        attribute (if any), that will be a label.  There is nothing in the
        spec to my knowledge that makes an option with an id unable to be the
        target of a label's for attribute, so those are included, if any, for
        the sake of consistency and completeness.

        """
        res = list(self._labels)
        if self.id:
            # labels attached via <label for=...> are looked up on the form
            res.extend(self._control._form._id_to_labels.get(self.id, ()))
        return res

    def __getattr__(self, name):
        # Only reached when normal lookup fails; "selected" is a read-only
        # view of the private _selected flag.
        if name == "selected":
            return self._selected
        raise AttributeError(name)

    def __setattr__(self, name, value):
        if name == "selected":
            # the control enforces disabled/readonly and single-selection
            # rules, so selection changes always go through it
            self._control._set_selected_state(self, value)
        elif name == "disabled":
            self.__dict__["disabled"] = bool(value)
        else:
            raise AttributeError(name)

    def __str__(self):
        """Return the item name, "*"-prefixed if selected, in parentheses
        if disabled."""
        res = self.name
        if self.selected:
            res = "*" + res
        if self.disabled:
            res = "(%s)" % res
        return res

    def __repr__(self):
        # XXX appending the attrs without distinguishing them from name and id
        # is silly
        # list() is required: on Python 3 dict.items() is a view, and a list
        # cannot be concatenated with a view (TypeError).
        attrs = ([("name", self.name), ("id", self.id)] +
                 list(self.attrs.items()))
        return "<%s %s>" % (
            self.__class__.__name__,
            " ".join(["%s=%r" % (k, v) for k, v in attrs])
            )
 | |
| 
 | |
def disambiguate(items, nr, **kwds):
    """Pick one element of items, using nr to resolve ambiguity.

    items: sequence of candidates
    nr: 0-based index into items, or None to require a unique candidate
    kwds: the search criteria; only used to build the exception message

    Raises ItemNotFoundError if items is empty or nr is beyond its end, and
    AmbiguityError if nr is None and there is more than one candidate.

    """
    description = " ".join(["%s=%r" % pair for pair in kwds.items()])
    if not items:
        raise ItemNotFoundError(description)
    if nr is None:
        if len(items) > 1:
            raise AmbiguityError(description)
        nr = 0
    if nr >= len(items):
        raise ItemNotFoundError(description)
    return items[nr]
 | |
| 
 | |
| class ListControl(Control):
 | |
|     """Control representing a sequence of items.
 | |
| 
 | |
|     The value attribute of a ListControl represents the successful list items
 | |
|     in the control.  The successful list items are those that are selected and
 | |
|     not disabled.
 | |
| 
 | |
|     ListControl implements both list controls that take a length-1 value
 | |
|     (single-selection) and those that take length >1 values
 | |
|     (multiple-selection).
 | |
| 
 | |
|     ListControls accept sequence values only.  Some controls only accept
 | |
|     sequences of length 0 or 1 (RADIO, and single-selection SELECT).
 | |
|     In those cases, ItemCountError is raised if len(sequence) > 1.  CHECKBOXes
 | |
|     and multiple-selection SELECTs (those having the "multiple" HTML attribute)
 | |
|     accept sequences of any length.
 | |
| 
 | |
|     Note the following mistake:
 | |
| 
 | |
|     control.value = some_value
 | |
|     assert control.value == some_value    # not necessarily true
 | |
| 
 | |
|     The reason for this is that the value attribute always gives the list items
 | |
|     in the order they were listed in the HTML.
 | |
| 
 | |
|     ListControl items can also be referred to by their labels instead of names.
 | |
|     Use the label argument to .get(), and the .set_value_by_label(),
 | |
|     .get_value_by_label() methods.
 | |
| 
 | |
|     Note that, rather confusingly, though SELECT controls are represented in
 | |
|     HTML by SELECT elements (which contain OPTION elements, representing
 | |
|     individual list items), CHECKBOXes and RADIOs are not represented by *any*
 | |
|     element.  Instead, those controls are represented by a collection of INPUT
 | |
|     elements.  For example, this is a SELECT control, named "control1":
 | |
| 
 | |
|     <select name="control1">
 | |
|      <option>foo</option>
 | |
|      <option value="1">bar</option>
 | |
|     </select>
 | |
| 
 | |
|     and this is a CHECKBOX control, named "control2":
 | |
| 
 | |
|     <input type="checkbox" name="control2" value="foo" id="cbe1">
 | |
|     <input type="checkbox" name="control2" value="bar" id="cbe2">
 | |
| 
 | |
|     The id attribute of a CHECKBOX or RADIO ListControl is always that of its
 | |
|     first element (for example, "cbe1" above).
 | |
| 
 | |
| 
 | |
|     Additional read-only public attribute: multiple.
 | |
| 
 | |
|     """
 | |
| 
 | |
|     # ListControls are built up by the parser from their component items by
 | |
|     # creating one ListControl per item, consolidating them into a single
 | |
|     # master ListControl held by the HTMLForm:
 | |
| 
 | |
|     # -User calls form.new_control(...)
 | |
|     # -Form creates Control, and calls control.add_to_form(self).
 | |
|     # -Control looks for a Control with the same name and type in the form,
 | |
|     #  and if it finds one, merges itself with that control by calling
 | |
|     #  control.merge_control(self).  The first Control added to the form, of
 | |
|     #  a particular name and type, is the only one that survives in the
 | |
|     #  form.
 | |
|     # -Form calls control.fixup for all its controls.  ListControls in the
 | |
|     #  form know they can now safely pick their default values.
 | |
| 
 | |
|     # To create a ListControl without an HTMLForm, use:
 | |
| 
 | |
|     # control.merge_control(new_control)
 | |
| 
 | |
|     # (actually, it's much easier just to use ParseFile)
 | |
| 
 | |
|     _label = None
 | |
| 
 | |
|     def __init__(self, type, name, attrs={}, select_default=False,
 | |
|                  called_as_base_class=False, index=None):
 | |
|         """
 | |
|         select_default: for RADIO and multiple-selection SELECT controls, pick
 | |
|          the first item as the default if no 'selected' HTML attribute is
 | |
|          present
 | |
| 
 | |
|         """
 | |
|         if not called_as_base_class:
 | |
|             raise NotImplementedError()
 | |
| 
 | |
|         self.__dict__["type"] = type.lower()
 | |
|         self.__dict__["name"] = name
 | |
|         self._value = attrs.get("value")
 | |
|         self.disabled = False
 | |
|         self.readonly = False
 | |
|         self.id = attrs.get("id")
 | |
|         self._closed = False
 | |
| 
 | |
|         # As Controls are merged in with .merge_control(), self.attrs will
 | |
|         # refer to each Control in turn -- always the most recently merged
 | |
|         # control.  Each merged-in Control instance corresponds to a single
 | |
|         # list item: see ListControl.__doc__.
 | |
|         self.items = []
 | |
|         self._form = None
 | |
| 
 | |
|         self._select_default = select_default
 | |
|         self._clicked = False
 | |
| 
 | |
|     def clear(self):
 | |
|         self.value = []
 | |
| 
 | |
|     def is_of_kind(self, kind):
 | |
|         if kind  == "list":
 | |
|             return True
 | |
|         elif kind == "multilist":
 | |
|             return bool(self.multiple)
 | |
|         elif kind == "singlelist":
 | |
|             return not self.multiple
 | |
|         else:
 | |
|             return False
 | |
| 
 | |
|     def get_items(self, name=None, label=None, id=None,
 | |
|                   exclude_disabled=False):
 | |
|         """Return matching items by name or label.
 | |
| 
 | |
|         For argument docs, see the docstring for .get()
 | |
| 
 | |
|         """
 | |
|         if name is not None and not isstringlike(name):
 | |
|             raise TypeError("item name must be string-like")
 | |
|         if label is not None and not isstringlike(label):
 | |
|             raise TypeError("item label must be string-like")
 | |
|         if id is not None and not isstringlike(id):
 | |
|             raise TypeError("item id must be string-like")
 | |
|         items = []  # order is important
 | |
|         compat = self._form.backwards_compat
 | |
|         for o in self.items:
 | |
|             if exclude_disabled and o.disabled:
 | |
|                 continue
 | |
|             if name is not None and o.name != name:
 | |
|                 continue
 | |
|             if label is not None:
 | |
|                 for l in o.get_labels():
 | |
|                     if ((compat and l.text == label) or
 | |
|                         (not compat and l.text.find(label) > -1)):
 | |
|                         break
 | |
|                 else:
 | |
|                     continue
 | |
|             if id is not None and o.id != id:
 | |
|                 continue
 | |
|             items.append(o)
 | |
|         return items
 | |
| 
 | |
|     def get(self, name=None, label=None, id=None, nr=None,
 | |
|             exclude_disabled=False):
 | |
|         """Return item by name or label, disambiguating if necessary with nr.
 | |
| 
 | |
|         All arguments must be passed by name, with the exception of 'name',
 | |
|         which may be used as a positional argument.
 | |
| 
 | |
|         If name is specified, then the item must have the indicated name.
 | |
| 
 | |
|         If label is specified, then the item must have a label whose
 | |
|         whitespace-compressed, stripped, text substring-matches the indicated
 | |
|         label string (eg. label="please choose" will match
 | |
|         "  Do  please  choose an item ").
 | |
| 
 | |
|         If id is specified, then the item must have the indicated id.
 | |
| 
 | |
|         nr is an optional 0-based index of the items matching the query.
 | |
| 
 | |
|         If nr is the default None value and more than item is found, raises
 | |
|         AmbiguityError (unless the HTMLForm instance's backwards_compat
 | |
|         attribute is true).
 | |
| 
 | |
|         If no item is found, or if items are found but nr is specified and not
 | |
|         found, raises ItemNotFoundError.
 | |
| 
 | |
|         Optionally excludes disabled items.
 | |
| 
 | |
|         """
 | |
|         if nr is None and self._form.backwards_compat:
 | |
|             nr = 0  # :-/
 | |
|         items = self.get_items(name, label, id, exclude_disabled)
 | |
|         return disambiguate(items, nr, name=name, label=label, id=id)
 | |
| 
 | |
|     def _get(self, name, by_label=False, nr=None, exclude_disabled=False):
 | |
|         # strictly for use by deprecated methods
 | |
|         if by_label:
 | |
|             name, label = None, name
 | |
|         else:
 | |
|             name, label = name, None
 | |
|         return self.get(name, label, nr, exclude_disabled)
 | |
| 
 | |
|     def toggle(self, name, by_label=False, nr=None):
 | |
|         """Deprecated: given a name or label and optional disambiguating index
 | |
|         nr, toggle the matching item's selection.
 | |
| 
 | |
|         Selecting items follows the behavior described in the docstring of the
 | |
|         'get' method.
 | |
| 
 | |
|         if the item is disabled, or this control is disabled or readonly,
 | |
|         raise AttributeError.
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "item = control.get(...); item.selected = not item.selected")
 | |
|         o = self._get(name, by_label, nr)
 | |
|         self._set_selected_state(o, not o.selected)
 | |
| 
 | |
|     def set(self, selected, name, by_label=False, nr=None):
 | |
|         """Deprecated: given a name or label and optional disambiguating index
 | |
|         nr, set the matching item's selection to the bool value of selected.
 | |
| 
 | |
|         Selecting items follows the behavior described in the docstring of the
 | |
|         'get' method.
 | |
| 
 | |
|         if the item is disabled, or this control is disabled or readonly,
 | |
|         raise AttributeError.
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "control.get(...).selected = <boolean>")
 | |
|         self._set_selected_state(self._get(name, by_label, nr), selected)
 | |
| 
 | |
|     def _set_selected_state(self, item, action):
 | |
|         # action:
 | |
|         # bool False: off
 | |
|         # bool True: on
 | |
|         if self.disabled:
 | |
|             raise AttributeError("control '%s' is disabled" % self.name)
 | |
|         if self.readonly:
 | |
|             raise AttributeError("control '%s' is readonly" % self.name)
 | |
|         action = bool(action)
 | |
|         compat = self._form.backwards_compat
 | |
|         if not compat and item.disabled:
 | |
|             raise AttributeError("item is disabled")
 | |
|         else:
 | |
|             if compat and item.disabled and action:
 | |
|                 raise AttributeError("item is disabled")
 | |
|             if self.multiple:
 | |
|                 item.__dict__["_selected"] = action
 | |
|             else:
 | |
|                 if not action:
 | |
|                     item.__dict__["_selected"] = False
 | |
|                 else:
 | |
|                     for o in self.items:
 | |
|                         o.__dict__["_selected"] = False
 | |
|                     item.__dict__["_selected"] = True
 | |
| 
 | |
|     def toggle_single(self, by_label=None):
 | |
|         """Deprecated: toggle the selection of the single item in this control.
 | |
| 
 | |
|         Raises ItemCountError if the control does not contain only one item.
 | |
| 
 | |
|         by_label argument is ignored, and included only for backwards
 | |
|         compatibility.
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "control.items[0].selected = not control.items[0].selected")
 | |
|         if len(self.items) != 1:
 | |
|             raise ItemCountError(
 | |
|                 "'%s' is not a single-item control" % self.name)
 | |
|         item = self.items[0]
 | |
|         self._set_selected_state(item, not item.selected)
 | |
| 
 | |
|     def set_single(self, selected, by_label=None):
 | |
|         """Deprecated: set the selection of the single item in this control.
 | |
| 
 | |
|         Raises ItemCountError if the control does not contain only one item.
 | |
| 
 | |
|         by_label argument is ignored, and included only for backwards
 | |
|         compatibility.
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "control.items[0].selected = <boolean>")
 | |
|         if len(self.items) != 1:
 | |
|             raise ItemCountError(
 | |
|                 "'%s' is not a single-item control" % self.name)
 | |
|         self._set_selected_state(self.items[0], selected)
 | |
| 
 | |
|     def get_item_disabled(self, name, by_label=False, nr=None):
 | |
|         """Get disabled state of named list item in a ListControl."""
 | |
|         deprecation(
 | |
|             "control.get(...).disabled")
 | |
|         return self._get(name, by_label, nr).disabled
 | |
| 
 | |
|     def set_item_disabled(self, disabled, name, by_label=False, nr=None):
 | |
|         """Set disabled state of named list item in a ListControl.
 | |
| 
 | |
|         disabled: boolean disabled state
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "control.get(...).disabled = <boolean>")
 | |
|         self._get(name, by_label, nr).disabled = disabled
 | |
| 
 | |
|     def set_all_items_disabled(self, disabled):
 | |
|         """Set disabled state of all list items in a ListControl.
 | |
| 
 | |
|         disabled: boolean disabled state
 | |
| 
 | |
|         """
 | |
|         for o in self.items:
 | |
|             o.disabled = disabled
 | |
| 
 | |
|     def get_item_attrs(self, name, by_label=False, nr=None):
 | |
|         """Return dictionary of HTML attributes for a single ListControl item.
 | |
| 
 | |
|         The HTML element types that describe list items are: OPTION for SELECT
 | |
|         controls, INPUT for the rest.  These elements have HTML attributes that
 | |
|         you may occasionally want to know about -- for example, the "alt" HTML
 | |
|         attribute gives a text string describing the item (graphical browsers
 | |
|         usually display this as a tooltip).
 | |
| 
 | |
|         The returned dictionary maps HTML attribute names to values.  The names
 | |
|         and values are taken from the original HTML.
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "control.get(...).attrs")
 | |
|         return self._get(name, by_label, nr).attrs
 | |
| 
 | |
|     def close_control(self):
 | |
|         self._closed = True
 | |
| 
 | |
|     def add_to_form(self, form):
 | |
|         assert self._form is None or form == self._form, (
 | |
|             "can't add control to more than one form")
 | |
|         self._form = form
 | |
|         if self.name is None:
 | |
|             # always count nameless elements as separate controls
 | |
|             Control.add_to_form(self, form)
 | |
|         else:
 | |
|             for ii in xrange(len(form.controls)-1, -1, -1):
 | |
|                 control = form.controls[ii]
 | |
|                 if control.name == self.name and control.type == self.type:
 | |
|                     if control._closed:
 | |
|                         Control.add_to_form(self, form)
 | |
|                     else:
 | |
|                         control.merge_control(self)
 | |
|                     break
 | |
|             else:
 | |
|                 Control.add_to_form(self, form)
 | |
| 
 | |
|     def merge_control(self, control):
 | |
|         assert bool(control.multiple) == bool(self.multiple)
 | |
|         # usually, isinstance(control, self.__class__)
 | |
|         self.items.extend(control.items)
 | |
| 
 | |
|     def fixup(self):
 | |
|         """
 | |
|         ListControls are built up from component list items (which are also
 | |
|         ListControls) during parsing.  This method should be called after all
 | |
|         items have been added.  See ListControl.__doc__ for the reason this is
 | |
|         required.
 | |
| 
 | |
|         """
 | |
|         # Need to set default selection where no item was indicated as being
 | |
|         # selected by the HTML:
 | |
| 
 | |
|         # CHECKBOX:
 | |
|         #  Nothing should be selected.
 | |
|         # SELECT/single, SELECT/multiple and RADIO:
 | |
|         #  RFC 1866 (HTML 2.0): says first item should be selected.
 | |
|         #  W3C HTML 4.01 Specification: says that client behaviour is
 | |
|         #   undefined in this case.  For RADIO, exactly one must be selected,
 | |
|         #   though which one is undefined.
 | |
|         #  Both Netscape and Microsoft Internet Explorer (IE) choose first
 | |
|         #   item for SELECT/single.  However, both IE5 and Mozilla (both 1.0
 | |
|         #   and Firebird 0.6) leave all items unselected for RADIO and
 | |
|         #   SELECT/multiple.
 | |
| 
 | |
|         # Since both Netscape and IE all choose the first item for
 | |
|         # SELECT/single, we do the same.  OTOH, both Netscape and IE
 | |
|         # leave SELECT/multiple with nothing selected, in violation of RFC 1866
 | |
|         # (but not in violation of the W3C HTML 4 standard); the same is true
 | |
|         # of RADIO (which *is* in violation of the HTML 4 standard).  We follow
 | |
|         # RFC 1866 if the _select_default attribute is set, and Netscape and IE
 | |
|         # otherwise.  RFC 1866 and HTML 4 are always violated insofar as you
 | |
|         # can deselect all items in a RadioControl.
 | |
| 
 | |
|         for o in self.items: 
 | |
|             # set items' controls to self, now that we've merged
 | |
|             o.__dict__["_control"] = self
 | |
| 
 | |
|     def __getattr__(self, name):
 | |
|         if name == "value":
 | |
|             compat = self._form.backwards_compat
 | |
|             if self.name is None:
 | |
|                 return []
 | |
|             return [o.name for o in self.items if o.selected and
 | |
|                     (not o.disabled or compat)]
 | |
|         else:
 | |
|             raise AttributeError("%s instance has no attribute '%s'" %
 | |
|                                  (self.__class__.__name__, name))
 | |
| 
 | |
|     def __setattr__(self, name, value):
 | |
|         if name == "value":
 | |
|             if self.disabled:
 | |
|                 raise AttributeError("control '%s' is disabled" % self.name)
 | |
|             if self.readonly:
 | |
|                 raise AttributeError("control '%s' is readonly" % self.name)
 | |
|             self._set_value(value)
 | |
|         elif name in ("name", "type", "multiple"):
 | |
|             raise AttributeError("%s attribute is readonly" % name)
 | |
|         else:
 | |
|             self.__dict__[name] = value
 | |
| 
 | |
|     def _set_value(self, value):
 | |
|         if value is None or isstringlike(value):
 | |
|             raise TypeError("ListControl, must set a sequence")
 | |
|         if not value:
 | |
|             compat = self._form.backwards_compat
 | |
|             for o in self.items:
 | |
|                 if not o.disabled or compat:
 | |
|                     o.selected = False
 | |
|         elif self.multiple:
 | |
|             self._multiple_set_value(value)
 | |
|         elif len(value) > 1:
 | |
|             raise ItemCountError(
 | |
|                 "single selection list, must set sequence of "
 | |
|                 "length 0 or 1")
 | |
|         else:
 | |
|             self._single_set_value(value)
 | |
| 
 | |
|     def _get_items(self, name, target=1):
 | |
|         all_items = self.get_items(name)
 | |
|         items = [o for o in all_items if not o.disabled]
 | |
|         if len(items) < target:
 | |
|             if len(all_items) < target:
 | |
|                 raise ItemNotFoundError(
 | |
|                     "insufficient items with name %r" % name)
 | |
|             else:
 | |
|                 raise AttributeError(
 | |
|                     "insufficient non-disabled items with name %s" % name)
 | |
|         on = []
 | |
|         off = []
 | |
|         for o in items:
 | |
|             if o.selected:
 | |
|                 on.append(o)
 | |
|             else:
 | |
|                 off.append(o)
 | |
|         return on, off
 | |
| 
 | |
|     def _single_set_value(self, value):
 | |
|         assert len(value) == 1
 | |
|         on, off = self._get_items(value[0])
 | |
|         assert len(on) <= 1
 | |
|         if not on:
 | |
|             off[0].selected = True
 | |
| 
 | |
|     def _multiple_set_value(self, value):
 | |
|         compat = self._form.backwards_compat
 | |
|         turn_on = []  # transactional-ish
 | |
|         turn_off = [item for item in self.items if
 | |
|                     item.selected and (not item.disabled or compat)]
 | |
|         names = {}
 | |
|         for nn in value:
 | |
|             if nn in names.keys():
 | |
|                 names[nn] += 1
 | |
|             else:
 | |
|                 names[nn] = 1
 | |
|         for name, count in names.items():
 | |
|             on, off = self._get_items(name, count)
 | |
|             for i in xrange(count):
 | |
|                 if on:
 | |
|                     item = on[0]
 | |
|                     del on[0]
 | |
|                     del turn_off[turn_off.index(item)]
 | |
|                 else:
 | |
|                     item = off[0]
 | |
|                     del off[0]
 | |
|                     turn_on.append(item)
 | |
|         for item in turn_off:
 | |
|             item.selected = False
 | |
|         for item in turn_on:
 | |
|             item.selected = True
 | |
| 
 | |
|     def set_value_by_label(self, value):
 | |
|         """Set the value of control by item labels.
 | |
| 
 | |
|         value is expected to be an iterable of strings that are substrings of
 | |
|         the item labels that should be selected.  Before substring matching is
 | |
|         performed, the original label text is whitespace-compressed
 | |
|         (consecutive whitespace characters are converted to a single space
 | |
|         character) and leading and trailing whitespace is stripped.  Ambiguous
 | |
|         labels are accepted without complaint if the form's backwards_compat is
 | |
|         True; otherwise, it will not complain as long as all ambiguous labels
 | |
|         share the same item name (e.g. OPTION value).
 | |
| 
 | |
|         """
 | |
|         if isstringlike(value):
 | |
|             raise TypeError(value)
 | |
|         if not self.multiple and len(value) > 1:
 | |
|             raise ItemCountError(
 | |
|                 "single selection list, must set sequence of "
 | |
|                 "length 0 or 1")
 | |
|         items = []
 | |
|         for nn in value:
 | |
|             found = self.get_items(label=nn)
 | |
|             if len(found) > 1:
 | |
|                 if not self._form.backwards_compat:
 | |
|                     # ambiguous labels are fine as long as item names (e.g.
 | |
|                     # OPTION values) are same
 | |
|                     opt_name = found[0].name
 | |
|                     if [o for o in found[1:] if o.name != opt_name]:
 | |
|                         raise AmbiguityError(nn)
 | |
|                 else:
 | |
|                     # OK, we'll guess :-(  Assume first available item.
 | |
|                     found = found[:1]
 | |
|             for o in found:
 | |
|                 # For the multiple-item case, we could try to be smarter,
 | |
|                 # saving them up and trying to resolve, but that's too much.
 | |
|                 if self._form.backwards_compat or o not in items:
 | |
|                     items.append(o)
 | |
|                     break
 | |
|             else:  # all of them are used
 | |
|                 raise ItemNotFoundError(nn)
 | |
|         # now we have all the items that should be on
 | |
|         # let's just turn everything off and then back on.
 | |
|         self.value = []
 | |
|         for o in items:
 | |
|             o.selected = True
 | |
| 
 | |
|     def get_value_by_label(self):
 | |
|         """Return the value of the control as given by normalized labels."""
 | |
|         res = []
 | |
|         compat = self._form.backwards_compat
 | |
|         for o in self.items:
 | |
|             if (not o.disabled or compat) and o.selected:
 | |
|                 for l in o.get_labels():
 | |
|                     if l.text:
 | |
|                         res.append(l.text)
 | |
|                         break
 | |
|                 else:
 | |
|                     res.append(None)
 | |
|         return res
 | |
| 
 | |
|     def possible_items(self, by_label=False):
 | |
|         """Deprecated: return the names or labels of all possible items.
 | |
| 
 | |
|         Includes disabled items, which may be misleading for some use cases.
 | |
| 
 | |
|         """
 | |
|         deprecation(
 | |
|             "[item.name for item in self.items]")
 | |
|         if by_label:
 | |
|             res = []
 | |
|             for o in self.items:
 | |
|                 for l in o.get_labels():
 | |
|                     if l.text:
 | |
|                         res.append(l.text)
 | |
|                         break
 | |
|                 else:
 | |
|                     res.append(None)
 | |
|             return res
 | |
|         return [o.name for o in self.items]
 | |
| 
 | |
|     def _totally_ordered_pairs(self):
 | |
|         if self.disabled or self.name is None:
 | |
|             return []
 | |
|         else:
 | |
|             return [(o._index, self.name, o.name) for o in self.items
 | |
|                     if o.selected and not o.disabled]
 | |
| 
 | |
|     def __str__(self):
 | |
|         name = self.name
 | |
|         if name is None: name = "<None>"
 | |
| 
 | |
|         display = [str(o) for o in self.items]
 | |
| 
 | |
|         infos = []
 | |
|         if self.disabled: infos.append("disabled")
 | |
|         if self.readonly: infos.append("readonly")
 | |
|         info = ", ".join(infos)
 | |
|         if info: info = " (%s)" % info
 | |
| 
 | |
|         return "<%s(%s=[%s])%s>" % (self.__class__.__name__,
 | |
|                                     name, ", ".join(display), info)
 | |
| 
 | |
| 
 | |
class RadioControl(ListControl):
    """
    Single-selection list control.

    Covers:

    INPUT/RADIO

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        # A radio button with no explicit value attribute gets "on".
        if "value" not in attrs:
            attrs["value"] = "on"
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        self.__dict__["multiple"] = False
        # The single item this instance is constructed with; its initial
        # state comes straight from the presence of the "checked" attribute.
        is_checked = "checked" in attrs
        item = Item(self, attrs, index)
        item.__dict__["_selected"] = is_checked

    def fixup(self):
        """Settle the initial selection after the base-class fixup has run."""
        ListControl.fixup(self)
        chosen = [item for item in self.items
                  if item.selected and not item.disabled]
        if chosen:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for extra in chosen[:-1]:
                extra.selected = False
            return
        if not self._select_default:
            return
        # Nothing selected: fall back to the first item that is not disabled.
        for item in self.items:
            if not item.disabled:
                item.selected = True
                break

    def get_labels(self):
        """Return an empty list: the control as a whole reports no labels."""
        return []
 | |
| 
 | |
class CheckboxControl(ListControl):
    """
    Multiple-selection list control.

    Covers:

    INPUT/CHECKBOX

    """
    def __init__(self, type, name, attrs, select_default=False, index=None):
        # A checkbox with no explicit value attribute gets "on".
        if "value" not in attrs:
            attrs["value"] = "on"
        ListControl.__init__(self, type, name, attrs, select_default,
                             called_as_base_class=True, index=index)
        self.__dict__["multiple"] = True
        # The single item this instance is constructed with; its initial
        # state comes straight from the presence of the "checked" attribute.
        is_checked = "checked" in attrs
        item = Item(self, attrs, index)
        item.__dict__["_selected"] = is_checked

    def get_labels(self):
        """Return an empty list: the control as a whole reports no labels."""
        return []
 | |
| 
 | |
| 
 | |
class SelectControl(ListControl):
    """
    Covers:

    SELECT (and OPTION)


    OPTION 'values', in HTML parlance, are Item 'names' in ClientForm parlance.

    SELECT control values and labels are subject to some messy defaulting
    rules.  For example, if the HTML representation of the control is:

    <SELECT name=year>
      <OPTION value=0 label="2002">current year</OPTION>
      <OPTION value=1>2001</OPTION>
      <OPTION>2000</OPTION>
    </SELECT>

    The items, in order, have labels "2002", "2001" and "2000", whereas their
    names (the OPTION values) are "0", "1" and "2000" respectively.  Note that
    the value of the last OPTION in this example defaults to its contents, as
    specified by RFC 1866, as do the labels of the second and third OPTIONs.

    The OPTION labels are sometimes more meaningful than the OPTION values,
    which can make for more maintainable code.

    Additional read-only public attribute: attrs

    The attrs attribute is a dictionary of the original HTML attributes of the
    SELECT element.  Other ListControls do not have this attribute, because in
    other cases the control as a whole does not correspond to any single HTML
    element.  control.get(...).attrs may be used as usual to get at the HTML
    attributes of the HTML elements corresponding to individual list items (for
    SELECT controls, these are OPTION elements).

    Another special case is that the Item.attrs dictionaries have a special key
    "contents" which does not correspond to any real HTML attribute, but rather
    contains the contents of the OPTION element:

    <OPTION>this bit</OPTION>

    """
    # HTML attributes here are treated slightly differently from other list
    # controls:
    # -The SELECT HTML attributes dictionary is stuffed into the OPTION
    #  HTML attributes dictionary under the "__select" key.
    # -The content of each OPTION element is stored under the special
    #  "contents" key of the dictionary.
    # After all this, the dictionary is passed to the SelectControl constructor
    # as the attrs argument, as usual.  However:
    # -The first SelectControl constructed when building up a SELECT control
    #  has a constructor attrs argument containing only the __select key -- so
    #  this SelectControl represents an empty SELECT control.
    # -Subsequent SelectControls have both OPTION HTML-attribute in attrs and
    #  the __select dictionary containing the SELECT HTML-attributes.

    def __init__(self, type, name, attrs, select_default=False, index=None):
        """Build a SELECT control holding at most one OPTION item.

        attrs is the OPTION attribute dictionary carrying the SELECT
        attributes under "__select" (and the OPTION text under "contents").
        When attrs has no "value" key it is only the 'select started' marker
        and no item is created.

        """
        # fish out the SELECT HTML attributes from the OPTION HTML attributes
        # dictionary
        self.attrs = attrs["__select"].copy()
        self.__dict__["_label"] = _get_label(self.attrs)
        self.__dict__["id"] = self.attrs.get("id")
        self.__dict__["multiple"] = "multiple" in self.attrs
        # the majority of the contents, label, and value dance already happened
        contents = attrs.get("contents")
        # work on a copy so the caller's dictionary keeps its "__select" key
        attrs = attrs.copy()
        del attrs["__select"]

        ListControl.__init__(self, type, name, self.attrs, select_default,
                             called_as_base_class=True, index=index)
        self.disabled = "disabled" in self.attrs
        self.readonly = "readonly" in self.attrs
        if "value" in attrs:
            # otherwise it is a marker 'select started' token
            o = Item(self, attrs, index)
            o.__dict__["_selected"] = "selected" in attrs
            # add 'label' label and contents label, if different.  If both are
            # provided, the 'label' label is used for display in HTML
            # 4.0-compliant browsers (and any lower spec? not sure) while the
            # contents are used for display in older or less-compliant
            # browsers.  We make label objects for both, if the values are
            # different.
            label = attrs.get("label")
            if label:
                o._labels.append(Label({"__text": label}))
                if contents and contents != label:
                    o._labels.append(Label({"__text": contents}))
            elif contents:
                o._labels.append(Label({"__text": contents}))

    def fixup(self):
        """Settle the initial selection after the base-class fixup has run.

        With nothing selected, a single-selection control (or any control
        built with select_default) gets its first non-disabled item selected.
        With several items selected on a single-selection control, only the
        last one is kept.

        """
        ListControl.fixup(self)
        # Firefox doesn't exclude disabled items from those considered here
        # (i.e. from 'found', for both branches of the if below).  Note that
        # IE6 doesn't support the disabled attribute on OPTIONs at all.
        found = [o for o in self.items if o.selected]
        if not found:
            if not self.multiple or self._select_default:
                for o in self.items:
                    if not o.disabled:
                        # Lift the control-level disabled flag only for the
                        # duration of the default selection, then put it back
                        # on the control (not on the item), so a disabled
                        # SELECT stays disabled and the chosen OPTION stays
                        # enabled.
                        was_disabled = self.disabled
                        self.disabled = False
                        try:
                            o.selected = True
                        finally:
                            self.disabled = was_disabled
                        break
        elif not self.multiple:
            # Ensure only one item selected.  Choose the last one,
            # following IE and Firefox.
            for o in found[:-1]:
                o.selected = False
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
class SubmitControl(ScalarControl):
    """
    Covers:

    INPUT/SUBMIT
    BUTTON/SUBMIT

    """
    def __init__(self, type, name, attrs, index=None):
        ScalarControl.__init__(self, type, name, attrs, index)
        # IE5 defaults SUBMIT value to "Submit Query"; Firebird 0.6 leaves it
        # blank, Konqueror 3.1 defaults to "Submit".  HTML spec. doesn't seem
        # to define this.
        if self.value is None and not self.disabled and not self.readonly:
            self.value = ""
        self.readonly = True

    def get_labels(self):
        """Return the control's labels, led by one built from its value.

        The value-derived label is only added when the value is non-empty;
        the labels reported by ScalarControl follow it.

        """
        res = []
        if self.value:
            res.append(Label({"__text": self.value}))
        res.extend(ScalarControl.get_labels(self))
        return res

    def is_of_kind(self, kind):
        """Return True only for the "clickable" kind."""
        return kind == "clickable"

    def _click(self, form, coord, return_type, request_class=_urllib.request.Request):
        """Mark this control as clicked while the form builds its result.

        Returns whatever form._switch_click returns.  The clicked marker is
        cleared in a finally clause so that an exception raised while building
        the result cannot leave the control looking clicked afterwards.

        """
        self._clicked = coord
        try:
            return form._switch_click(return_type, request_class)
        finally:
            self._clicked = False

    def _totally_ordered_pairs(self):
        """Return the ScalarControl pairs, but only while being clicked."""
        if not self._clicked:
            return []
        return ScalarControl._totally_ordered_pairs(self)
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
class ImageControl(SubmitControl):
    """
    Covers:

    INPUT/IMAGE

    Coordinates are specified using one of the HTMLForm.click* methods.

    """
    def __init__(self, type, name, attrs, index=None):
        SubmitControl.__init__(self, type, name, attrs, index)
        # SubmitControl.__init__ leaves the control readonly; undo that here.
        self.readonly = False

    def _totally_ordered_pairs(self):
        """Return the name.x / name.y pairs (plus the value pair, if any).

        Nothing is returned unless the control is enabled, named and currently
        being clicked.  The click coordinates are stringified, and the three
        possible entries use consecutive indices starting at self._index.

        """
        coord = self._clicked
        if self.disabled or not coord:
            return []
        base = self.name
        if base is None:
            return []
        result = [
            (self._index, "%s.x" % base, str(coord[0])),
            (self._index+1, "%s.y" % base, str(coord[1])),
            ]
        own_value = self._value
        if own_value:
            result.append((self._index+2, base, own_value))
        return result

    # Bypass SubmitControl.get_labels in favour of the ScalarControl version.
    get_labels = ScalarControl.get_labels
 | |
| 
 | |
| # aliases, just to make str(control) and str(form) clearer
 | |
class PasswordControl(TextControl):
    """TextControl under a distinct class name (see HTMLForm.type2class)."""


class HiddenControl(TextControl):
    """TextControl under a distinct class name (see HTMLForm.type2class)."""


class TextareaControl(TextControl):
    """TextControl under a distinct class name (see HTMLForm.type2class)."""


class SubmitButtonControl(SubmitControl):
    """SubmitControl under a distinct class name (see HTMLForm.type2class)."""
 | |
| 
 | |
| 
 | |
def is_listcontrol(control):
    """Return whether control reports itself as being of kind "list"."""
    return control.is_of_kind("list")
 | |
| 
 | |
| 
 | |
| class HTMLForm:
 | |
|     """Represents a single HTML <form> ... </form> element.
 | |
| 
 | |
|     A form consists of a sequence of controls that usually have names, and
 | |
|     which can take on various values.  The values of the various types of
 | |
|     controls represent variously: text, zero-or-one-of-many or many-of-many
 | |
|     choices, and files to be uploaded.  Some controls can be clicked on to
 | |
|     submit the form, and clickable controls' values sometimes include the
 | |
|     coordinates of the click.
 | |
| 
 | |
|     Forms can be filled in with data to be returned to the server, and then
 | |
|     submitted, using the click method to generate a request object suitable for
 | |
|     passing to urllib2.urlopen (or the click_request_data or click_pairs
 | |
|     methods if you're not using urllib2).
 | |
| 
 | |
|     import ClientForm
 | |
|     forms = ClientForm.ParseFile(html, base_uri)
 | |
|     form = forms[0]
 | |
| 
 | |
|     form["query"] = "Python"
 | |
|     form.find_control("nr_results").get("lots").selected = True
 | |
| 
 | |
|     response = urllib2.urlopen(form.click())
 | |
| 
 | |
|     Usually, HTMLForm instances are not created directly.  Instead, the
 | |
|     ParseFile or ParseResponse factory functions are used.  If you do construct
 | |
|     HTMLForm objects yourself, however, note that an HTMLForm instance is only
 | |
|     properly initialised after the fixup method has been called (ParseFile and
 | |
|     ParseResponse do this for you).  See ListControl.__doc__ for the reason
 | |
|     this is required.
 | |
| 
 | |
|     Indexing a form (form["control_name"]) returns the named Control's value
 | |
|     attribute.  Assignment to a form index (form["control_name"] = something)
 | |
|     is equivalent to assignment to the named Control's value attribute.  If you
 | |
|     need to be more specific than just supplying the control's name, use the
 | |
|     set_value and get_value methods.
 | |
| 
 | |
|     ListControl values are lists of item names (specifically, the names of the
 | |
|     items that are selected and not disabled, and hence are "successful" -- ie.
 | |
|     cause data to be returned to the server).  The list item's name is the
 | |
|     value of the corresponding HTML element's "value" attribute.
 | |
| 
 | |
|     Example:
 | |
| 
 | |
|       <INPUT type="CHECKBOX" name="cheeses" value="leicester"></INPUT>
 | |
|       <INPUT type="CHECKBOX" name="cheeses" value="cheddar"></INPUT>
 | |
| 
 | |
|     defines a CHECKBOX control with name "cheeses" which has two items, named
 | |
|     "leicester" and "cheddar".
 | |
| 
 | |
|     Another example:
 | |
| 
 | |
|       <SELECT name="more_cheeses">
 | |
|         <OPTION>1</OPTION>
 | |
|         <OPTION value="2" label="CHEDDAR">cheddar</OPTION>
 | |
|       </SELECT>
 | |
| 
 | |
|     defines a SELECT control with name "more_cheeses" which has two items,
 | |
|     named "1" and "2" (because the OPTION element's value HTML attribute
 | |
|     defaults to the element contents -- see SelectControl.__doc__ for more on
 | |
|     these defaulting rules).
 | |
| 
 | |
|     To select, deselect or otherwise manipulate individual list items, use the
 | |
|     HTMLForm.find_control() and ListControl.get() methods.  To set the whole
 | |
|     value, do as for any other control: use indexing or the set_/get_value
 | |
|     methods.
 | |
| 
 | |
|     Example:
 | |
| 
 | |
|     # select *only* the item named "cheddar"
 | |
|     form["cheeses"] = ["cheddar"]
 | |
|     # select "cheddar", leave other items unaffected
 | |
|     form.find_control("cheeses").get("cheddar").selected = True
 | |
| 
 | |
|     Some controls (RADIO and SELECT without the multiple attribute) can only
 | |
|     have zero or one items selected at a time.  Some controls (CHECKBOX and
 | |
|     SELECT with the multiple attribute) can have multiple items selected at a
 | |
|     time.  To set the whole value of a ListControl, assign a sequence to a form
 | |
|     index:
 | |
| 
 | |
|     form["cheeses"] = ["cheddar", "leicester"]
 | |
| 
 | |
|     If the ListControl is not multiple-selection, the assigned list must be of
 | |
|     length zero or one.
 | |
| 
 | |
|     To check if a control has an item, if an item is selected, or if an item is
 | |
|     successful (selected and not disabled), respectively:
 | |
| 
 | |
|     "cheddar" in [item.name for item in form.find_control("cheeses").items]
 | |
|     "cheddar" in [item.name for item in form.find_control("cheeses").items
 | |
|                   if item.selected]
 | |
|     "cheddar" in form["cheeses"]  # (or "cheddar" in form.get_value("cheeses"))
 | |
| 
 | |
|     Note that some list items may be disabled (see below).
 | |
| 
 | |
|     Note the following mistake:
 | |
| 
 | |
|     form[control_name] = control_value
 | |
|     assert form[control_name] == control_value  # not necessarily true
 | |
| 
 | |
|     The reason for this is that form[control_name] always gives the list items
 | |
|     in the order they were listed in the HTML.
 | |
| 
 | |
|     List items (hence list values, too) can be referred to in terms of list
 | |
|     item labels rather than list item names using the appropriate label
 | |
|     arguments.  Note that each item may have several labels.
 | |
| 
 | |
|     The question of default values of OPTION contents, labels and values is
 | |
|     somewhat complicated: see SelectControl.__doc__ and
 | |
|     ListControl.get_item_attrs.__doc__ if you think you need to know.
 | |
| 
 | |
|     Controls can be disabled or readonly.  In either case, the control's value
 | |
|     cannot be changed until you clear those flags (see example below).
 | |
|     Disabled is the state typically represented by browsers by 'greying out' a
 | |
|     control.  Disabled controls are not 'successful' -- they don't cause data
 | |
|     to get returned to the server.  Readonly controls usually appear in
 | |
|     browsers as read-only text boxes.  Readonly controls are successful.  List
 | |
|     items can also be disabled.  Attempts to select or deselect disabled items
 | |
|     fail with AttributeError.
 | |
| 
 | |
|     If a lot of controls are readonly, it can be useful to do this:
 | |
| 
 | |
|     form.set_all_readonly(False)
 | |
| 
 | |
|     To clear a control's value attribute, so that it is not successful (until a
 | |
|     value is subsequently set):
 | |
| 
 | |
|     form.clear("cheeses")
 | |
| 
 | |
|     More examples:
 | |
| 
 | |
|     control = form.find_control("cheeses")
 | |
|     control.disabled = False
 | |
|     control.readonly = False
 | |
|     control.get("gruyere").disabled = True
 | |
|     control.items[0].selected = True
 | |
| 
 | |
|     See the various Control classes for further documentation.  Many methods
 | |
|     take name, type, kind, id, label and nr arguments to specify the control to
 | |
|     be operated on: see HTMLForm.find_control.__doc__.
 | |
| 
 | |
|     ControlNotFoundError (subclass of ValueError) is raised if the specified
 | |
|     control can't be found.  This includes occasions where a non-ListControl
 | |
|     is found, but the method (set, for example) requires a ListControl.
 | |
|     ItemNotFoundError (subclass of ValueError) is raised if a list item can't
 | |
|     be found.  ItemCountError (subclass of ValueError) is raised if an attempt
 | |
|     is made to select more than one item and the control doesn't allow that, or
 | |
|     set/get_single are called and the control contains more than one item.
 | |
|     AttributeError is raised if a control or item is readonly or disabled and
 | |
|     an attempt is made to alter its value.
 | |
| 
 | |
|     Security note: Remember that any passwords you store in HTMLForm instances
 | |
|     will be saved to disk in the clear if you pickle them (directly or
 | |
|     indirectly).  The simplest solution to this is to avoid pickling HTMLForm
 | |
|     objects.  You could also pickle before filling in any password, or just set
 | |
|     the password to "" before pickling.
 | |
| 
 | |
| 
 | |
|     Public attributes:
 | |
| 
 | |
|     action: full (absolute URI) form action
 | |
|     method: "GET" or "POST"
 | |
|     enctype: form transfer encoding MIME type
 | |
|     name: name of form (None if no name was specified)
 | |
|     attrs: dictionary mapping original HTML form attributes to their values
 | |
| 
 | |
|     controls: list of Control instances; do not alter this list
 | |
|      (instead, call form.new_control to make a Control and add it to the
 | |
|      form, or control.add_to_form if you already have a Control instance)
 | |
| 
 | |
| 
 | |
| 
 | |
|     Methods for form filling:
 | |
|     -------------------------
 | |
| 
 | |
|     Most of these methods have very similar arguments.  See
 | |
|     HTMLForm.find_control.__doc__ for details of the name, type, kind, label
 | |
|     and nr arguments.
 | |
| 
 | |
|     def find_control(self,
 | |
|                      name=None, type=None, kind=None, id=None, predicate=None,
 | |
|                      nr=None, label=None)
 | |
| 
 | |
|     get_value(name=None, type=None, kind=None, id=None, nr=None,
 | |
|               by_label=False,  # by_label is deprecated
 | |
|               label=None)
 | |
|     set_value(value,
 | |
|               name=None, type=None, kind=None, id=None, nr=None,
 | |
|               by_label=False,  # by_label is deprecated
 | |
|               label=None)
 | |
| 
 | |
|     clear_all()
 | |
|     clear(name=None, type=None, kind=None, id=None, nr=None, label=None)
 | |
| 
 | |
|     set_all_readonly(readonly)
 | |
| 
 | |
| 
 | |
|     Method applying only to FileControls:
 | |
| 
 | |
|     add_file(file_object,
 | |
|              content_type="application/octet-stream", filename=None,
 | |
|              name=None, id=None, nr=None, label=None)
 | |
| 
 | |
| 
 | |
|     Methods applying only to clickable controls:
 | |
| 
 | |
|     click(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
 | |
|     click_request_data(name=None, type=None, id=None, nr=0, coord=(1,1),
 | |
|                        label=None)
 | |
|     click_pairs(name=None, type=None, id=None, nr=0, coord=(1,1), label=None)
 | |
| 
 | |
|     """
 | |
| 
 | |
    # Maps a lower-cased control type string to the Control class that
    # new_control() instantiates for it.  Types missing from this table fall
    # back to IgnoreControl or TextControl, depending on new_control's
    # ignore_unknown argument.
    type2class = {
        "text": TextControl,
        "password": PasswordControl,
        "hidden": HiddenControl,
        "textarea": TextareaControl,

        "isindex": IsindexControl,

        "file": FileControl,

        "button": IgnoreControl,
        "buttonbutton": IgnoreControl,
        "reset": IgnoreControl,
        "resetbutton": IgnoreControl,

        "submit": SubmitControl,
        "submitbutton": SubmitButtonControl,
        "image": ImageControl,

        "radio": RadioControl,
        "checkbox": CheckboxControl,
        "select": SelectControl,
        }
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # Initialisation.  Use ParseResponse / ParseFile instead.
 | |
| 
 | |
|     def __init__(self, action, method="GET",
 | |
|                  enctype=None,
 | |
|                  name=None, attrs=None,
 | |
|                  request_class=_urllib.request.Request,
 | |
|                  forms=None, labels=None, id_to_labels=None,
 | |
|                  backwards_compat=True):
 | |
|         """
 | |
|         In the usual case, use ParseResponse (or ParseFile) to create new
 | |
|         HTMLForm objects.
 | |
| 
 | |
|         action: full (absolute URI) form action
 | |
|         method: "GET" or "POST"
 | |
|         enctype: form transfer encoding MIME type
 | |
|         name: name of form
 | |
|         attrs: dictionary mapping original HTML form attributes to their values
 | |
| 
 | |
|         """
 | |
|         self.action = action
 | |
|         self.method = method
 | |
|         self.enctype = enctype or "application/x-www-form-urlencoded"
 | |
|         self.name = name
 | |
|         if attrs is not None:
 | |
|             self.attrs = attrs.copy()
 | |
|         else:
 | |
|             self.attrs = {}
 | |
|         self.controls = []
 | |
|         self._request_class = request_class
 | |
| 
 | |
|         # these attributes are used by zope.testbrowser
 | |
|         self._forms = forms  # this is a semi-public API!
 | |
|         self._labels = labels  # this is a semi-public API!
 | |
|         self._id_to_labels = id_to_labels  # this is a semi-public API!
 | |
| 
 | |
|         self.backwards_compat = backwards_compat  # note __setattr__
 | |
| 
 | |
|         self._urlunparse = _urllib.parse.urlunparse
 | |
|         self._urlparse = _urllib.parse.urlparse
 | |
| 
 | |
|     def __getattr__(self, name):
 | |
|         if name == "backwards_compat":
 | |
|             return self._backwards_compat
 | |
|         return getattr(HTMLForm, name)
 | |
| 
 | |
|     def __setattr__(self, name, value):
 | |
|         # yuck
 | |
|         if name == "backwards_compat":
 | |
|             name = "_backwards_compat"
 | |
|             value = bool(value)
 | |
|             for cc in self.controls:
 | |
|                 try:
 | |
|                     items = cc.items 
 | |
|                 except AttributeError:
 | |
|                     continue
 | |
|                 else:
 | |
|                     for ii in items:
 | |
|                         for ll in ii.get_labels():
 | |
|                             ll._backwards_compat = value
 | |
|         self.__dict__[name] = value
 | |
| 
 | |
|     def new_control(self, type, name, attrs,
 | |
|                     ignore_unknown=False, select_default=False, index=None):
 | |
|         """Adds a new control to the form.
 | |
| 
 | |
|         This is usually called by ParseFile and ParseResponse.  Don't call it
 | |
|         youself unless you're building your own Control instances.
 | |
| 
 | |
|         Note that controls representing lists of items are built up from
 | |
|         controls holding only a single list item.  See ListControl.__doc__ for
 | |
|         further information.
 | |
| 
 | |
|         type: type of control (see Control.__doc__ for a list)
 | |
|         attrs: HTML attributes of control
 | |
|         ignore_unknown: if true, use a dummy Control instance for controls of
 | |
|          unknown type; otherwise, use a TextControl
 | |
|         select_default: for RADIO and multiple-selection SELECT controls, pick
 | |
|          the first item as the default if no 'selected' HTML attribute is
 | |
|          present (this defaulting happens when the HTMLForm.fixup method is
 | |
|          called)
 | |
|         index: index of corresponding element in HTML (see
 | |
|          MoreFormTests.test_interspersed_controls for motivation)
 | |
| 
 | |
|         """
 | |
|         type = type.lower()
 | |
|         klass = self.type2class.get(type)
 | |
|         if klass is None:
 | |
|             if ignore_unknown:
 | |
|                 klass = IgnoreControl
 | |
|             else:
 | |
|                 klass = TextControl
 | |
| 
 | |
|         a = attrs.copy()
 | |
|         if issubclass(klass, ListControl):
 | |
|             control = klass(type, name, a, select_default, index)
 | |
|         else:
 | |
|             control = klass(type, name, a, index)
 | |
| 
 | |
|         if type == "select" and len(attrs) == 1:
 | |
|             for ii in xrange(len(self.controls)-1, -1, -1):
 | |
|                 ctl = self.controls[ii]
 | |
|                 if ctl.type == "select":
 | |
|                     ctl.close_control()
 | |
|                     break
 | |
| 
 | |
|         control.add_to_form(self)
 | |
|         control._urlparse = self._urlparse
 | |
|         control._urlunparse = self._urlunparse
 | |
| 
 | |
|     def fixup(self):
 | |
|         """Normalise form after all controls have been added.
 | |
| 
 | |
|         This is usually called by ParseFile and ParseResponse.  Don't call it
 | |
|         youself unless you're building your own Control instances.
 | |
| 
 | |
|         This method should only be called once, after all controls have been
 | |
|         added to the form.
 | |
| 
 | |
|         """
 | |
|         for control in self.controls:
 | |
|             control.fixup()
 | |
|         self.backwards_compat = self._backwards_compat
 | |
| 
 | |
| #---------------------------------------------------
 | |
|     def __str__(self):
 | |
|         header = "%s%s %s %s" % (
 | |
|             (self.name and self.name+" " or ""),
 | |
|             self.method, self.action, self.enctype)
 | |
|         rep = [header]
 | |
|         for control in self.controls:
 | |
|             rep.append("  %s" % str(control))
 | |
|         return "<%s>" % "\n".join(rep)
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # Form-filling methods.
 | |
| 
 | |
|     def __getitem__(self, name):
 | |
|         return self.find_control(name).value
 | |
|     def __contains__(self, name):
 | |
|         return bool(self.find_control(name))
 | |
|     def __setitem__(self, name, value):
 | |
|         control = self.find_control(name)
 | |
|         try:
 | |
|             control.value = value
 | |
|         except AttributeError as e:
 | |
|             raise ValueError(str(e))
 | |
| 
 | |
|     def get_value(self,
 | |
|                   name=None, type=None, kind=None, id=None, nr=None,
 | |
|                   by_label=False,  # by_label is deprecated
 | |
|                   label=None):
 | |
|         """Return value of control.
 | |
| 
 | |
|         If only name and value arguments are supplied, equivalent to
 | |
| 
 | |
|         form[name]
 | |
| 
 | |
|         """
 | |
|         if by_label:
 | |
|             deprecation("form.get_value_by_label(...)")
 | |
|         c = self.find_control(name, type, kind, id, label=label, nr=nr)
 | |
|         if by_label:
 | |
|             try:
 | |
|                 meth = c.get_value_by_label
 | |
|             except AttributeError:
 | |
|                 raise NotImplementedError(
 | |
|                     "control '%s' does not yet support by_label" % c.name)
 | |
|             else:
 | |
|                 return meth()
 | |
|         else:
 | |
|             return c.value
 | |
|     def set_value(self, value,
 | |
|                   name=None, type=None, kind=None, id=None, nr=None,
 | |
|                   by_label=False,  # by_label is deprecated
 | |
|                   label=None):
 | |
|         """Set value of control.
 | |
| 
 | |
|         If only name and value arguments are supplied, equivalent to
 | |
| 
 | |
|         form[name] = value
 | |
| 
 | |
|         """
 | |
|         if by_label:
 | |
|             deprecation("form.get_value_by_label(...)")
 | |
|         c = self.find_control(name, type, kind, id, label=label, nr=nr)
 | |
|         if by_label:
 | |
|             try:
 | |
|                 meth = c.set_value_by_label
 | |
|             except AttributeError:
 | |
|                 raise NotImplementedError(
 | |
|                     "control '%s' does not yet support by_label" % c.name)
 | |
|             else:
 | |
|                 meth(value)
 | |
|         else:
 | |
|             c.value = value
 | |
|     def get_value_by_label(
 | |
|         self, name=None, type=None, kind=None, id=None, label=None, nr=None):
 | |
|         """
 | |
| 
 | |
|         All arguments should be passed by name.
 | |
| 
 | |
|         """
 | |
|         c = self.find_control(name, type, kind, id, label=label, nr=nr)
 | |
|         return c.get_value_by_label()
 | |
| 
 | |
|     def set_value_by_label(
 | |
|         self, value,
 | |
|         name=None, type=None, kind=None, id=None, label=None, nr=None):
 | |
|         """
 | |
| 
 | |
|         All arguments should be passed by name.
 | |
| 
 | |
|         """
 | |
|         c = self.find_control(name, type, kind, id, label=label, nr=nr)
 | |
|         c.set_value_by_label(value)
 | |
| 
 | |
|     def set_all_readonly(self, readonly):
 | |
|         for control in self.controls:
 | |
|             control.readonly = bool(readonly)
 | |
| 
 | |
|     def clear_all(self):
 | |
|         """Clear the value attributes of all controls in the form.
 | |
| 
 | |
|         See HTMLForm.clear.__doc__.
 | |
| 
 | |
|         """
 | |
|         for control in self.controls:
 | |
|             control.clear()
 | |
| 
 | |
|     def clear(self,
 | |
|               name=None, type=None, kind=None, id=None, nr=None, label=None):
 | |
|         """Clear the value attribute of a control.
 | |
| 
 | |
|         As a result, the affected control will not be successful until a value
 | |
|         is subsequently set.  AttributeError is raised on readonly controls.
 | |
| 
 | |
|         """
 | |
|         c = self.find_control(name, type, kind, id, label=label, nr=nr)
 | |
|         c.clear()
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # Form-filling methods applying only to ListControls.
 | |
| 
 | |
|     def possible_items(self,  # deprecated
 | |
|                        name=None, type=None, kind=None, id=None,
 | |
|                        nr=None, by_label=False, label=None):
 | |
|         """Return a list of all values that the specified control can take."""
 | |
|         c = self._find_list_control(name, type, kind, id, label, nr)
 | |
|         return c.possible_items(by_label)
 | |
| 
 | |
|     def set(self, selected, item_name,  # deprecated
 | |
|             name=None, type=None, kind=None, id=None, nr=None,
 | |
|             by_label=False, label=None):
 | |
|         """Select / deselect named list item.
 | |
| 
 | |
|         selected: boolean selected state
 | |
| 
 | |
|         """
 | |
|         self._find_list_control(name, type, kind, id, label, nr).set(
 | |
|             selected, item_name, by_label)
 | |
|     def toggle(self, item_name,  # deprecated
 | |
|                name=None, type=None, kind=None, id=None, nr=None,
 | |
|                by_label=False, label=None):
 | |
|         """Toggle selected state of named list item."""
 | |
|         self._find_list_control(name, type, kind, id, label, nr).toggle(
 | |
|             item_name, by_label)
 | |
| 
 | |
|     def set_single(self, selected,  # deprecated
 | |
|                    name=None, type=None, kind=None, id=None,
 | |
|                    nr=None, by_label=None, label=None):
 | |
|         """Select / deselect list item in a control having only one item.
 | |
| 
 | |
|         If the control has multiple list items, ItemCountError is raised.
 | |
| 
 | |
|         This is just a convenience method, so you don't need to know the item's
 | |
|         name -- the item name in these single-item controls is usually
 | |
|         something meaningless like "1" or "on".
 | |
| 
 | |
|         For example, if a checkbox has a single item named "on", the following
 | |
|         two calls are equivalent:
 | |
| 
 | |
|         control.toggle("on")
 | |
|         control.toggle_single()
 | |
| 
 | |
|         """  # by_label ignored and deprecated
 | |
|         self._find_list_control(
 | |
|             name, type, kind, id, label, nr).set_single(selected)
 | |
|     def toggle_single(self, name=None, type=None, kind=None, id=None,
 | |
|                       nr=None, by_label=None, label=None):  # deprecated
 | |
|         """Toggle selected state of list item in control having only one item.
 | |
| 
 | |
|         The rest is as for HTMLForm.set_single.__doc__.
 | |
| 
 | |
|         """  # by_label ignored and deprecated
 | |
|         self._find_list_control(name, type, kind, id, label, nr).toggle_single()
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # Form-filling method applying only to FileControls.
 | |
| 
 | |
|     def add_file(self, file_object, content_type=None, filename=None,
 | |
|                  name=None, id=None, nr=None, label=None):
 | |
|         """Add a file to be uploaded.
 | |
| 
 | |
|         file_object: file-like object (with read method) from which to read
 | |
|          data to upload
 | |
|         content_type: MIME content type of data to upload
 | |
|         filename: filename to pass to server
 | |
| 
 | |
|         If filename is None, no filename is sent to the server.
 | |
| 
 | |
|         If content_type is None, the content type is guessed based on the
 | |
|         filename and the data from read from the file object.
 | |
| 
 | |
|         XXX
 | |
|         At the moment, guessed content type is always application/octet-stream.
 | |
|         Use sndhdr, imghdr modules.  Should also try to guess HTML, XML, and
 | |
|         plain text.
 | |
| 
 | |
|         Note the following useful HTML attributes of file upload controls (see
 | |
|         HTML 4.01 spec, section 17):
 | |
| 
 | |
|         accept: comma-separated list of content types that the server will
 | |
|          handle correctly; you can use this to filter out non-conforming files
 | |
|         size: XXX IIRC, this is indicative of whether form wants multiple or
 | |
|          single files
 | |
|         maxlength: XXX hint of max content length in bytes?
 | |
| 
 | |
|         """
 | |
|         self.find_control(name, "file", id=id, label=label, nr=nr).add_file(
 | |
|             file_object, content_type, filename)
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # Form submission methods, applying only to clickable controls.
 | |
| 
 | |
|     def click(self, name=None, type=None, id=None, nr=0, coord=(1,1),
 | |
|               request_class=_urllib.request.Request,
 | |
|               label=None):
 | |
|         """Return request that would result from clicking on a control.
 | |
| 
 | |
|         The request object is a _urllib.request.Request instance, which you can pass to
 | |
|         urllib2.urlopen (or ClientCookie.urlopen).
 | |
| 
 | |
|         Only some control types (INPUT/SUBMIT & BUTTON/SUBMIT buttons and
 | |
|         IMAGEs) can be clicked.
 | |
| 
 | |
|         Will click on the first clickable control, subject to the name, type
 | |
|         and nr arguments (as for find_control).  If no name, type, id or number
 | |
|         is specified and there are no clickable controls, a request will be
 | |
|         returned for the form in its current, un-clicked, state.
 | |
| 
 | |
|         IndexError is raised if any of name, type, id or nr is specified but no
 | |
|         matching control is found.  ValueError is raised if the HTMLForm has an
 | |
|         enctype attribute that is not recognised.
 | |
| 
 | |
|         You can optionally specify a coordinate to click at, which only makes a
 | |
|         difference if you clicked on an image.
 | |
| 
 | |
|         """
 | |
|         return self._click(name, type, id, label, nr, coord, "request",
 | |
|                            self._request_class)
 | |
| 
 | |
|     def click_request_data(self,
 | |
|                            name=None, type=None, id=None,
 | |
|                            nr=0, coord=(1,1),
 | |
|                            request_class=_urllib.request.Request,
 | |
|                            label=None):
 | |
|         """As for click method, but return a tuple (url, data, headers).
 | |
| 
 | |
|         You can use this data to send a request to the server.  This is useful
 | |
|         if you're using httplib or urllib rather than urllib2.  Otherwise, use
 | |
|         the click method.
 | |
| 
 | |
|         # Untested.  Have to subclass to add headers, I think -- so use urllib2
 | |
|         # instead!
 | |
|         import urllib
 | |
|         url, data, hdrs = form.click_request_data()
 | |
|         r = _urllib.request.urlopen(url, data)
 | |
| 
 | |
|         # Untested.  I don't know of any reason to use httplib -- you can get
 | |
|         # just as much control with urllib2.
 | |
|         import httplib, urlparse
 | |
|         url, data, hdrs = form.click_request_data()
 | |
|         tup = urlparse(url)
 | |
|         host, path = tup[1], _urllib.parse.urlunparse((None, None)+tup[2:])
 | |
|         conn = httplib.HTTPConnection(host)
 | |
|         if data:
 | |
|             httplib.request("POST", path, data, hdrs)
 | |
|         else:
 | |
|             httplib.request("GET", path, headers=hdrs)
 | |
|         r = conn.getresponse()
 | |
| 
 | |
|         """
 | |
|         return self._click(name, type, id, label, nr, coord, "request_data",
 | |
|                            self._request_class)
 | |
| 
 | |
|     def click_pairs(self, name=None, type=None, id=None,
 | |
|                     nr=0, coord=(1,1),
 | |
|                     label=None):
 | |
|         """As for click_request_data, but returns a list of (key, value) pairs.
 | |
| 
 | |
|         You can use this list as an argument to ClientForm.urlencode.  This is
 | |
|         usually only useful if you're using httplib or urllib rather than
 | |
|         urllib2 or ClientCookie.  It may also be useful if you want to manually
 | |
|         tweak the keys and/or values, but this should not be necessary.
 | |
|         Otherwise, use the click method.
 | |
| 
 | |
|         Note that this method is only useful for forms of MIME type
 | |
|         x-www-form-urlencoded.  In particular, it does not return the
 | |
|         information required for file upload.  If you need file upload and are
 | |
|         not using urllib2, use click_request_data.
 | |
| 
 | |
|         Also note that Python 2.0's urllib.urlencode is slightly broken: it
 | |
|         only accepts a mapping, not a sequence of pairs, as an argument.  This
 | |
|         messes up any ordering in the argument.  Use ClientForm.urlencode
 | |
|         instead.
 | |
| 
 | |
|         """
 | |
|         return self._click(name, type, id, label, nr, coord, "pairs",
 | |
|                            self._request_class)
 | |
| 
 | |
| #---------------------------------------------------
 | |
| 
 | |
|     def find_control(self,
 | |
|                      name=None, type=None, kind=None, id=None,
 | |
|                      predicate=None, nr=None,
 | |
|                      label=None):
 | |
|         """Locate and return some specific control within the form.
 | |
| 
 | |
|         At least one of the name, type, kind, predicate and nr arguments must
 | |
|         be supplied.  If no matching control is found, ControlNotFoundError is
 | |
|         raised.
 | |
| 
 | |
|         If name is specified, then the control must have the indicated name.
 | |
| 
 | |
|         If type is specified then the control must have the specified type (in
 | |
|         addition to the types possible for <input> HTML tags: "text",
 | |
|         "password", "hidden", "submit", "image", "button", "radio", "checkbox",
 | |
|         "file" we also have "reset", "buttonbutton", "submitbutton",
 | |
|         "resetbutton", "textarea", "select" and "isindex").
 | |
| 
 | |
|         If kind is specified, then the control must fall into the specified
 | |
|         group, each of which satisfies a particular interface.  The types are
 | |
|         "text", "list", "multilist", "singlelist", "clickable" and "file".
 | |
| 
 | |
|         If id is specified, then the control must have the indicated id.
 | |
| 
 | |
|         If predicate is specified, then the control must match that function.
 | |
|         The predicate function is passed the control as its single argument,
 | |
|         and should return a boolean value indicating whether the control
 | |
|         matched.
 | |
| 
 | |
|         nr, if supplied, is the sequence number of the control (where 0 is the
 | |
|         first).  Note that control 0 is the first control matching all the
 | |
|         other arguments (if supplied); it is not necessarily the first control
 | |
|         in the form.  If no nr is supplied, AmbiguityError is raised if
 | |
|         multiple controls match the other arguments (unless the
 | |
|         .backwards-compat attribute is true).
 | |
| 
 | |
|         If label is specified, then the control must have this label.  Note
 | |
|         that radio controls and checkboxes never have labels: their items do.
 | |
| 
 | |
|         """
 | |
|         if ((name is None) and (type is None) and (kind is None) and
 | |
|             (id is None) and (label is None) and (predicate is None) and
 | |
|             (nr is None)):
 | |
|             raise ValueError(
 | |
|                 "at least one argument must be supplied to specify control")
 | |
|         return self._find_control(name, type, kind, id, label, predicate, nr)
 | |
| 
 | |
| #---------------------------------------------------
 | |
| # Private methods.
 | |
| 
 | |
|     def _find_list_control(self,
 | |
|                            name=None, type=None, kind=None, id=None, 
 | |
|                            label=None, nr=None):
 | |
|         if ((name is None) and (type is None) and (kind is None) and
 | |
|             (id is None) and (label is None) and (nr is None)):
 | |
|             raise ValueError(
 | |
|                 "at least one argument must be supplied to specify control")
 | |
| 
 | |
|         return self._find_control(name, type, kind, id, label, 
 | |
|                                   is_listcontrol, nr)
 | |
| 
 | |
|     def _find_control(self, name, type, kind, id, label, predicate, nr):
 | |
|         if ((name is not None) and (name is not Missing) and
 | |
|             not isstringlike(name)):
 | |
|             raise TypeError("control name must be string-like")
 | |
|         if (type is not None) and not isstringlike(type):
 | |
|             raise TypeError("control type must be string-like")
 | |
|         if (kind is not None) and not isstringlike(kind):
 | |
|             raise TypeError("control kind must be string-like")
 | |
|         if (id is not None) and not isstringlike(id):
 | |
|             raise TypeError("control id must be string-like")
 | |
|         if (label is not None) and not isstringlike(label):
 | |
|             raise TypeError("control label must be string-like")
 | |
|         if (predicate is not None) and not callable(predicate):
 | |
|             raise TypeError("control predicate must be callable")
 | |
|         if (nr is not None) and nr < 0:
 | |
|             raise ValueError("control number must be a positive integer")
 | |
| 
 | |
|         orig_nr = nr
 | |
|         found = None
 | |
|         ambiguous = False
 | |
|         if nr is None and self.backwards_compat:
 | |
|             nr = 0
 | |
| 
 | |
|         for control in self.controls:
 | |
|             if ((name is not None and name != control.name) and
 | |
|                 (name is not Missing or control.name is not None)):
 | |
|                 continue
 | |
|             if type is not None and type != control.type:
 | |
|                 continue
 | |
|             if kind is not None and not control.is_of_kind(kind):
 | |
|                 continue
 | |
|             if id is not None and id != control.id:
 | |
|                 continue
 | |
|             if predicate and not predicate(control):
 | |
|                 continue
 | |
|             if label:
 | |
|                 for l in control.get_labels():
 | |
|                     if l.text.find(label) > -1:
 | |
|                         break
 | |
|                 else:
 | |
|                     continue
 | |
|             if nr is not None:
 | |
|                 if nr == 0:
 | |
|                     return control  # early exit: unambiguous due to nr
 | |
|                 nr -= 1
 | |
|                 continue
 | |
|             if found:
 | |
|                 ambiguous = True
 | |
|                 break
 | |
|             found = control
 | |
| 
 | |
|         if found and not ambiguous:
 | |
|             return found
 | |
| 
 | |
|         description = []
 | |
|         if name is not None: description.append("name %s" % repr(name))
 | |
|         if type is not None: description.append("type '%s'" % type)
 | |
|         if kind is not None: description.append("kind '%s'" % kind)
 | |
|         if id is not None: description.append("id '%s'" % id)
 | |
|         if label is not None: description.append("label '%s'" % label)
 | |
|         if predicate is not None:
 | |
|             description.append("predicate %s" % predicate)
 | |
|         if orig_nr: description.append("nr %d" % orig_nr)
 | |
|         description = ", ".join(description)
 | |
| 
 | |
|         if ambiguous:
 | |
|             raise AmbiguityError("more than one control matching "+description)
 | |
|         elif not found:
 | |
|             raise ControlNotFoundError("no control matching "+description)
 | |
|         assert False
 | |
| 
 | |
|     def _click(self, name, type, id, label, nr, coord, return_type,
 | |
|                request_class=_urllib.request.Request):
 | |
|         try:
 | |
|             control = self._find_control(
 | |
|                 name, type, "clickable", id, label, None, nr)
 | |
|         except ControlNotFoundError:
 | |
|             if ((name is not None) or (type is not None) or (id is not None) or
 | |
|                 (nr != 0)):
 | |
|                 raise
 | |
|             # no clickable controls, but no control was explicitly requested,
 | |
|             # so return state without clicking any control
 | |
|             return self._switch_click(return_type, request_class)
 | |
|         else:
 | |
|             return control._click(self, coord, return_type, request_class)
 | |
| 
 | |
|     def _pairs(self):
 | |
|         """Return sequence of (key, value) pairs suitable for urlencoding."""
 | |
|         return [(k, v) for (i, k, v, c_i) in self._pairs_and_controls()]
 | |
| 
 | |
| 
 | |
|     def _pairs_and_controls(self):
 | |
|         """Return sequence of (index, key, value, control_index)
 | |
|         of totally ordered pairs suitable for urlencoding.
 | |
| 
 | |
|         control_index is the index of the control in self.controls
 | |
|         """
 | |
|         pairs = []
 | |
|         for control_index in xrange(len(self.controls)):
 | |
|             control = self.controls[control_index]
 | |
|             for ii, key, val in control._totally_ordered_pairs():
 | |
|                 pairs.append((ii, key, val, control_index))
 | |
| 
 | |
|         # stable sort by ONLY first item in tuple
 | |
|         pairs.sort()
 | |
| 
 | |
|         return pairs
 | |
| 
 | |
|     def _request_data(self):
 | |
|         """Return a tuple (url, data, headers)."""
 | |
|         method = self.method.upper()
 | |
|         #scheme, netloc, path, parameters, query, frag = _urllib.parse.urlparse(self.action)
 | |
|         parts = self._urlparse(self.action)
 | |
|         rest, (query, frag) = parts[:-2], parts[-2:]
 | |
| 
 | |
|         if method == "GET":
 | |
|             self.enctype = "application/x-www-form-urlencoded"  # force it
 | |
|             parts = rest + (urlencode(self._pairs()), None)
 | |
|             uri = self._urlunparse(parts)
 | |
|             return uri, None, []
 | |
|         elif method == "POST":
 | |
|             parts = rest + (query, None)
 | |
|             uri = self._urlunparse(parts)
 | |
|             if self.enctype == "application/x-www-form-urlencoded":
 | |
|                 return (uri, urlencode(self._pairs()),
 | |
|                         [("Content-Type", self.enctype)])
 | |
|             elif self.enctype == "text/plain":
 | |
|                 return (uri, self._pairs(),
 | |
|                         [("Content-Type", self.enctype)])
 | |
|             elif self.enctype == "multipart/form-data":
 | |
|                 data = _cStringIO()
 | |
|                 http_hdrs = []
 | |
|                 mw = MimeWriter(data, http_hdrs)
 | |
|                 f = mw.startmultipartbody("form-data", add_to_http_hdrs=True,
 | |
|                                           prefix=0)
 | |
|                 for ii, k, v, control_index in self._pairs_and_controls():
 | |
|                     self.controls[control_index]._write_mime_data(mw, k, v)
 | |
|                 mw.lastpart()
 | |
|                 return uri, data.getvalue(), http_hdrs
 | |
|             else:
 | |
|                 raise ValueError(
 | |
|                     "unknown POST form encoding type '%s'" % self.enctype)
 | |
|         else:
 | |
|             raise ValueError("Unknown method '%s'" % method)
 | |
| 
 | |
|     def _switch_click(self, return_type, request_class=_urllib.request.Request):
 | |
|         # This is called by HTMLForm and clickable Controls to hide switching
 | |
|         # on return_type.
 | |
|         if return_type == "pairs":
 | |
|             return self._pairs()
 | |
|         elif return_type == "request_data":
 | |
|             return self._request_data()
 | |
|         else:
 | |
|             req_data = self._request_data()
 | |
| 
 | |
|             req = request_class(req_data[0], req_data[1])
 | |
|             for key, val in req_data[2]:
 | |
|                 add_hdr = req.add_header
 | |
|                 if key.lower() == "content-type":
 | |
|                     try:
 | |
|                         add_hdr = req.add_unredirected_header
 | |
|                     except AttributeError:
 | |
|                         # pre-2.4 and not using ClientCookie
 | |
|                         pass
 | |
|                 add_hdr(key, val)
 | |
|             return req
 |