Patch for Issue #801

This commit is contained in:
Miroslav Stampar 2014-08-28 00:00:16 +02:00
parent fd36250026
commit fce671c899
2 changed files with 3 additions and 0 deletions

View File

@@ -3337,6 +3337,8 @@ def findPageForms(content, url, raise_=False, addToTargets=False):
try: try:
forms = ParseResponse(response, backwards_compat=False) forms = ParseResponse(response, backwards_compat=False)
except UnicodeError:
pass
except ParseError: except ParseError:
warnMsg = "badly formed HTML at the given URL ('%s'). Going to filter it" % url warnMsg = "badly formed HTML at the given URL ('%s'). Going to filter it" % url
logger.warning(warnMsg) logger.warning(warnMsg)

View File

@@ -1124,6 +1124,7 @@ def _ParseFileEx(file, base_uri,
if action is None: if action is None:
action = base_uri action = base_uri
else: else:
action = unicode(action, "utf8") if action and not isinstance(action, unicode) else action
action = _urljoin(base_uri, action) action = _urljoin(base_uri, action)
# would be nice to make HTMLForm class (form builder) pluggable # would be nice to make HTMLForm class (form builder) pluggable
form = HTMLForm( form = HTMLForm(