From 3e669f369f886dff8fa8272f3bfa37be6360a0ba Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Nov 2012 00:02:55 +0100 Subject: [PATCH] Py3 compat for unichr and htmlentitydefs --- youtube_dl/utils.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index ac7e161af..668338270 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import gzip -import htmlentitydefs import HTMLParser import locale import os @@ -17,19 +16,6 @@ try: except ImportError: import StringIO -std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', - 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', - 'Accept-Encoding': 'gzip, deflate', - 'Accept-Language': 'en-us,en;q=0.5', -} - -try: - compat_str = unicode # Python 2 -except NameError: - compat_str = str - try: import urllib.request as compat_urllib_request except ImportError: # Python 2 @@ -50,6 +36,29 @@ try: except ImportError: # Python 2 import cookielib as compat_cookiejar +try: + import html.entities as compat_html_entities +except ImportError: # Python 2 + import htmlentitydefs as compat_html_entities + +try: + compat_str = unicode # Python 2 +except NameError: + compat_str = str + +try: + compat_chr = unichr # Python 2 +except NameError: + compat_chr = chr + + +std_headers = { + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20100101 Firefox/10.0', + 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'Accept-Encoding': 'gzip, deflate', + 'Accept-Language': 'en-us,en;q=0.5', +} def preferredencoding(): """Get preferred encoding. 
@@ -74,8 +83,8 @@ def htmlentity_transform(matchobj): entity = matchobj.group(1) # Known non-numeric HTML entity - if entity in htmlentitydefs.name2codepoint: - return unichr(htmlentitydefs.name2codepoint[entity]) + if entity in compat_html_entities.name2codepoint: + return compat_chr(compat_html_entities.name2codepoint[entity]) mobj = re.match(u'(?u)#(x?\\d+)', entity) if mobj is not None: @@ -85,7 +94,7 @@ def htmlentity_transform(matchobj): numstr = u'0%s' % numstr else: base = 10 - return unichr(int(numstr, base)) + return compat_chr(int(numstr, base)) # Unknown entity in name, return its literal representation return (u'&%s;' % entity)