From d90b3854ca9e8602f440cc9439e1cba240192286 Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Thu, 28 May 2015 00:37:00 +0800
Subject: [PATCH 1/8] [porn91] Add new extractor for 91porn.com

---
 youtube_dl/extractor/__init__.py |  1 +
 youtube_dl/extractor/porn91.py   | 62 ++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 youtube_dl/extractor/porn91.py
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 80c9cb107..d20ad286d 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -400,6 +400,7 @@ from .playfm import PlayFMIE
 from .playvid import PlayvidIE
 from .playwire import PlaywireIE
 from .podomatic import PodomaticIE
+from .porn91 import Porn91IE
 from .pornhd import PornHdIE
 from .pornhub import (
     PornHubIE,
diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
new file mode 100644
index 000000000..af06af2b7
--- /dev/null
+++ b/youtube_dl/extractor/porn91.py
@@ -0,0 +1,62 @@
+# encoding: utf-8
+from __future__ import unicode_literals
+
+import re
+import json
+
+from ..compat import compat_urllib_parse
+from .common import InfoExtractor
+from ..utils import ExtractorError
+
+
+class Porn91IE(InfoExtractor):
+    IE_NAME = '91porn'
+    _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
+
+    _TEST = {
+            'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+            'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
+            'info_dict': {
+                'id': '7e42283b4f5ab36da134',
+                'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
+                'ext': 'mp4'
+            }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+        url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
+        self._set_cookie('91porn.com', 'language', 'cn_CN')
+        webpage = self._download_webpage(url, video_id, "get HTML content")
+        title = re.search(
+            r'<div id="viewvideo-title">(.+?)</div>',
+            webpage,
+            re.DOTALL)
+        assert title
+        title = title.group(1).replace('\n', '')
+
+        # get real url
+        n1 = re.search(r'so.addVariable\(\'file\',\'(\d+)\'', webpage)
+        n2 = re.search(r'so.addVariable\(\'seccode\',\'(.+?)\'', webpage)
+        n3 = re.search(r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage)
+        if not (n1 and n2 and n3):
+            raise ExtractorError("You are Blocked by Server.")
+
+        url_params = compat_urllib_parse.urlencode({
+            'VID': n1.group(1),
+            'mp4': '1',
+            'seccode': n2.group(1),
+            'max_vid': n3.group(1),
+        })
+        t_url = 'http://91porn.com/getfile.php?' + url_params
+        info_cn = self._download_webpage(t_url, video_id, "get real video_url")
+        video_url = re.search(r'file=(http.+?)&', info_cn).group(1)
+
+        info = {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+        }
+
+        return info

From 703d78bbf5edf73f60447ac273c0d303d28cc340 Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Thu, 28 May 2015 01:37:24 +0800
Subject: [PATCH 2/8] [porn91] change re to _search_regex

---
 youtube_dl/extractor/porn91.py | 29 +++++++++++++++--------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index af06af2b7..f3a97df64 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -2,7 +2,6 @@
 from __future__ import unicode_literals
 
 import re
-import json
 
 from ..compat import compat_urllib_parse
 from .common import InfoExtractor
@@ -29,30 +28,32 @@ class Porn91IE(InfoExtractor):
         url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
         self._set_cookie('91porn.com', 'language', 'cn_CN')
         webpage = self._download_webpage(url, video_id, "get HTML content")
-        title = re.search(
-            r'<div id="viewvideo-title">(.+?)</div>',
-            webpage,
-            re.DOTALL)
+        title = self._search_regex(
+            r'<div id="viewvideo-title">(?P<title>.+?)</div>',
+            webpage, 'title', flags=re.DOTALL)
         assert title
-        title = title.group(1).replace('\n', '')
+        title = title.replace('\n', '')
 
         # get real url
-        n1 = re.search(r'so.addVariable\(\'file\',\'(\d+)\'', webpage)
-        n2 = re.search(r'so.addVariable\(\'seccode\',\'(.+?)\'', webpage)
-        n3 = re.search(r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage)
+        n1 = self._search_regex(
+            r'so.addVariable\(\'file\',\'(?P<n1>\d+)\'', webpage, 'n1')
+        n2 = self._search_regex(
+            r'so.addVariable\(\'seccode\',\'(?P<n2>.+?)\'', webpage, 'n2')
+        n3 = self._search_regex(
+            r'so.addVariable\(\'max_vid\',\'(?P<n3>\d+)\'', webpage, 'n3')
         if not (n1 and n2 and n3):
             raise ExtractorError("You are Blocked by Server.")
-
         url_params = compat_urllib_parse.urlencode({
-            'VID': n1.group(1),
+            'VID': n1,
             'mp4': '1',
-            'seccode': n2.group(1),
-            'max_vid': n3.group(1),
+            'seccode': n2,
+            'max_vid': n3,
         })
         t_url = 'http://91porn.com/getfile.php?' + url_params
         info_cn = self._download_webpage(t_url, video_id, "get real video_url")
-        video_url = re.search(r'file=(http.+?)&', info_cn).group(1)
+        video_url = self._search_regex(r'file=(?P<url>http.+?)&', info_cn, 'url')
 
+        # construct info
         info = {
             'id': video_id,
             'title': title,

From 806598b94dec1268566ae71d671116060f7971d6 Mon Sep 17 00:00:00 2001
From: PeterDing <dfhayst@gmail.com>
Date: Fri, 29 May 2015 08:21:24 +0800
Subject: [PATCH 3/8] [porn91] The value _search_regex returns does not need to
 be checked

---
 youtube_dl/extractor/porn91.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index f3a97df64..b62eec92d 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -31,7 +31,6 @@ class Porn91IE(InfoExtractor):
         title = self._search_regex(
             r'<div id="viewvideo-title">(?P<title>.+?)</div>',
             webpage, 'title', flags=re.DOTALL)
-        assert title
         title = title.replace('\n', '')
 
         # get real url
@@ -41,8 +40,6 @@ class Porn91IE(InfoExtractor):
             r'so.addVariable\(\'seccode\',\'(?P<n2>.+?)\'', webpage, 'n2')
         n3 = self._search_regex(
             r'so.addVariable\(\'max_vid\',\'(?P<n3>\d+)\'', webpage, 'n3')
-        if not (n1 and n2 and n3):
-            raise ExtractorError("You are Blocked by Server.")
         url_params = compat_urllib_parse.urlencode({
             'VID': n1,
             'mp4': '1',

From 9ff811c5cddbf3481fdcd44e97cf3683a925b33f Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sat, 30 May 2015 23:35:55 +0800
Subject: [PATCH 4/8] [porn91] PEP8

---
 youtube_dl/extractor/porn91.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index b62eec92d..cdf308f3d 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -5,7 +5,6 @@ import re
 
 from ..compat import compat_urllib_parse
 from .common import InfoExtractor
-from ..utils import ExtractorError
 
 
 class Porn91IE(InfoExtractor):
@@ -13,13 +12,13 @@ class Porn91IE(InfoExtractor):
     _VALID_URL = r'(?:https?://)(?:www\.|)91porn\.com/.+?\?viewkey=(?P<id>[\w\d]+)'
 
     _TEST = {
-            'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
-            'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
-            'info_dict': {
-                'id': '7e42283b4f5ab36da134',
-                'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
-                'ext': 'mp4'
-            }
+        'url': 'http://91porn.com/view_video.php?viewkey=7e42283b4f5ab36da134',
+        'md5': '6df8f6d028bc8b14f5dbd73af742fb20',
+        'info_dict': {
+            'id': '7e42283b4f5ab36da134',
+            'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
+            'ext': 'mp4'
+        }
     }
 
     def _real_extract(self, url):

From 1c2223875664f99325b73fe7765677db9b87e105 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 31 May 2015 00:03:19 +0800
Subject: [PATCH 5/8] [porn91] Simplify

---
 youtube_dl/extractor/porn91.py | 38 ++++++++++++++--------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index cdf308f3d..377ca2c77 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -1,8 +1,6 @@
 # encoding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from ..compat import compat_urllib_parse
 from .common import InfoExtractor
 
@@ -22,38 +20,34 @@ class Porn91IE(InfoExtractor):
     }
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
+        video_id = self._match_id(url)
         url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
         self._set_cookie('91porn.com', 'language', 'cn_CN')
         webpage = self._download_webpage(url, video_id, "get HTML content")
         title = self._search_regex(
-            r'<div id="viewvideo-title">(?P<title>.+?)</div>',
-            webpage, 'title', flags=re.DOTALL)
+            r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
         title = title.replace('\n', '')
 
         # get real url
-        n1 = self._search_regex(
-            r'so.addVariable\(\'file\',\'(?P<n1>\d+)\'', webpage, 'n1')
-        n2 = self._search_regex(
-            r'so.addVariable\(\'seccode\',\'(?P<n2>.+?)\'', webpage, 'n2')
-        n3 = self._search_regex(
-            r'so.addVariable\(\'max_vid\',\'(?P<n3>\d+)\'', webpage, 'n3')
+        file_id = self._search_regex(
+            r'so.addVariable\(\'file\',\'(\d+)\'', webpage, 'file id')
+        sec_code = self._search_regex(
+            r'so.addVariable\(\'seccode\',\'([^\']+)\'', webpage, 'sec code')
+        max_vid = self._search_regex(
+            r'so.addVariable\(\'max_vid\',\'(\d+)\'', webpage, 'max vid')
         url_params = compat_urllib_parse.urlencode({
-            'VID': n1,
+            'VID': file_id,
             'mp4': '1',
-            'seccode': n2,
-            'max_vid': n3,
+            'seccode': sec_code,
+            'max_vid': max_vid,
         })
-        t_url = 'http://91porn.com/getfile.php?' + url_params
-        info_cn = self._download_webpage(t_url, video_id, "get real video_url")
-        video_url = self._search_regex(r'file=(?P<url>http.+?)&', info_cn, 'url')
+        info_cn = self._download_webpage(
+            'http://91porn.com/getfile.php?' + url_params, video_id,
+            "get real video url")
+        video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
 
-        # construct info
-        info = {
+        return {
             'id': video_id,
             'title': title,
             'url': video_url,
         }
-
-        return info

From a80601f8d9789e27c0a916e63d7192c3f398d5d5 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 31 May 2015 00:20:37 +0800
Subject: [PATCH 6/8] [porn91] Extract more info

---
 youtube_dl/extractor/porn91.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index 377ca2c77..ea1efc71b 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -3,6 +3,10 @@ from __future__ import unicode_literals
 
 from ..compat import compat_urllib_parse
 from .common import InfoExtractor
+from ..utils import (
+    parse_duration,
+    int_or_none,
+)
 
 
 class Porn91IE(InfoExtractor):
@@ -15,7 +19,8 @@ class Porn91IE(InfoExtractor):
         'info_dict': {
             'id': '7e42283b4f5ab36da134',
             'title': '18岁大一漂亮学妹，水嫩性感，再爽一次！',
-            'ext': 'mp4'
+            'ext': 'mp4',
+            'duration': 431,
         }
     }
 
@@ -46,8 +51,16 @@ class Porn91IE(InfoExtractor):
             "get real video url")
         video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
 
+        duration = parse_duration(self._search_regex(
+            r'时长:\s*</span>\s*(\d+:\d+)', webpage, 'duration', fatal=False))
+
+        comment_count = int_or_none(self._search_regex(
+            r'留言:\s*</span>\s*(\d+)', webpage, 'comment count', fatal=False))
+
         return {
             'id': video_id,
             'title': title,
             'url': video_url,
+            'duration': duration,
+            'comment_count': comment_count,
         }

From d05a1dbe7013d6314ec477b50d864726e509a872 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 31 May 2015 00:26:12 +0800
Subject: [PATCH 7/8] [porn91] Catch daily limit error

---
 youtube_dl/extractor/porn91.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index ea1efc71b..c119c7e94 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -6,6 +6,7 @@ from .common import InfoExtractor
 from ..utils import (
     parse_duration,
     int_or_none,
+    ExtractorError,
 )
 
 
@@ -29,6 +30,10 @@ class Porn91IE(InfoExtractor):
         url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
         self._set_cookie('91porn.com', 'language', 'cn_CN')
         webpage = self._download_webpage(url, video_id, "get HTML content")
+
+        if '作为游客，你每天只可观看10个视频' in webpage:
+            raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
+
         title = self._search_regex(
             r'<div id="viewvideo-title">([^<]+)</div>', webpage, 'title')
         title = title.replace('\n', '')

From a2d971309b75c79f3f688a0c381707d828cb1026 Mon Sep 17 00:00:00 2001
From: Yen Chi Hsuan <yan12125@gmail.com>
Date: Sun, 31 May 2015 00:31:18 +0800
Subject: [PATCH 8/8] [porn91] Use single quotes

---
 youtube_dl/extractor/porn91.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/youtube_dl/extractor/porn91.py b/youtube_dl/extractor/porn91.py
index c119c7e94..72d1b2718 100644
--- a/youtube_dl/extractor/porn91.py
+++ b/youtube_dl/extractor/porn91.py
@@ -29,7 +29,7 @@ class Porn91IE(InfoExtractor):
         video_id = self._match_id(url)
         url = 'http://91porn.com/view_video.php?viewkey=%s' % video_id
         self._set_cookie('91porn.com', 'language', 'cn_CN')
-        webpage = self._download_webpage(url, video_id, "get HTML content")
+        webpage = self._download_webpage(url, video_id, 'get HTML content')
 
         if '作为游客，你每天只可观看10个视频' in webpage:
             raise ExtractorError('91 Porn says: Daily limit 10 videos exceeded', expected=True)
@@ -53,7 +53,7 @@ class Porn91IE(InfoExtractor):
         })
         info_cn = self._download_webpage(
             'http://91porn.com/getfile.php?' + url_params, video_id,
-            "get real video url")
+            'get real video url')
         video_url = self._search_regex(r'file=([^&]+)&', info_cn, 'url')
 
         duration = parse_duration(self._search_regex(