diff --git a/a.c b/a.c
new file mode 100644
index 000000000..50dc1cc91
--- /dev/null
+++ b/a.c
@@ -0,0 +1,302 @@
+/*
+ * a.c - concatenate the FLV files named on the command line into out.flv.
+ *
+ * Audio/video tag timestamps of each appended file are shifted by the last
+ * timestamp already written, and the "duration"/"filesize" values found in
+ * the first script tag are patched once every input has been copied.
+ */
+#define _GNU_SOURCE /* memmem() is a GNU extension */
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#define FLVF_HEADER 1 /* the file header has been written to the output */
+#define FLVF_SCRIPT 2 /* a non audio/video (script) tag has been seen */
+
+/* Leading bytes of an FLV file, read and written as one packed record. */
+struct flvhdr
+{
+    char fh_magic[3];
+    char fh_version;
+    char fh_flags;
+    char fh_hlen[4];
+    char fh_pads[4];
+}__attribute__((packed));
+
+/* Header preceding every tag payload; multi-byte fields are big-endian. */
+struct taghdr
+{
+    uint8_t th_type;
+    uint8_t th_dlen[3];
+    uint8_t th_tstamp[3];
+    uint8_t th_xstamp;
+    uint8_t th_streamid[3];
+}__attribute__((packed));
+
+/* State carried across all inputs while the combined file is built. */
+struct flvcombine
+{
+    FILE * fc_file;
+    uint32_t fc_flags;
+    uint32_t fc_timestamp;
+    uint32_t fc_filesize;
+    double fc_duration;
+    int fc_filesize_offset;
+    int fc_duration_offset;
+};
+
+/* duration, filesize */
+
+/* Decode the len bytes at buf as a big-endian unsigned integer. */
+uint32_t buftoint(const void *buf, size_t len)
+{
+    uint32_t bufint = 0;
+    const uint8_t *pval = (const uint8_t *)buf;
+    while (len-- > 0)
+        bufint = (bufint << 8) + *pval++;
+    return bufint;
+}
+
+/*
+ * Copy dlen bytes from src_fp to dst_fp.
+ * Returns the number of bytes left uncopied, i.e. 0 on success.
+ */
+int dd_copy(FILE * dst_fp, FILE * src_fp, size_t dlen)
+{
+    size_t len;
+    char buf[64 * 1024];
+    while (dlen > 0) {
+        len = fread(buf, 1, dlen < sizeof(buf)? dlen: sizeof(buf), src_fp);
+        /* stop on EOF or read error; testing feof() alone spins on errors */
+        if (len == 0)
+            break;
+        if (fwrite(buf, 1, len, dst_fp) != len)
+            break;
+        dlen -= len;
+    }
+    return (int)dlen;
+}
+
+/* Add stampbase to the 32-bit timestamp stored in *header, in place. */
+void adjtimestamp(struct taghdr *header, uint32_t stampbase)
+{
+    uint32_t adjtime = stampbase;
+    adjtime += buftoint(&header->th_tstamp, sizeof(header->th_tstamp));
+    /* widen first: uint8_t promotes to int and a shift into bit 31 is UB */
+    adjtime += ((uint32_t)header->th_xstamp << 24);
+    header->th_xstamp = (adjtime >> 24);
+    header->th_tstamp[0] = (adjtime >> 16);
+    header->th_tstamp[1] = (adjtime >> 8);
+    header->th_tstamp[2] = (adjtime >> 0);
+}
+
+/*
+ * Scan the dlen-byte tag payload at the current position of fp for the
+ * "duration" key and add its value to combine->fc_duration.  For the first
+ * such tag only, also record where the "duration" and "filesize" values
+ * will sit in the output so that fixedflv() can overwrite them.
+ */
+void update_metainfo(struct flvcombine *combine, FILE *fp, size_t dlen)
+{
+    int i;
+    size_t len;
+    char *pmem = NULL;
+    char buf[256 * 1024];
+    double duration = 0.0;
+    uint8_t duration_bytes[8];
+    printf("dlen: %zu\n", dlen);
+    /* dlen comes from the input file: skip oversized payloads, don't abort */
+    if (dlen >= sizeof(buf))
+        return;
+
+    len = fread(buf, 1, dlen, fp);
+    if (len == 0)
+        return;
+    pmem = (char *)memmem(buf, len, "duration", 8);
+    /* 8-byte key + 1 skipped byte + 8-byte value must fit in the buffer */
+    if (pmem == NULL || (size_t)(pmem - buf) + 17 > len)
+        return;
+    memcpy(&duration_bytes, pmem + 9, 8);
+    /* byte-reverse the stored value before using it as a host double */
+    for (i = 0; i < 4; i ++) {
+        uint8_t tmp = duration_bytes[i];
+        duration_bytes[i] = duration_bytes[7 - i];
+        duration_bytes[7 - i] = tmp;
+    }
+    memcpy(&duration, &duration_bytes, 8);
+    combine->fc_duration += duration;
+    if (combine->fc_flags & FLVF_SCRIPT)
+        return;
+    combine->fc_duration_offset =
+        combine->fc_filesize + (pmem + 9l - buf) + sizeof(struct taghdr);
+    pmem = (char *)memmem(buf, len, "filesize", 8);
+    if (pmem == NULL || (size_t)(pmem - buf) + 17 > len)
+        return;
+    combine->fc_filesize_offset =
+        combine->fc_filesize + (pmem + 9l - buf) + sizeof(struct taghdr);
+}
+
+/*
+ * Append the FLV file at path to combine->fc_file.  The file header and the
+ * first non audio/video tag are written once; later ones are skipped.  Audio
+ * and video tags are re-stamped relative to combine->fc_timestamp, which is
+ * advanced to the last timestamp written.
+ * Returns 0, or a negative value when tag data could not be copied in full.
+ */
+int addflv(struct flvcombine *combine, const char *path)
+{
+    int error = 0;
+    FILE *fp, *fout;
+    char magic[4];
+    long savepos;
+    size_t dlen, flags;
+    struct flvhdr header;
+    struct taghdr *last;
+    struct taghdr tagvideo;
+    struct taghdr tagaudio;
+    struct taghdr tagheader;
+
+    fout = combine->fc_file;
+    if (fout == NULL)
+        return 0;
+    /* open the input only once the output is known good, so it never leaks */
+    fp = fopen(path, "rb");
+    if (fp == NULL)
+        return 0;
+
+    last = NULL;
+    memset(magic, 0, sizeof(magic));
+    memset(&tagvideo, 0, sizeof(tagvideo));
+    memset(&tagaudio, 0, sizeof(tagaudio));
+
+    if ( !fread(&header, sizeof(header), 1, fp) )
+        goto fail;
+
+    memcpy(magic, header.fh_magic, 3);
+    if ( strcmp("FLV", magic) )
+        goto fail;
+
+    if ((combine->fc_flags & FLVF_HEADER) == 0) {
+        fwrite(&header, sizeof(header), 1, fout);
+        combine->fc_filesize += sizeof(header);
+        combine->fc_flags |= FLVF_HEADER;
+    }
+
+    printf("magic: %s\n", magic);
+    printf("flags: 0x%02x\n", (unsigned)(unsigned char)header.fh_flags);
+    printf("version: 0x%02x\n", (unsigned)(unsigned char)header.fh_version);
+    printf("header len: %u\n", (unsigned)buftoint(header.fh_hlen, sizeof(header.fh_hlen)));
+
+    while (feof(fp) == 0) {
+        /* a short read here is the normal end of the tag stream */
+        if ( !fread(&tagheader, sizeof(tagheader), 1, fp) )
+            goto fail;
+
+        dlen = buftoint(tagheader.th_dlen, sizeof(tagheader.th_dlen));
+
+        switch (tagheader.th_type)
+        {
+            case 0x09:
+                adjtimestamp(&tagheader, combine->fc_timestamp);
+                tagvideo = tagheader;
+                last = &tagvideo;
+                break;
+            case 0x08:
+                adjtimestamp(&tagheader, combine->fc_timestamp);
+                tagaudio = tagheader;
+                last = &tagaudio;
+                break;
+            default:
+                flags = combine->fc_flags;
+                savepos = ftell(fp);
+                if (savepos == -1)
+                    goto fail;
+                /* already have a script tag: seek past payload + 4 trailer bytes */
+                savepos = (flags & FLVF_SCRIPT)? (savepos + dlen + 4): savepos;
+                update_metainfo(combine, fp, dlen);
+                combine->fc_flags |= FLVF_SCRIPT;
+                if ( fseek(fp, savepos, SEEK_SET) )
+                    goto fail;
+                if (flags & FLVF_SCRIPT)
+                    continue;
+                break;
+        }
+        fwrite(&tagheader, sizeof(tagheader), 1, fout);
+        combine->fc_filesize += sizeof(tagheader);
+        combine->fc_filesize += (dlen + 4);
+        if ( dd_copy(fout, fp, dlen + 4)) {
+            error = -__LINE__;
+            break;
+        }
+    }
+
+fail:
+    fclose(fp);
+    if (last == &tagvideo || last == &tagaudio) {
+        combine->fc_timestamp = buftoint(last->th_tstamp, sizeof(last->th_tstamp));
+        combine->fc_timestamp |= ((uint32_t)last->th_xstamp << 24);
+        printf("time stamp: %u\n", (unsigned)combine->fc_timestamp);
+    }
+    return error;
+}
+
+/*
+ * Overwrite the "filesize" and "duration" values recorded by
+ * update_metainfo() with the totals accumulated over all inputs.  Each value
+ * is written as a byte-reversed 8-byte double.
+ */
+void fixedflv(struct flvcombine *context)
+{
+    int i;
+    double dblval = 0.0;
+    uint8_t dblbytes[8];
+    FILE *fout = context->fc_file;
+
+    if (context->fc_filesize_offset > 0) {
+        if ( fseek(fout, context->fc_filesize_offset, SEEK_SET) )
+            return;
+        dblval = context->fc_filesize;
+        memcpy(dblbytes, &dblval, 8);
+
+        for (i = 0; i < 4; i ++) {
+            uint8_t tmp = dblbytes[i];
+            dblbytes[i] = dblbytes[7 - i];
+            dblbytes[7 - i] = tmp;
+        }
+        fwrite(dblbytes, 8, 1, fout);
+    }
+
+    if (context->fc_duration_offset > 0) {
+        if ( fseek(fout, context->fc_duration_offset, SEEK_SET) )
+            return;
+        dblval = context->fc_duration;
+        memcpy(dblbytes, &dblval, 8);
+
+        for (i = 0; i < 4; i ++) {
+            uint8_t tmp = dblbytes[i];
+            dblbytes[i] = dblbytes[7 - i];
+            dblbytes[7 - i] = tmp;
+        }
+        fwrite(dblbytes, 8, 1, fout);
+    }
+}
+
+/* Combine every file named in argv[1..] into out.flv. */
+int main(int argc, char *argv[])
+{
+    int i;
+    struct flvcombine context;
+    memset(&context, 0, sizeof(context));
+    context.fc_file = fopen("out.flv", "wb");
+    if (context.fc_file == NULL)
+        return -1;
+    context.fc_duration = 0;
+    for (i = 1; i < argc; i++)
+        addflv(&context, argv[i]);
+    fixedflv(&context);
+    fclose(context.fc_file);
+
+    printf("seconds: %u\n", (unsigned)context.fc_timestamp);
+    return 0;
+}
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index dfd8f76ee..3b24f593d 100644
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -13,6 +13,8 @@ import urllib
 import urllib2
 import email.utils
 import xml.etree.ElementTree
+import random
+import math
 from urlparse import parse_qs
 
 try:
@@ -2955,3 +2957,138 @@ class MTVIE(InfoExtractor):
         }
 
         return [info]
+
+
+
+class YoukuIE(InfoExtractor):
+    """Information extractor for v.youku.com."""
+
+    _VALID_URL = r'(?:http://)?v\.youku\.com/v_show/id_(?P<ID>[A-Za-z0-9]+)\.html'
+    IE_NAME = u'Youku'
+
+    def __init__(self, downloader=None):
+        InfoExtractor.__init__(self, downloader)
+
+    def report_download_webpage(self, file_id):
+        """Report webpage download."""
+        self._downloader.to_screen(u'[Youku] %s: Downloading webpage' % file_id)
+
+    def report_extraction(self, file_id):
+        """Report information extraction."""
+        self._downloader.to_screen(u'[Youku] %s: Extracting information' % file_id)
+
+    def _gen_sid(self):
+        """Build a session id from the time in milliseconds and two random numbers."""
+        nowTime = int(time.time() * 1000)
+        random1 = random.randint(1000,1998)
+        random2 = random.randint(1000,9999)
+
+        return "%d%d%d" %(nowTime,random1,random2)
+
+    def _get_file_ID_mix_string(self, seed):
+        """Return the alphabet as a list shuffled deterministically from seed."""
+        mixed = []
+        source = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ/\:._-1234567890")
+        seed = float(seed)
+        for i in range(len(source)):
+            seed = (seed * 211 + 30031 ) % 65536
+            index = math.floor(seed / 65536 * len(source) )
+            mixed.append(source[int(index)])
+            source.remove(source[int(index)])
+        #return ''.join(mixed)
+        return mixed
+
+
+    def _get_file_id(self, fileId, seed):
+        """Decode the '*'-separated indices in fileId through the shuffled alphabet."""
+        mixed = self._get_file_ID_mix_string(seed)
+        ids = fileId.split('*')
+        realId = []
+        for ch in ids:
+            # compare by value; "is not ''" only tests object identity
+            if ch != '':
+                realId.append(mixed[int(ch)])
+        return ''.join(realId)
+
+    def _gen_key(self, key1, key2):
+        pass
+
+
+
+
+    def _real_extract(self, url):
+        """Return one info dict per video segment of the Youku page at url."""
+        mobj = re.match(self._VALID_URL, url)
+        if mobj is None:
+            self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+            return
+        video_id = mobj.group('ID')
+
+        info_url = 'http://v.youku.com/player/getPlayList/VideoIDS/' + video_id
+
+        request = urllib2.Request(info_url, None, std_headers)
+        try:
+            self.report_download_webpage(video_id)
+            jsondata = urllib2.urlopen(request).read()
+        except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
+            self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
+            return
+
+        self.report_extraction(video_id)
+        try:
+            config = json.loads(jsondata)
+
+            video_title = config['data'][0]['title']
+            seed = config['data'][0]['seed']
+
+            format = self._downloader.params.get('format', None)
+            supported_format = config['data'][0]['streamfileids'].keys()
+
+            if format is None or format == 'best':
+                if 'hd2' in supported_format:
+                    format = 'hd2'
+                else:
+                    format = 'flv'
+                ext = u'flv'
+            elif format == 'worst':
+                format = 'mp4'
+                ext = u'mp4'
+            else:
+                format = 'flv'
+                ext = u'flv'
+
+
+            fileid = config['data'][0]['streamfileids'][format]
+            seg_number = len(config['data'][0]['segs'][format])
+
+            keys=[]
+            for i in xrange(seg_number):
+                keys.append(config['data'][0]['segs'][format][i]['k'])
+
+            #TODO check error
+            #youku only could be viewed from mainland china
+        except Exception:
+            self._downloader.trouble(u'ERROR: unable to extract info section')
+            return
+
+        files_info=[]
+        sid = self._gen_sid()
+        fileid = self._get_file_id(fileid, seed)
+
+        #column 8,9 of fileid represent the segment number
+        #fileid[7:9] should be changed
+        for index, key in enumerate(keys):
+
+            temp_fileid = '%s%02X%s' % (fileid[0:8], index, fileid[10:])
+            download_url = 'http://f.youku.com/player/getFlvPath/sid/%s_%02X/st/flv/fileid/%s?k=%s' % (sid, index, temp_fileid, key)
+            info = {
+                'id': '%s_part%02d' % (video_id, index),
+                'url': download_url,
+                'uploader': None,
+                'title': video_title,
+                'ext': ext,
+                'format': u'NA'
+            }
+            files_info.append(info)
+
+        return files_info
diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py
index 86951840d..84b972d51 100644
--- a/youtube_dl/__init__.py
+++ b/youtube_dl/__init__.py
@@ -351,6 +351,7 @@ def gen_extractors():
         MixcloudIE(),
         StanfordOpenClassroomIE(),
         MTVIE(),
+        YoukuIE(),
         GenericIE()
     ]
 