summaryrefslogtreecommitdiffstats
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py29
1 files changed, 18 insertions, 11 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index c1792c534..7b8a9cf9a 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
@@ -27,7 +27,6 @@ from ..utils import (
unified_strdate,
unsmuggle_url,
UnsupportedError,
- url_basename,
xpath_text,
)
from .brightcove import (
@@ -1549,7 +1548,7 @@ class GenericIE(InfoExtractor):
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
- video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ video_id = self._generic_id(url)
self.to_screen('%s: Requesting header' % video_id)
@@ -1578,7 +1577,7 @@ class GenericIE(InfoExtractor):
info_dict = {
'id': video_id,
- 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+ 'title': self._generic_title(url),
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
}
@@ -1754,9 +1753,9 @@ class GenericIE(InfoExtractor):
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
- vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
- if vimeo_url is not None:
- return self.url_result(vimeo_url)
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -2332,12 +2331,23 @@ class GenericIE(InfoExtractor):
info_dict.update(json_ld)
return info_dict
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ for entry in entries:
+ entry.update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries)
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
def filter_video(urls):
return list(filter(check_video, urls))
@@ -2388,9 +2398,6 @@ class GenericIE(InfoExtractor):
if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
- # HTML5 video
- found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
- if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'