From ed9a25dd612fb06d9cf007a6491ac9982535a8f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com>
Date: Wed, 18 Mar 2015 00:05:40 +0600
Subject: [PATCH] [generic] Generalize redirect regex

---
 youtube_dl/extractor/generic.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 0b8d96c27..dc5755d12 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1268,16 +1268,16 @@ class GenericIE(InfoExtractor):
             # HTML5 video
             found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
         if not found:
+            REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
             found = re.search(
                 r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
-                r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)',
+                r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX,
                 webpage)
             if not found:
                 # Look also in Refresh HTTP header
                 refresh_header = head_response.headers.get('Refresh')
                 if refresh_header:
-                    found = re.search(
-                        r'[0-9]{,2};\s*(?:URL|url)=(.+)', refresh_header)
+                    found = re.search(REDIRECT_REGEX, refresh_header)
             if found:
                 new_url = found.group(1)
                 self.report_following_redirect(new_url)