1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2025-01-05 09:16:18 +00:00

Add an option to discard style information from TTML subtitles

This commit is contained in:
filip-hejsek 2022-08-11 05:28:03 +02:00 committed by GitHub
parent e6a836d54c
commit 74bbdc079b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 5 deletions

View File

@@ -280,6 +280,7 @@ def _real_main(argv=None):
postprocessors.append({ postprocessors.append({
'key': 'FFmpegSubtitlesConvertor', 'key': 'FFmpegSubtitlesConvertor',
'format': opts.convertsubtitles, 'format': opts.convertsubtitles,
'ttml_convert_style': opts.ttmlconvertstyle,
}) })
if opts.embedsubtitles: if opts.embedsubtitles:
postprocessors.append({ postprocessors.append({

View File

@@ -862,6 +862,10 @@ def parseOpts(overrideArguments=None):
'--convert-subs', '--convert-subtitles', '--convert-subs', '--convert-subtitles',
metavar='FORMAT', dest='convertsubtitles', default=None, metavar='FORMAT', dest='convertsubtitles', default=None,
help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)') help='Convert the subtitles to other format (currently supported: srt|ass|vtt|lrc)')
postproc.add_option(
'--ttml-convert-no-style',
action='store_false', dest='ttmlconvertstyle', default=True,
help='Discard all style information when converting subtitles from ttml/dfxp')
parser.add_option_group(general) parser.add_option_group(general)
parser.add_option_group(network) parser.add_option_group(network)

View File

@@ -599,9 +599,10 @@ class FFmpegFixupM3u8PP(FFmpegPostProcessor):
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
def __init__(self, downloader=None, format=None): def __init__(self, downloader=None, format=None, ttml_convert_style=True):
super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
self.format = format self.format = format
self.ttml_convert_style = ttml_convert_style
def run(self, info): def run(self, info):
subs = info.get('requested_subtitles') subs = info.get('requested_subtitles')
@@ -634,7 +635,7 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext')) srt_file = subtitles_filename(filename, lang, 'srt', info.get('ext'))
with open(dfxp_file, 'rb') as f: with open(dfxp_file, 'rb') as f:
srt_data = dfxp2srt(f.read()) srt_data = dfxp2srt(f.read(), self.ttml_convert_style)
with io.open(srt_file, 'wt', encoding='utf-8') as f: with io.open(srt_file, 'wt', encoding='utf-8') as f:
f.write(srt_data) f.write(srt_data)

View File

@@ -4494,7 +4494,7 @@ def srt_subtitles_timecode(seconds):
return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000) return '%02d:%02d:%02d,%03d' % (seconds / 3600, (seconds % 3600) / 60, seconds % 60, (seconds % 1) * 1000)
def dfxp2srt(dfxp_data): def dfxp2srt(dfxp_data, convert_style=True):
''' '''
@param dfxp_data A bytes-like object containing DFXP data @param dfxp_data A bytes-like object containing DFXP data
@returns A unicode object containing converted SRT data @returns A unicode object containing converted SRT data
@@ -4536,7 +4536,7 @@ def dfxp2srt(dfxp_data):
def start(self, tag, attrib): def start(self, tag, attrib):
if tag in (_x('ttml:br'), 'br'): if tag in (_x('ttml:br'), 'br'):
self._out += '\n' self._out += '\n'
else: elif convert_style:
unclosed_elements = [] unclosed_elements = []
style = {} style = {}
element_style_id = attrib.get('style') element_style_id = attrib.get('style')
@@ -4579,7 +4579,7 @@ def dfxp2srt(dfxp_data):
self._unclosed_elements.append(unclosed_elements) self._unclosed_elements.append(unclosed_elements)
def end(self, tag): def end(self, tag):
if tag not in (_x('ttml:br'), 'br'): if convert_style and tag not in (_x('ttml:br'), 'br'):
unclosed_elements = self._unclosed_elements.pop() unclosed_elements = self._unclosed_elements.pop()
for element in reversed(unclosed_elements): for element in reversed(unclosed_elements):
self._out += '</%s>' % element self._out += '</%s>' % element