1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-16 14:27:53 +00:00

Compare commits

..

No commits in common. "a874871801b8b05d06e8ffe52bed94fdfc26611e" and "f102e3dc4efe27e6c8697d6d117f05d1bb6d1b91" have entirely different histories.

2 changed files with 22 additions and 33 deletions

View File

@ -1,20 +1,8 @@
# coding: utf-8
from __future__ import unicode_literals
from .compat import (
compat_str,
compat_chr,
)
from .compat import compat_str
# Below is included the text of icu/CaseFolding.txt retrieved from
# https://github.com/unicode-org/icu/blob/main/icu4c/source/data/unidata/CaseFolding.txt
# In case newly foldable Unicode characters are defined, paste the new version
# of the text inside the ''' marks.
# The text is expected to have only blank lines and lines with 1st character #,
# all ignored, and fold definitions like this:
# `from_hex_code; space_separated_to_hex_code_list; comment`
_map_str = '''
# CaseFolding-15.0.0.txt
# Date: 2022-02-02, 23:35:35 GMT
# © 2022 Unicode®, Inc.
@ -77,6 +65,7 @@ _map_str = '''
# have the value C for the status field, and the code point itself for the mapping field.
# =================================================================
_map_str = '''
0041; C; 0061; # LATIN CAPITAL LETTER A
0042; C; 0062; # LATIN CAPITAL LETTER B
0043; C; 0063; # LATIN CAPITAL LETTER C
@ -1638,22 +1627,17 @@ FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
'''
# Convert one hexadecimal code-point string to the corresponding character.
_parse_unichr = lambda s: compat_chr(int(s, 16))
# Build the folding table from _map_str: every retained line is split on '; '
# into (from, type, to, rest); only entries whose type is 'C' or 'F' are kept,
# and each maps the source character to the concatenation of its target
# characters.
# NOTE(review): the two tuple lines and the two `l.split` lines inside the call
# below look like alternative versions of the same expression (old/new sides of
# a diff) — confirm which variant is intended before treating this as code.
_map = dict(
(_parse_unichr(from_), ''.join(map(_parse_unichr, to_.split(' '))))
(unichr(int(from_, 16)), ''.join((unichr(int(v, 16)) for v in to_.split(' '))))
for from_, type_, to_, _ in (
l.split('; ', 3) for l in _map_str.splitlines() if l and not l[0] == '#')
l.split('; ', 3) for l in _map_str.splitlines() if l)
if type_ in ('C', 'F'))
# The raw definition text is not needed once the table has been built.
del _map_str
def casefold(s):
    """Return a case-folded copy of the text string `s`.

    Every character that has an entry in the module-level `_map` table is
    replaced by its mapped string; all other characters are kept unchanged.
    `s` must be a `compat_str` instance (enforced with an assertion).
    """
    assert isinstance(s, compat_str)
    return ''.join(_map.get(c, c) for c in s)


# __all__ has to list exported names as strings; putting the function object
# itself here makes `from <module> import *` fail with a TypeError.
__all__ = [
    'casefold',
]

View File

@ -21,19 +21,6 @@ import subprocess
import sys
import xml.etree.ElementTree
# deal with critical unicode/str things first
try:
    # Python 2: all of these builtins exist. On Python 3 the first lookup
    # raises NameError and the fallback branch below assigns every alias.
    compat_str = unicode
    compat_basestring = basestring
    compat_chr = unichr
    from .casefold import casefold as compat_casefold
except NameError:
    # Python 3: text and "base string" are both str, and str has casefold().
    compat_str = str
    compat_basestring = str
    compat_chr = chr
    compat_casefold = lambda s: s.casefold()
try:
import collections.abc as compat_collections_abc
except ImportError:
@ -2386,6 +2373,13 @@ try:
except ImportError:
import BaseHTTPServer as compat_http_server
# Select the text type and the case-folding helper for the running interpreter.
try:
compat_str = unicode # Python 2
# Reached only when `unicode` exists: use casefold from the sibling .casefold module.
from .casefold import casefold as compat_casefold
except NameError:
# `unicode` is not defined: use the built-in str and its own casefold() method.
compat_str = str
compat_casefold = lambda s: s.casefold()
try:
from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes
from urllib.parse import unquote as compat_urllib_parse_unquote
@ -2516,11 +2510,22 @@ except ImportError: # Python < 3.4
return compat_urllib_response.addinfourl(io.BytesIO(data), headers, url)
# Default to str; replaced below when the `basestring` builtin is available.
compat_basestring = str
try:
    compat_basestring = basestring  # Python 2
except NameError:
    pass
# Default to chr; replaced below when the `unichr` builtin is available.
compat_chr = chr
try:
    compat_chr = unichr  # Python 2
except NameError:
    pass
# Expose the XML parse-error exception class under a single name.
try:
from xml.etree.ElementTree import ParseError as compat_xml_parse_error
except ImportError: # Python 2.6
# ElementTree offers no ParseError here: use expat's ExpatError instead.
from xml.parsers.expat import ExpatError as compat_xml_parse_error
# Short module alias for xml.etree.ElementTree.
etree = xml.etree.ElementTree