1
0
mirror of https://github.com/ytdl-org/youtube-dl.git synced 2024-11-16 14:27:53 +00:00

[utils] Make value optional for find_xpath_attr

This allows selecting elements that carry a particular attribute by name, without specifying its value, similar to the XPath syntax `[@attrib]`.
This commit is contained in:
Sergey M․ 2015-08-01 20:22:13 +06:00
parent 525a87f58e
commit ee114368ad
2 changed files with 17 additions and 5 deletions

View File

@ -235,12 +235,21 @@ class TestUtil(unittest.TestCase):
<node x="a"/> <node x="a"/>
<node x="a" y="c" /> <node x="a" y="c" />
<node x="b" y="d" /> <node x="b" y="d" />
<node x="" />
</root>''' </root>'''
doc = xml.etree.ElementTree.fromstring(testxml) doc = xml.etree.ElementTree.fromstring(testxml)
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None) self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None)
self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None)
self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1])
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1]) self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3])
self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2])
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2]) self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3])
self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4])
def test_xpath_with_ns(self): def test_xpath_with_ns(self):
testxml = '''<root xmlns:media="http://example.com/"> testxml = '''<root xmlns:media="http://example.com/">

View File

@ -139,21 +139,24 @@ def write_json_file(obj, fn):
if sys.version_info >= (2, 7): if sys.version_info >= (2, 7):
def find_xpath_attr(node, xpath, key, val): def find_xpath_attr(node, xpath, key, val=None):
""" Find the xpath xpath[@key=val] """ """ Find the xpath xpath[@key=val] """
assert re.match(r'^[a-zA-Z-]+$', key) assert re.match(r'^[a-zA-Z-]+$', key)
assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val) if val:
expr = xpath + "[@%s='%s']" % (key, val) assert re.match(r'^[a-zA-Z0-9@\s:._-]*$', val)
expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
return node.find(expr) return node.find(expr)
else: else:
def find_xpath_attr(node, xpath, key, val): def find_xpath_attr(node, xpath, key, val=None):
# Here comes the crazy part: In 2.6, if the xpath is a unicode, # Here comes the crazy part: In 2.6, if the xpath is a unicode,
# .//node does not match if a node is a direct child of . ! # .//node does not match if a node is a direct child of . !
if isinstance(xpath, compat_str): if isinstance(xpath, compat_str):
xpath = xpath.encode('ascii') xpath = xpath.encode('ascii')
for f in node.findall(xpath): for f in node.findall(xpath):
if f.attrib.get(key) == val: if key not in f.attrib:
continue
if val is None or f.attrib.get(key) == val:
return f return f
return None return None