I’ve written a few functions that add methods for looking up elements. I modeled them on existing JavaScript DOM functions. They are getElementById, getElementByName, getElementsByClassName, closest, querySelector, and querySelectorAll.
These “helper” functions are modeled after the existing helper functions in xml.dom.minidom and should be added at the top level of that module.
def _get_element_by_id_helper(parent, id, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE and \
(node.getAttribute('Id') == id):
rc.append(node)
_get_element_by_id_helper(node, id, rc)
return rc
def _get_element_by_name_helper(parent, name, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE and \
(node.getAttribute('Name') == name):
rc.append(node)
_get_element_by_name_helper(node, name, rc)
return rc
def _get_elements_by_className_helper(parent, className, rc):
for node in parent.childNodes:
if node.nodeType == Node.ELEMENT_NODE and \
(node.getAttribute('Class') == className):
rc.append(node)
_get_elements_by_className_helper(node, className, rc)
return rc
def _closest_helper(node, selectors: list, rc):
    """Search the ancestor path of ``node`` for the parsed ``selectors``.

    The function climbs from ``node`` towards the root until it reaches an
    ancestor element that satisfies ``selectors[0]``.  That selector is then
    consumed.  While the recursion unwinds back down the path, each element
    on the way is tested against the next pending selector, which is
    consumed on a match.  The element that satisfies the last selector is
    appended to ``rc``.

    node      -- starting node; anything other than an element yields no
                 result.
    selectors -- list of selector dictionaries as produced by
                 ``_parse_selector``.  The list is consumed in place.
    rc        -- list-like accumulator receiving the matched element.

    Returns ``rc``.
    """
    pending_before = len(selectors)
    if node.nodeType != Node.ELEMENT_NODE or not pending_before:
        return rc

    parent = node.parentNode
    # A detached element has no parent; the Document node is not an element.
    if parent is not None and parent.nodeType == Node.ELEMENT_NODE:
        if _element_selector_match(parent, selectors[0]):
            if len(selectors) == 1:
                # The ancestor is what matched, so it is the result (not the
                # child we happened to arrive from).
                rc.append(parent)
            selectors.pop(0)
        else:
            _closest_helper(parent, selectors, rc)

    # A selector was consumed somewhere above this node: see whether this
    # node satisfies the next one in line.
    if len(selectors) != pending_before and selectors:
        if _element_selector_match(node, selectors[0]):
            if len(selectors) == 1:
                rc.append(node)
            selectors.pop(0)
    return rc
def _query_selector_helper(parent, selectors: list, rc, **kwargs):
    """Collect descendants of ``parent`` matching a chain of selectors.

    ``selectors`` is a list of selector dictionaries (see
    ``_parse_selector``) joined by the descendant relation: an element is a
    result when it satisfies the last selector and the earlier selectors are
    satisfied, in order, by elements above it inside ``parent``.

    parent    -- node whose subtree is searched (``parent`` itself is not
                 tested).
    selectors -- list of selector dictionaries; not modified.
    rc        -- list-like accumulator; matches are appended in document
                 order.
    all       -- keyword flag.  When true every match is collected,
                 otherwise the search stops after the first one.

    Returns ``rc``.
    """
    if not selectors:
        return rc
    find_all = kwargs.get('all')
    head = selectors[0]
    remaining = selectors[1:]
    for node in parent.childNodes:
        if rc and not find_all:
            # First-match mode and a result is already in hand.
            break
        if node.nodeType != Node.ELEMENT_NODE:
            continue
        if not _element_selector_match(node, head):
            # Keep looking deeper for something that satisfies ``head``.
            _query_selector_helper(node, selectors, rc, **kwargs)
        elif remaining:
            # ``head`` is satisfied here; the rest must match further down.
            _query_selector_helper(node, remaining, rc, **kwargs)
        else:
            rc.append(node)
            # A matching element can contain further matches (for example a
            # <div> nested in a <div>), so its subtree is searched as well.
            # In first-match mode this call returns immediately.
            _query_selector_helper(node, selectors, rc, **kwargs)
    return rc
def _element_selector_match(e:'Element', s:dict) -> bool:
m = True
if s.get('TagName'):
if not s['TagName'] == e.tagName:
m = False
if s.get('Attributes') and m:
for a in s['Attributes']:
if not e.hasAttribute(a['Name']) or not m:
m = False
else:
if a.get('Value'):
aV = str(a['Value'])
pV = str(e.getAttribute(a['Name']))
if a.get('CaseSensitivity'):
if a['CaseSensitivity'] == 'i':
aV = aV.lower()
pV = pV.lower()
if a.get('Operator'):
if a['Operator'] == '~':
if aV not in pV.split():
m = False
if a['Operator'] == '|':
if not aV == pV and not aV+'-' == pV[0: len(aV)+1]:
m = False
if a['Operator'] == '^':
if not aV == pV[0: len(aV)]:
m = False
if a['Operator'] == '$':
if not aV == pV[-len(aV):]:
m = False
if a['Operator'] == '*':
if aV not in pV:
m = False
else:
if not aV == pV:
m = False
return m
def _parse_selector(selector:str, rc:list) -> list:
"""Returns list of selector dictionaries.
Dictionaries may contain keys:
"TagName", and "Attributes"
"TagName" has a string value.
"Attributes" is a list of dictionaries that may contain:
"Name", "Operator", "Value", and "CaseSensitivity"
"""
def addCharacter(d:dict, key:str, character:str):
if not d.get(key):
d[key] = character
else:
d[key] += character
# 1:TagName 2:AttributeName 3:AttributeOperator 4:AttributeValue 5:AttributeParenthesis 6:AttributeQuote 7:AttributeCaseSensitivity
level = 1
operatorModifiers = ['~','|','^','*']
rc.append({})
for c in selector:
if c.isspace() and level < 2:
level = 1
if len(rc[-1].keys()):
rc.append({})
else:
if c == '[' and level < 2:
if not rc[-1].get('Attributes'):
rc[-1]['Attributes'] = []
rc[-1]['Attributes'].append({})
level = 2
elif (c.isspace() or c in operatorModifiers) and level == 2:
if rc[-1]['Attributes'][-1].get('Name'):
level = 3
elif c == '=' and level < 4:
level = 4
elif c.isspace() and level == 4:
if rc[-1]['Attributes'][-1].get('Value'):
level = 7
elif c == '"' and level == 4:
level = 5
elif c == "'" and level == 4:
level = 6
elif c == '"' and level == 5:
level = 7
elif c == "'" and level == 6:
level = 7
elif c == ']' and (level > 1 and not (level == 5 or level == 6)):
level = 1
if level == 1 and not (c.isspace() or c == ']'):
addCharacter(rc[-1], 'TagName', c)
elif level == 2 and not (c.isspace() or c == '['):
addCharacter(rc[-1]['Attributes'][-1], 'Name', c)
elif level == 3 and c in operatorModifiers:
addCharacter(rc[-1]['Attributes'][-1], 'Operator', c)
elif level == 4 and not (c.isspace() or c == '='):
addCharacter(rc[-1]['Attributes'][-1], 'Value', c)
elif level == 5 and not c == '"':
addCharacter(rc[-1]['Attributes'][-1], 'Value', c)
elif level == 6 and not c == "'":
addCharacter(rc[-1]['Attributes'][-1], 'Value', c)
elif level == 7 and not (c.isspace() or c == '"' or c == "'"):
addCharacter(rc[-1]['Attributes'][-1], 'CaseSensitivity', c)
for r in rc:
if r.get('Attributes'):
for j,a in enumerate(r['Attributes']):
if not len(a):
a.pop(j)
if not len(r['Attributes']):
del r['Attributes']
if rc[-1].get('Attributes'):
if not rc[-1]['Attributes'][-1]:
rc[-1]
if not rc[-1].keys():
rc.pop(-1)
return rc
These are the methods that call the “helper” functions above. They should be added to the NodeGet class.
def getElementById(self, id:str) -> NodeList['Element']:
    """Find the descendant elements that carry the given id.

    The search covers the whole subtree below this node, not just its
    direct children.  A valid XML document has at most one element per
    id, but every element found is included in the result.

    Returns a NodeList holding the matching elements.
    """
    matches = NodeList()
    return _get_element_by_id_helper(self, id, matches)
def getElementByName(self, name:str) -> NodeList['Element']:
    """Find the descendant elements that carry the given name.

    The search covers the whole subtree below this node, not just its
    direct children.  A valid XML document is expected to have one
    element per name, but every element found is included in the result.

    Returns a NodeList holding the matching elements.
    """
    matches = NodeList()
    return _get_element_by_name_helper(self, name, matches)
def getElementsByClassName(self, className:str) -> NodeList['Element']:
    """Find the descendant elements that have the given class name.

    The search covers the whole subtree below this node, not just its
    direct children.

    Returns a NodeList holding the matching elements.
    """
    matches = NodeList()
    return _get_elements_by_className_helper(self, className, matches)
def closest(self, query:str) -> NodeList['Element']:
    """Look along this node's ancestor path for an element matching query.

    The query string is parsed into selectors and the path above this
    node is searched, not just the direct parent.

    Returns a NodeList with the element found; it is empty when nothing
    on the path matches.
    """
    selectors = _parse_selector(query, [])
    found = NodeList()
    return _closest_helper(self, selectors, found)
def querySelector(self, query:str) -> NodeList['Element']:
    """Find the first descendant element that matches query.

    The query string is parsed into selectors and the whole subtree below
    this node is searched, not just its direct children.  The search ends
    at the first match.

    Returns a NodeList with that element; it is empty when nothing
    matches.
    """
    selectors = _parse_selector(query, [])
    found = NodeList()
    return _query_selector_helper(self, selectors, found)
def querySelectorAll(self, query:str) -> NodeList['Element']:
    """Find every descendant element that matches query.

    The query string is parsed into selectors and the whole subtree below
    this node is searched, not just its direct children.

    Returns a NodeList holding all matching elements.
    """
    selectors = _parse_selector(query, [])
    found = NodeList()
    return _query_selector_helper(self, selectors, found, all=True)