xml.dom.minidom: adding convenience methods for getting elements

I’ve written a few functions that add convenience methods for getting elements, modeled on existing JavaScript DOM functions. They include getElementById, getElementByName, getElementsByClassName, closest, querySelector, and querySelectorAll.

These “helper” functions were modeled after the existing functions in minidom and should be added to the top level of the module.

def _get_element_by_id_helper(parent, id, rc):
    """Append to rc every descendant element of parent whose 'Id'
    attribute equals id, then return rc.

    The subtree below parent is walked depth-first, so matches are
    appended in document order.  parent itself is never tested.

    An element only qualifies when it actually carries an 'Id'
    attribute: getAttribute() returns '' for a missing attribute, so
    without the hasAttribute() check a search for '' would match every
    element that has no 'Id' at all.

    parent -- node whose descendants are searched
    id -- attribute value to look for (exact, case-sensitive comparison)
    rc -- list-like accumulator; mutated in place and returned
    """
    for node in parent.childNodes:
        if (node.nodeType == Node.ELEMENT_NODE
                and node.hasAttribute('Id')
                and node.getAttribute('Id') == id):
            rc.append(node)
        # Descend whether or not this node matched, so nested elements
        # are found as well.
        _get_element_by_id_helper(node, id, rc)
    return rc

def _get_element_by_name_helper(parent, name, rc):
    """Append to rc every descendant element of parent whose 'Name'
    attribute equals name, then return rc.

    The subtree below parent is walked depth-first, so matches are
    appended in document order.  parent itself is never tested.

    An element only qualifies when it actually carries a 'Name'
    attribute: getAttribute() returns '' for a missing attribute, so
    without the hasAttribute() check a search for '' would match every
    element that has no 'Name' at all.

    parent -- node whose descendants are searched
    name -- attribute value to look for (exact, case-sensitive comparison)
    rc -- list-like accumulator; mutated in place and returned
    """
    for node in parent.childNodes:
        if (node.nodeType == Node.ELEMENT_NODE
                and node.hasAttribute('Name')
                and node.getAttribute('Name') == name):
            rc.append(node)
        # Descend whether or not this node matched, so nested elements
        # are found as well.
        _get_element_by_name_helper(node, name, rc)
    return rc

def _get_elements_by_className_helper(parent, className, rc):
    """Append to rc every descendant element of parent whose 'Class'
    attribute contains the requested class name(s), then return rc.

    Both className and the element's 'Class' attribute are treated as
    whitespace-separated token lists, as the JavaScript
    getElementsByClassName does: an element matches when every token in
    className is present in its 'Class' attribute.  An element with
    Class="a b" is therefore found by "a", "b", "a b" and "b a".
    A className with no tokens (empty or whitespace only) matches
    nothing.

    The subtree below parent is walked depth-first, so matches are
    appended in document order.  parent itself is never tested.

    parent -- node whose descendants are searched
    className -- one or more whitespace-separated class names
    rc -- list-like accumulator; mutated in place and returned
    """
    # Split the query once instead of once per visited node.
    wanted = className.split()
    if not wanted:
        return rc

    def walk(container):
        for node in container.childNodes:
            if node.nodeType == Node.ELEMENT_NODE:
                # getAttribute() yields '' when 'Class' is absent, which
                # splits to an empty token list and so cannot match.
                present = node.getAttribute('Class').split()
                if all(token in present for token in wanted):
                    rc.append(node)
            # Descend whether or not this node matched.
            walk(node)

    walk(parent)
    return rc

def _closest_helper(node, selectors:list, rc):
    """Walk up from node looking for ancestors that satisfy selectors,
    append the result to rc and return rc.

    selectors is the list produced by _parse_selector; it is consumed
    (entries are popped) while matching, so callers must pass a list
    they do not need afterwards.

    The walk climbs parent by parent until an ancestor element matches
    selectors[0].  With a single selector that ancestor is the result.
    With several selectors the matched entry is popped and, while the
    recursion unwinds back down the path towards the starting node,
    each element on the path is tested against the next remaining
    selector; the element that satisfies the last selector is appended.

    The climb stops at the first parent that is not an element (for
    example the Document) or is missing (a detached element), leaving
    rc unchanged if nothing matched.

    NOTE(review): for a single-selector query the starting node itself
    is never tested, unlike the DOM Element.closest() -- confirm whether
    that is intended.

    node -- element to start from
    selectors -- list of selector dictionaries; mutated
    rc -- list-like accumulator; mutated in place and returned
    """
    initialLen = len(selectors)
    if node.nodeType == Node.ELEMENT_NODE and initialLen:
        parent = node.parentNode
        # parentNode is None for an element that is not attached to a
        # tree; treat that like reaching the top of the document.
        if parent is not None and parent.nodeType == Node.ELEMENT_NODE:
            if _element_selector_match(parent, selectors[0]):
                if len(selectors) == 1:
                    # The ancestor is what matched the selector, so it
                    # (not the child we came from) is the result.
                    rc.append(parent)
                selectors.pop(0)
            else:
                _closest_helper(parent, selectors, rc)
        # A shorter list means an ancestor consumed a selector further
        # up; try this element against the next remaining one.
        if initialLen != len(selectors) and selectors:
            if _element_selector_match(node, selectors[0]):
                if len(selectors) == 1:
                    rc.append(node)
                selectors.pop(0)
    return rc

def _query_selector_helper(parent, selectors:list, rc, **kwargs):
    """Append to rc the descendant elements of parent that satisfy the
    chain of selectors, then return rc.

    selectors is the list produced by _parse_selector; consecutive
    entries are matched as ancestor/descendant pairs.  Child elements
    are visited depth-first in document order.  When a child matches
    selectors[0] the search continues below it with the remaining
    selectors; when it does not, the search continues below it with the
    same selectors.  An element matching the final selector is appended
    to rc.

    Elements nested inside an element that already matched the final
    selector are searched too, so a query such as "div" finds a div
    inside another div.

    parent -- node whose descendants are searched
    selectors -- list of selector dictionaries (not modified)
    rc -- list-like accumulator; mutated in place and returned
    all -- keyword flag; when true every match is collected, otherwise
           the search stops after the first match
    """
    if not selectors:
        return rc
    find_all = kwargs.get('all')
    for node in parent.childNodes:
        if rc and not find_all:
            # First-match mode and a result is already recorded.
            break
        if node.nodeType != Node.ELEMENT_NODE:
            continue
        if _element_selector_match(node, selectors[0]):
            if len(selectors) == 1:
                rc.append(node)
                if find_all:
                    # Keep looking below the match for nested matches.
                    _query_selector_helper(node, selectors, rc, **kwargs)
            else:
                _query_selector_helper(node, selectors[1:], rc, **kwargs)
        else:
            _query_selector_helper(node, selectors, rc, **kwargs)
    return rc

def _element_selector_match(e:'Element', s:dict) -> bool:
    """Return True if element e satisfies the selector dictionary s.

    s may contain:
        "TagName" -- must equal e.tagName; "*" matches any tag
        "Attributes" -- list of attribute tests, all of which must pass

    Each attribute test is a dictionary with a required "Name" and
    optional "Value", "Operator" and "CaseSensitivity".  Without a
    "Value" (or with an empty one) only the presence of the attribute is
    required.  With a "Value" the comparison depends on "Operator":
        (none)  exact match
        "~"     value is one of the whitespace-separated words
        "|"     exact match, or attribute starts with value + "-"
        "^"     attribute starts with value
        "$"     attribute ends with value
        "*"     attribute contains value
    Any other operator string places no constraint on the value.
    A "CaseSensitivity" of "i" or "I" makes the comparison ignore case.

    Raises KeyError if an attribute test has no "Name".
    """
    tag = s.get('TagName')
    # '*' is the universal selector: it stands for any element.
    if tag and tag != '*' and tag != e.tagName:
        return False

    for a in s.get('Attributes') or ():
        if not e.hasAttribute(a['Name']):
            return False
        if not a.get('Value'):
            # Presence-only test such as [href].
            continue

        aV = str(a['Value'])
        pV = str(e.getAttribute(a['Name']))
        if a.get('CaseSensitivity') in ('i', 'I'):
            aV = aV.lower()
            pV = pV.lower()

        op = a.get('Operator')
        if not op:
            ok = aV == pV
        elif op == '~':
            ok = aV in pV.split()
        elif op == '|':
            ok = aV == pV or pV.startswith(aV + '-')
        elif op == '^':
            ok = pV.startswith(aV)
        elif op == '$':
            ok = pV.endswith(aV)
        elif op == '*':
            ok = aV in pV
        else:
            # Unrecognised operator: no value constraint is applied.
            ok = True
        if not ok:
            return False
    return True

def _parse_selector(selector:str, rc:list) -> list:
    """Returns list of selector dictionaries.

    The selector string is split on whitespace into parts; each part
    becomes one dictionary appended to rc, in order.  rc is mutated in
    place and also returned.

    Dictionaries may contain keys: 
        "TagName", and "Attributes"
    "TagName" has a string value.
    "Attributes" is a list of dictionaries that may contain:
        "Name", "Operator", "Value", and "CaseSensitivity"

    Recognised attribute syntax is [name], [name=value] and
    [name<op>=value] where <op> is one of ~ | ^ $ *.  The value may be
    bare or wrapped in single or double quotes, and may be followed by a
    case-sensitivity flag, e.g. [title="x" i].

    Everything outside square brackets other than whitespace is
    collected into "TagName", so "#id", ".class", ">" and "," are not
    interpreted.

    Empty attribute brackets ("[]") and parts that end up with no
    content are dropped from the result.
    """
    def addCharacter(d:dict, key:str, character:str):
        # Append one character to the string stored under key,
        # creating the entry on first use.
        if not d.get(key):
            d[key] = character
        else:
            d[key] += character
    # 1:TagName 2:AttributeName 3:AttributeOperator 4:AttributeValue 5:AttributeParenthesis 6:AttributeQuote 7:AttributeCaseSensitivity
    # (5 = inside a double-quoted value, 6 = inside a single-quoted value)
    level = 1
    # '$' must be listed here or "[a$=b]" is read as an attribute named "a$".
    operatorModifiers = ['~','|','^','$','*']
    rc.append({})
    for c in selector:
        if c.isspace() and level < 2:
            # Whitespace outside brackets separates selector parts.
            level = 1
            if len(rc[-1].keys()):
                rc.append({})
        else:
            # First decide whether this character changes the state...
            if c == '[' and level < 2:
                if not rc[-1].get('Attributes'):
                    rc[-1]['Attributes'] = []
                rc[-1]['Attributes'].append({})
                level = 2
            elif (c.isspace() or c in operatorModifiers) and level == 2:
                if rc[-1]['Attributes'][-1].get('Name'):
                    level = 3
            elif c == '=' and level < 4:
                level = 4
            elif c.isspace() and level == 4:
                # Whitespace after a bare value: a case flag may follow.
                if rc[-1]['Attributes'][-1].get('Value'):
                    level = 7
            elif c == '"' and level == 4:
                level = 5
            elif c == "'" and level == 4:
                level = 6
            elif c == '"' and level == 5:
                level = 7
            elif c == "'" and level == 6:
                level = 7
            elif c == ']' and (level > 1 and not (level == 5 or level == 6)):
                # A ']' inside quotes is part of the value, not a terminator.
                level = 1
            
            # ...then store the character according to the (new) state,
            # skipping the delimiters themselves.
            if level == 1 and not (c.isspace() or c == ']'):
                addCharacter(rc[-1], 'TagName', c)
            elif level == 2 and not (c.isspace() or c == '['):
                addCharacter(rc[-1]['Attributes'][-1], 'Name', c)
            elif level == 3 and c in operatorModifiers:
                addCharacter(rc[-1]['Attributes'][-1], 'Operator', c)
            elif level == 4 and not (c.isspace() or c == '='):
                addCharacter(rc[-1]['Attributes'][-1], 'Value', c)
            elif level == 5 and not c == '"':
                addCharacter(rc[-1]['Attributes'][-1], 'Value', c)
            elif level == 6 and not c == "'":
                addCharacter(rc[-1]['Attributes'][-1], 'Value', c)
            elif level == 7 and not (c.isspace() or c == '"' or c == "'"):
                addCharacter(rc[-1]['Attributes'][-1], 'CaseSensitivity', c)

    # Clean-up: drop empty attribute tests (from "[]") and parts that
    # carry no information.  A new list is built and written back so rc
    # is never modified while it is being iterated.
    cleaned = []
    for r in rc:
        if 'Attributes' in r:
            attributes = [a for a in r['Attributes'] if a]
            if attributes:
                r['Attributes'] = attributes
            else:
                del r['Attributes']
        if r:
            cleaned.append(r)
    rc[:] = cleaned
    return rc

These are the methods that use the “helper” functions. They should be added to the NodeGet class.

    def getElementById(self, id:str) -> NodeList['Element']:
        """Find descendant elements by id.

        Searches all descendants of this node, not only its direct
        children, and returns the matching elements as a NodeList.
        Valid xml has at most one element per id, so the list is
        expected to hold zero or one element.
        """
        matches = NodeList()
        return _get_element_by_id_helper(self, id, matches)

    def getElementByName(self, name:str) -> NodeList['Element']:
        """Find descendant elements by name.

        Searches all descendants of this node, not only its direct
        children, and returns the matching elements as a NodeList.
        Valid xml is expected to have only one element per name, so the
        list normally holds zero or one element.
        """
        matches = NodeList()
        return _get_element_by_name_helper(self, name, matches)
    
    def getElementsByClassName(self, className:str) -> NodeList['Element']:
        """Find descendant elements by class name.

        Searches all descendants of this node, not only its direct
        children, and returns every element with the given class name
        as a NodeList.
        """
        matches = NodeList()
        return _get_elements_by_className_helper(self, className, matches)

    def closest(self, query:str) -> NodeList['Element']:
        """Find an element on the path above this node by query.

        The query string is parsed with _parse_selector and the path
        from this node upwards (not only the direct parent) is searched
        for it.  The result is returned as a NodeList.
        """
        selectors = _parse_selector(query, list())
        return _closest_helper(self, selectors, NodeList())
    
    def querySelector(self, query:str) -> NodeList['Element']:
        """Find the first descendant element matching the query.

        The query string is parsed with _parse_selector and all
        descendants of this node (not only its direct children) are
        searched.  The result is returned as a NodeList rather than as
        a bare element.
        """
        selectors = _parse_selector(query, list())
        return _query_selector_helper(self, selectors, NodeList())
        
    def querySelectorAll(self, query:str) -> NodeList['Element']:
        """Find all descendant elements matching the query.

        The query string is parsed with _parse_selector and all
        descendants of this node (not only its direct children) are
        searched.  Every match is returned in a NodeList.
        """
        selectors = _parse_selector(query, list())
        return _query_selector_helper(self, selectors, NodeList(), all=True)