Capitalising text based on a ruleset

I’ve been cobbling together some functions that work in concert to format text strings consistently according to a set of rules, which I’ve tried to reflect in the functions below.

import re

def first_alpha_char(string):
    '''Return the position of the first alphabetic character in string.

    Returns -1 when string contains no alphabetic character (this includes
    the empty string), the same "not found" value str.find() uses, rather
    than letting StopIteration escape from next().
    '''
    return next((pos for pos, char in enumerate(string) if char.isalpha()), -1)

def is_roman_numeral(word):
    '''Tell whether word, compared case-insensitively, is a roman numeral in
    standard subtractive form (e.g. 'IV' qualifies, 'IIII' does not).

    Returns True or False; the empty string is never a numeral.
    '''
    pattern = r'^(?=[MDCLXVI])M*(C[MD]|D?C{0,3})(X[CL]|L?X{0,3})(I[XV]|V?I{0,3})$'
    uppercased = word.upper()
    return re.match(pattern, uppercased) is not None

def always_upper(word):
    '''Report whether word, ignoring case, is one of the terms that are always written in uppercase.'''
    uppercased = word.upper()
    return uppercased in {'BBC', 'LP', 'USA'}

def capitalise_first_word(word):
    '''Capitalise word starting at its first alphabetic character.

    Leading non-alphabetic characters (brackets, quotes, digits, ...) are
    kept unchanged. The remainder is uppercased in full when always_upper()
    recognises it, and is otherwise passed through str.capitalize() (first
    character upper, the rest lower).

    Returns the reformatted word. A word without a non-alphabetic prefix is
    simply capitalised with str.capitalize().
    '''
    alpha = first_alpha_char(word)
    if alpha > 0:
        prefix, rest = word[:alpha], word[alpha:]
        # Exactly one treatment applies: applying capitalize() after upper()
        # would undo the uppercase form of always-upper terms.
        rest = rest.upper() if always_upper(rest) else rest.capitalize()
        return prefix + rest
    # Reached when alpha is not positive, i.e. there is no prefix to preserve.
    return word.capitalize()

def capitalise_last_word(sentence):
    '''Always capitalise the last word in the sentence, unless it is subject
    to specific handling (roman numeral, always-uppercase term or 'khz').

    The sentence is split on whitespace and re-joined with single spaces, so
    runs of whitespace are collapsed and leading/trailing whitespace is
    dropped. When classifying the last word, characters before its first
    letter and trailing non-alphanumeric characters (e.g. a closing bracket)
    are ignored. A specially handled last word is left exactly as received;
    any other last word is capitalised from its first letter onwards with
    str.capitalize(). A last word containing no letter is left unchanged.

    Returns '' for an empty or whitespace-only sentence.
    '''
    if not sentence:  # empty sentence check
        return ''

    words = sentence.split()
    if not words:  # whitespace-only sentence: there is no last word
        return ''

    lastword = words[-1]
    start = next((pos for pos, char in enumerate(lastword) if char.isalpha()), None)
    if start is not None:
        # Trim trailing punctuation; the loop stops at the latest on the
        # letter found at `start`, which is alphanumeric.
        end = len(lastword)
        while not lastword[end - 1].isalnum():
            end -= 1
        core = lastword[start:end]

        special = is_roman_numeral(core) or always_upper(core) or core.lower() == 'khz'
        if not special:
            # Write the result back into the list; rebinding a local name
            # alone would leave the joined sentence unchanged.
            words[-1] = lastword[:start] + lastword[start:].capitalize()

    return ' '.join(words)

def capitalise_word(word):
    ''' loose implementation of RYM's capitalisation standards

    Rules are tried in this order, comparing case-insensitively:
      1. a word in the always-lowercase list is returned in lowercase;
      2. a word in the always-capitalised list is returned via str.capitalize();
      3. 'khz' is returned as 'kHz';
      4. a roman numeral or always-uppercase term is returned in uppercase;
      5. anything else is handed to capitalise_first_word(), which takes the
         first alphabetic character as the capitalisation candidate.

    Returns the reformatted word.
    '''
    lowered = word.lower()

    if lowered in {'a', 'an', 'and', 'at', 'but', 'by', 'cetera', 'et', 'etc.', 'for', 'in', 'nor', 'of', 'on', 'or', 'the', 'to', 'v.', 'versus', 'vs.', 'yet'}:
        return lowered
    if lowered in {'am', 'are', 'as', 'be', 'been', 'from', 'he', 'if', 'into', 'is', 'it', 'she', 'so', 'upon', 'was', 'we', 'were', 'with'}:
        return word.capitalize()
    if lowered == 'khz':
        return 'kHz'
    if is_roman_numeral(word) or always_upper(word):
        return word.upper()
    # None of the special conditions applied.
    return capitalise_first_word(word)

def rymify(sentence):
    ''' Breaks a sentence down into words and capitalises each according to capitalise_word()

    The sentence is split on ':', '?', '!', the em dash, '(', ')', '"' and
    the space character. Each non-empty piece between delimiters is passed
    to capitalise_word(); the delimiters themselves are kept in place. The
    re-joined string is then passed to capitalise_last_word(), whose return
    value is the result.

    Returns '' for an empty sentence.
    '''
    if not sentence:  # empty sentence check
        return ''

    # Because the pattern is one capturing group, re.split() keeps the
    # delimiters in the result: even indices hold the text between
    # delimiters, odd indices hold the delimiters. Only the even slots can
    # be words, so no second regex test per piece is needed.
    parts = re.split(r'(:|\?|!|\—|\(|\)|"| )', sentence)
    parts[::2] = [capitalise_word(part) if part else part for part in parts[::2]]

    return capitalise_last_word(''.join(parts))

Here’s a sample call to see how it transforms the input string.

# Sample input: leading brackets, mixed-case words, an always-uppercase term
# ('bbc' / 'BBc'), 'KHZ', and a roman numeral followed by a closing bracket.
x = '[[bbc gHost BBc sTories KHZ iii)'


The code appears to yield the desired results, but as a Python newcomer I am wondering how it might be optimised to process hundreds of thousands of unique strings efficiently.