Issues generating a wordcloud with a subscript in a word

Hello,

I am using the `wordcloud` package. When I use superscripts in a word, the word cloud is generated correctly, but when I use subscripts, the word is drawn with a square at the end (instead of the subscript). Does anyone have a workaround? Thanks!
Example code:


# Maps ASCII digits and signs to their Unicode superscript counterparts.
_SUPERSCRIPT_TABLE = str.maketrans("0123456789+-", "⁰¹²³⁴⁵⁶⁷⁸⁹⁺⁻")


def superscript(n):
    """Return ``str(n)`` with digits and signs written as Unicode superscripts.

    ``n`` may be anything ``str()`` accepts (an int, a numeric string, ...).
    The digits ``0``-``9`` and the signs ``+``/``-`` are replaced by their
    superscript forms; any other character is passed through unchanged.

    An explicit translation table is used instead of ``ord(c) - ord('0')``
    arithmetic: that offset is negative for characters such as ``'-'`` or
    ``'.'``, which made the lookup index from the end of the digit string and
    silently return the wrong superscript digit.
    """
    return str(n).translate(_SUPERSCRIPT_TABLE)


def subscript(n):
    """Return ``str(n)`` with every digit written as a Unicode subscript.

    Each character of ``str(n)`` is converted with ``int()`` and used as an
    index into the subscript digits, so a character that is not a single
    digit (for example a minus sign) raises ``ValueError``.
    """
    subscript_digits = '₀₁₂₃₄₅₆₇₈₉'
    pieces = []
    for digit_char in str(n):
        pieces.append(subscript_digits[int(digit_char)])
    return ''.join(pieces)


# Display labels: 'Close' gets a subscript 2, 'Distant' gets a superscript 1.
text = 'Close' + subscript(2)
text2 = 'Distant' + superscript(1)

# Swap the raw feature codes for the decorated display labels.
sorted_feature_renamed = [
    feature_name.replace('PCL', text).replace('PNCL', text2)
    for feature_name in sorted_feature
]

# Render the word clouds, then save and show the current matplotlib figure.
# NOTE(review): if pca_utilities.word_clouds is the word_clouds function listed
# in this post, it has no return statement (so `loading` is None) and it calls
# plt.show() itself -- confirm that the savefig call below still captures the
# intended figure rather than a new, empty one.
loading = pca_utilities.word_clouds(
    sorted_rotated_loadings,
    sorted_feature_renamed,
    figsize=(10, 10),
)
plt.savefig("wordcloud.png", transparent=True)
plt.show()


def word_clouds(loadings, feature, figsize, font_path=None):
    """
    Draw, save and show one word cloud per column of ``loadings.T``.

    For every column (component) each label is sized by
    ``int(100 * abs(loading))`` and coloured by mapping the signed loading
    through the ``RdBu_r`` colormap. The colour scale is symmetric about zero
    and shared by all components. Each cloud is written to
    ``wordcloud_transparent<column>.png`` in the working directory and then
    displayed with matplotlib. The function returns nothing.

    Parameters
    ----------
    loadings : object with a ``.T`` attribute (e.g. a NumPy array)
        Loadings matrix. After transposing, each row is paired with one entry
        of ``feature`` and each column yields one word cloud.
        NOTE(review): colours are looked up by the column labels of
        ``pd.DataFrame(loadings.T)``; this assumes those labels are the
        default 0..k-1 -- confirm before passing a labelled DataFrame.
    feature : list of str
        Labels to draw, one per row of ``loadings.T``.
    figsize : tuple
        Figure size handed to ``plt.figure``.
    font_path : str, optional
        Path to an OTF/TTF font file handed to ``WordCloud``. ``None`` (the
        default) keeps WordCloud's own default font. Supply a font that
        contains every character used in ``feature`` (for example the
        subscript digits) when the default font cannot draw them.
    """
    df = pd.DataFrame(loadings.T)  # rows: labels, columns: components
    principle_vector = np.array(df, dtype=float)

    # One colour scale for all components, symmetric about zero so that a
    # loading of 0 lands on the colormap midpoint.
    vmax = np.abs(principle_vector).max()
    vmin = -vmax
    pv_in_hex = []
    for i in range(principle_vector.shape[1]):
        rescale = (principle_vector[:, i] - vmin) / (vmax - vmin)  # into [0, 1]
        pv_in_hex.append([mcolor.to_hex(c) for c in cm.RdBu_r(rescale)])
    # Transpose back so that column j holds the hex colours of component j.
    df_v_color = pd.DataFrame(np.array(pv_in_hex).T)

    for col_index in df:
        # Word weight: absolute loading scaled by 100 and truncated to int.
        integer = (100 * df[col_index].abs()).astype(int)
        concat = pd.concat([integer, df_v_color[col_index]], axis=1)
        concat.columns = ['freq', 'colours']
        concat.insert(1, 'labels', feature)

        freq_dict = dict(zip(concat.labels, concat.freq))  # label -> weight
        colour_dict = dict(zip(concat.labels, concat.colours))  # label -> hex

        # WordCloud calls color_func(word, **layout_info); only the word is
        # needed. The table is bound as a default so the function does not
        # depend on the loop variable being rebound later.
        def color_func(word, *args, _colours=colour_dict, **kwargs):
            return _colours.get(word, '#000000')  # black for unknown words

        wc = WordCloud(
            font_path=font_path,
            background_color="rgba(255, 255, 255, 0)",  # transparent
            mode="RGBA",
            color_func=color_func,
            width=800,
            height=800,
            prefer_horizontal=1,
            min_font_size=8,
            max_font_size=200,
        )
        wc = wc.generate_from_frequencies(freq_dict)

        plt.figure(figsize=figsize)
        plt.imshow(wc, interpolation="bilinear")
        plt.axis("off")
        wc.to_file(f"wordcloud_transparent{col_index}.png")
        plt.show()

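It sounds like a font problem: a square is what gets drawn when the font has no glyph for a character. You’d need to try a variety of different fonts to see which ones have all the required characters; `WordCloud` accepts a `font_path` argument for choosing the font file.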

1 Like

Agreed. If your font renderer is good with fallbacks, you might be in luck - simply installing more fonts may help. I like having GNU Unifont as a wide-coverage font.

1 Like