My python discord bot isn't detecting words on my lists when they are included in a sentence

Miner_OfThePigs · September 22, 2025, 5:14pm

Currently I’m trying to use segmentation to insure words are detected even when they are included in sentences with out spaces like “ilikecats” but currently it doesn’t detect words unless they are said by themselves

def normalize_text(text):
    # Normalize Unicode characters (e.g., accent removal)
    normalized = unicodedata.normalize('NFD', text)
    ascii_text = ''.join(char for char in normalized if unicodedata.category(char) != 'Mn')
    
    # Generate all possible substitutions based on the mapping
    possible_texts = [ascii_text]  # Start with the original text as one possibility
    for symbol, replacements in lookalike_mappings.items():
        new_texts = []
        for t in possible_texts:
            if symbol in t:  # Only apply replacements if symbol is in the text
                for replacement in replacements:
                    new_texts.append(t.replace(symbol, replacement))
            else:
                new_texts.append(t)  # Keep the original text as it is
        possible_texts = new_texts  # Update possible_texts with new variations

    return possible_texts

def segment_words(text, word_dict):
    """Segment a string of concatenated words into individual words using a dictionary."""
    # Normalize the text first
    normalized_texts = normalize_text(text)
    
    segments = []
    for normalized_text in normalized_texts:
        n = len(normalized_text)
        memo = [-1] * (n + 1)  # To memoize the positions of valid words

        # Dynamic programming to break the text into valid words
        def find_segments(start):
            # If we've reached the end of the text, return True (end of recursion)
            if start == n:
                return True

            # If already computed, use the memoized result
            if memo[start] != -1:
                return memo[start] == 1

            for end in range(start + 1, n + 1):
                # Check if the substring normalized_text[start:end] is a valid word in the Trie
                if triedict.search(normalized_text[start:end]):
                    # If valid, proceed to the next segment
                    if find_segments(end):
                        segments.append(normalized_text[start:end])  # Add the valid word
                        memo[start] = 1  # Mark as valid
                        return True

            memo[start] = 0  # Mark as invalid position
            return False
        
        # Start the segmentation from the beginning of the normalized text
        find_segments(0)

    return segments

here is the code I’m attempting to use for normalization and segmentation of words