import {
    type TranscriptionMessage,
    type TranscriptionSegment,
    type TranscriptionSegmentMap,
} from './useStoreAudioTranscriptions.types';

/*
mergeTranscriptionSegmentsIntoMessages is the second step in storing audio transcription messages.

LiveKit logs audio transcriptions as segments. If a participant stops talking for a moment and then
starts again, their speech can end up split across multiple segments. mergeTranscriptionSegmentsIntoMessages
merges consecutive segments from the same participant into a single message.
*/

/**
 * Flattens the segment map into an array ordered by ascending `firstReceivedTime`.
 *
 * `Object.values` produces a new array, so sorting it in place leaves the map passed
 * in by the caller untouched.
 *
 * @param segments - Map whose values are the transcription segments to order.
 * @returns The segments, earliest `firstReceivedTime` first.
 */
function sortSegmentsConsecutively(segments: TranscriptionSegmentMap): TranscriptionSegment[] {
    const ordered = Object.values(segments);
    ordered.sort((earlier, later) => earlier.firstReceivedTime - later.firstReceivedTime);
    return ordered;
}

/**
 * Starts a new message from the first segment of a run of speech.
 *
 * The message copies `id`, `conversationId`, `participant` and `text` from the segment
 * and begins its `segments` list with that segment.
 *
 * A note on `id`: the TranscriptionMessage needs a stable id so that we can tell whether
 * it has already been stored (see `unstoredTranscriptionMessages` in
 * useStoredTranscriptionMessages). The segment id from livekit is not a uuid. Hashing it
 * with `sha256()` would work but is asynchronous, which is inconvenient here, so the raw
 * livekit string id is passed all the way to the server and converted there in
 * tutor_bot_message.rb::uuid_from_event.
 *
 * @param firstSegment - The segment that opens the message.
 * @returns A new message seeded from `firstSegment`.
 */
function initializeTranscriptionMessage(firstSegment: TranscriptionSegment): TranscriptionMessage {
    const { id, conversationId, participant, text } = firstSegment;

    const message: TranscriptionMessage = {
        id,
        conversationId,
        participant,
        text,
        segments: [firstSegment],
    };
    return message;
}

/**
 * Collapses transcription segments into messages, one per uninterrupted run of
 * segments from the same participant.
 *
 * Segments are visited in the order produced by `sortSegmentsConsecutively`. A segment
 * whose participant matches the most recent message is appended to it; any other
 * segment starts a new message.
 *
 * @param segments - Map of transcription segments to merge.
 * @returns The merged messages, in the order their first segments were visited.
 */
export function mergeTranscriptionSegmentsIntoMessages(segments: TranscriptionSegmentMap): TranscriptionMessage[] {
    const messages: TranscriptionMessage[] = [];

    for (const segment of sortSegmentsConsecutively(segments)) {
        const current = messages[messages.length - 1];

        // The very first segment, or a change of speaker, opens a new message.
        if (current?.participant !== segment.participant) {
            messages.push(initializeTranscriptionMessage(segment));
            continue;
        }

        // Same speaker as the latest message: append the text, inserting a single space
        // only when the accumulated text does not already end in whitespace.
        const alreadySeparated = /\s$/.test(current.text);
        current.text += alreadySeparated ? segment.text : ` ${segment.text}`;

        // Trailing whitespace on the segment is deliberately kept: it is sometimes a
        // newline, which may carry meaning such as a pause in speech. The trailing
        // whitespace of a message's final segment is trim()med later, in
        // `useStoreTranscriptionMessages`.
        current.segments.push(segment);
    }

    return messages;
}
