import { type RealAiMessage, type SourceLink } from 'TutorBot';
import {
    isDisplayableNonLessonFrameSourceLocation,
    isLinkToLessonInStream,
    sourceLinkTitle,
} from './sourceLocationHelpers';

/*
    There are 2 ways to add links in markdown:

    1. [Title](url)
    2.
        [Title][reference]

        [reference]: url

    When ChatGPT references a document in a message, it cannot include the url itself, because back_royal is responsible for
    figuring out the url that is appropriate to the user. So, we tell ChatGPT to reference documents like this

        [Title][source:0]

    `addFootnotesToMessage`, then, needs to do a few things:

    1. Add footnotes for each document at the bottom of the message. The message will use indexes
        [source:0] rather than ids [source:bc0e1b67-1152-5fc8-ae7e-277e569772a3] to reference documents. So, the code here needs to
        map from indexes to source ids. (Tutorbot uses indexes when talking to ChatGPT in order to save tokens).

        In order to do the mapping from source ids to the footnote references in the message, the message returned from tutorbot
        returns a footnoteReferences object, with entries like {'bc0e1b67-1152-5fc8-ae7e-277e569772a3': 'source:0'}.

    2. ChatGPT might ignore us and format the link like [Title](source:0). That format doesn't work with footnotes, so we need to
        convert those.

    3. ChatGPT might reference multiple sources in one link, like [Title](source:1,2,3)

    4. ChatGPT might misformat the source id, like [Title](source: Document ID 1)

    It might seem like we could do most of this work in tutorbot before the message gets sent. Tutorbot could do the regex replacement
    that is handled in steps 2 and 3. And Tutorbot could replace the inline [source:0] with [source:ID] so that back_royal never has
    to know about the indexes. Theoretically it's true that Tutorbot could do it, but the fact that we're streaming the answer back makes
    it harder than it would be if we were generating a complete answer and then sending it back.

    This stuff is tested in Message.stories.tsx, BotMessageWithInlineLinks
*/

// A single markdown link-reference definition generated for one source id.
type Footnote = {
    // The full definition line appended to the message, e.g. `[source:0]:https://www.example.com`
    footnote: string;
    // The reference label used inside the message body, e.g. `source:0`
    footnoteReference: string;
};

/**
 * Builds one markdown link-reference definition (`[source:N]:url`) for every source id
 * that has an entry in `footnoteReferences`.
 *
 * Source ids without a footnote reference are skipped: there is nothing in the message
 * that could point at them, and emitting `[undefined]:url` would only add noise.
 *
 * @param footnoteReferences - map from source id to the reference used in the message (e.g. `source:0`)
 * @param sourceLinks - the links to generate footnotes for
 * @returns the footnotes, in `sourceLinks` / `sourceIds` order; empty when there are no references
 * @throws Error when a source link is neither a lesson link nor a displayable non-lesson location
 */
function getFootnotes(footnoteReferences: RealAiMessage['footnoteReferences'], sourceLinks: SourceLink[]): Footnote[] {
    if (!footnoteReferences) {
        return [];
    }

    return sourceLinks.flatMap(sourceLink =>
        sourceLink.sourceIds.flatMap((sourceId): Footnote[] => {
            // Resolve the url first so that an unexpected link type is still reported,
            // regardless of whether this particular id ends up with a footnote.
            let url;
            if (isLinkToLessonInStream(sourceLink.link)) {
                url = sourceLink.link.lessonLaunchPath;
            } else if (isDisplayableNonLessonFrameSourceLocation(sourceLink.link)) {
                url = sourceLink.link.url;
            } else {
                throw new Error('Unnexpected link type');
            }

            const footnoteReference = footnoteReferences[sourceId];
            if (!footnoteReference) {
                return [];
            }

            return [
                {
                    footnoteReference,
                    footnote: `[${footnoteReference}]:${url}`,
                },
            ];
        }),
    );
}

/**
 * Rewrites the many ways ChatGPT misformats source references so that each one ends up
 * in the markdown reference form `[Title][source:N]`.
 *
 * The replacements run in a fixed order and later ones depend on the output of earlier
 * ones, so they should not be reordered.
 *
 * @param messageContent - the raw message text
 * @param footnoteReferences - map from source id to footnote reference (e.g. `source:0`); when
 *     missing, the title-based pass is skipped
 * @param sourceLinks - links whose titles ChatGPT may have used in place of an index
 * @returns the message text with references normalized
 */
function handleMisformattedLinks(
    messageContent: string,
    footnoteReferences: RealAiMessage['footnoteReferences'],
    sourceLinks: SourceLink[],
) {
    let preparedContent = messageContent;

    // This loop of regexes replaces cases where ChatGPT used the source's title as the reference
    // instead of the source's index, like [Some Document][source:Some Document]
    if (footnoteReferences) {
        for (const sourceLink of sourceLinks) {
            // sourceIds should never be empty, so in practice sourceId will always be a
            // string here. This is guaranteed because the SourceMetadata type requires an
            // id and the SourceLink is built from that.
            // But, might as well be defensive since the SourceLink type itself cannot guarantee
            // that sourceIds[0] will be present and, even if it could, this data is coming down
            // from a service and isn't really guaranteed to be correct.
            const sourceId = sourceLink.sourceIds[0];
            if (!sourceId) continue;
            const footnoteReference = footnoteReferences[sourceId];
            if (!footnoteReference) continue;
            const title = sourceLinkTitle(sourceLink);

            // Titles are free text, so regex metacharacters in them (parentheses, "+", "?", ...)
            // must be escaped before the title is embedded in a pattern.
            const escapedTitle = title.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const regex = new RegExp(`\\[source:\\s?${escapedTitle}\\]`, 'g');

            // A replacer function is used so that "$" sequences in the title (like "$&" or "$$")
            // are inserted literally instead of being treated as replacement patterns.
            const replacement = `[${title}][${footnoteReference}]`;
            preparedContent = preparedContent.replace(regex, () => replacement);
        }
    }

    // this regex fixes a case where ChatGPT does [Document ID: 1] instead of [source:1]
    preparedContent = preparedContent.replace(/([[(])Document ID: (\d+)([\])])/g, '$1source:$2$3');

    // This regex replaces a title like [source:Document ID 1] with just [(link)]. We could try
    // to put in the title, but often ChatGPT already included the title, and just didn't wrap
    // it in brackets, like "Here is a link to the document titled \"Debits & Credits\" [source: Document 0](doc-0)"
    // See https://trello.com/c/O12pvJrP
    preparedContent = preparedContent.replace(/\[source: [^\]]+\]([[(])/g, '[(link)]$1');

    // This regex replaces a case where the source is in the format (doc-1) instead of [source:1].
    // See https://trello.com/c/O12pvJrP
    // This one is intentionally limited to a single digit: allowing more digits would also
    // rewrite ordinary parenthesized text such as date ranges, e.g. "(1939-1945)".
    preparedContent = preparedContent.replace(/\(\w+-(\d)\)/g, '[source:$1]');

    // This regex replaces every misformatted id, like (source: Document ID 1), with a properly
    // formatted one, like (source:1)
    preparedContent = preparedContent.replace(/source:[^\d)\]]+(\d+)/g, 'source:$1');

    // This regex removes an untitled link that ChatGPT just attaches to the end of the message,
    // like "... that is the answer [source:0]."
    preparedContent = preparedContent.replace(/ \[sources?:\d+\](.?)$/g, '$1');

    // This regex fixes two situations:
    // 1. ChatGPT uses the [Title](url) format instead of the [Title][reference] format
    // 2. ChatGPT includes multiple source ids in a single reference (we just use the 1st one)
    // The first id may have several digits, so [source:12] stays [source:12].
    preparedContent = preparedContent.replace(/[[(]sources?:(\d+)[,\d]*[\])]/g, '[source:$1]');

    // this regex fixes the case where ChatGPT prefixed the title with "Document 1:"
    preparedContent = preparedContent.replace(/\[Document \d+: ([^\]])/g, '[$1');

    // This regex handles cases where ChatGPT referenced a document without giving the
    // link a title, like [source:0]. In that case, we convert to [(link)][source:0]. We could try
    // to put in the title, but often ChatGPT already included the title, but just didn't wrap
    // it in brackets, like "refer to The Blah Blah Document [source:0]"
    preparedContent = preparedContent.replace(/([^\]])\[(source[^\]]+)]/g, '$1[(link)][$2]');

    return preparedContent;
}

/**
 * Replaces bare mentions like "Document 1" with the title of the source whose footnote
 * reference carries that index (e.g. `source:1`).
 *
 * @param messageContent - the message text
 * @param footnoteReferences - map from source id to footnote reference; when missing, the
 *     message is returned unchanged
 * @param sourceLinks - the links used to look up a title for each source id
 * @returns the message text with "Document N" mentions replaced by titles where a link is known
 */
function addDocumentTitlesWhereMissing(
    messageContent: string,
    footnoteReferences: RealAiMessage['footnoteReferences'],
    sourceLinks: SourceLink[],
) {
    if (!footnoteReferences) {
        return messageContent;
    }

    let preparedContent = messageContent;
    for (const [sourceId, sourceReference] of Object.entries(footnoteReferences)) {
        // References look like "source:0"; the part after the colon is the index
        const sourceIndex = sourceReference.split(':')[1];
        if (!sourceIndex) continue;

        const sourceLink = sourceLinks.find(sl => sl.sourceIds.includes(sourceId));
        if (!sourceLink) continue;

        const title = sourceLinkTitle(sourceLink);

        // The index comes from service data, so escape it before embedding it in a pattern.
        const escapedIndex = sourceIndex.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

        // The lookahead keeps "Document 1" from matching the start of "Document 10".
        const regex = new RegExp(`Document ${escapedIndex}(?!\\d)`, 'g');

        // A replacer function inserts the title literally, even if it contains "$" sequences
        // that String.prototype.replace would otherwise treat as replacement patterns.
        preparedContent = preparedContent.replace(regex, () => title);
    }

    return preparedContent;
}

/**
 * Prepares a bot message for markdown rendering: repairs misformatted source references,
 * fills in document titles, and appends a link-reference definition for each source.
 *
 * @param messageContent - the message text
 * @param footnoteReferences - map from source id to footnote reference; when missing, the
 *     message is returned untouched
 * @param sourceLinks - the links the footnotes should point at
 * @returns the message text, followed by the footnote definitions when there are any
 */
export function addFootnotesToMessage(
    messageContent: RealAiMessage['content'],
    footnoteReferences: RealAiMessage['footnoteReferences'],
    sourceLinks: SourceLink[],
) {
    // Without references there is nothing to link up
    if (!footnoteReferences) return messageContent;

    // One definition per line, e.g. "[source:0]:https://www.example.com"
    const footnoteDefinitions = getFootnotes(footnoteReferences, sourceLinks)
        .map(({ footnote }) => footnote)
        .join('\n');

    const withRepairedLinks = handleMisformattedLinks(messageContent, footnoteReferences, sourceLinks);
    const withTitles = addDocumentTitlesWhereMissing(withRepairedLinks, footnoteReferences, sourceLinks);

    // Markdown needs a blank line between the body and the first definition. ChatGPT may have
    // added definitions of its own for direct urls; ours simply go underneath those.
    // Empty parts are dropped so no stray blank lines are produced.
    return [withTitles, footnoteDefinitions].filter(Boolean).join('\n\n');
}
