/**
 * Takes in HTML paste and makes a best effort to convert it to plain text,
 * preserving formatting like bullet points when possible.
 *
 * Conversion rules:
 * - Elements listed in `elementsToSkip` are dropped together with their contents.
 * - `<br>` becomes a line break.
 * - Elements listed in `blockElements` occupy their own line(s): a line break is
 *   inserted before one that starts in the middle of a line of text, and after
 *   one whose content does not already end with a line break.
 * - Every `<li>` is prefixed with "• ", indented two spaces per level of
 *   `<ul>`/`<ol>` nesting (ordered lists get bullets as well, not numbers).
 * - Runs of three or more line breaks are collapsed to two.
 *
 * @param html - HTML markup to convert (e.g. a `text/html` clipboard payload).
 *   It is treated as untrusted input.
 * @returns The plain-text rendering. The markup is wrapped in a `<div>` before
 *   conversion, so the result always ends with a line break (an empty input
 *   yields `'\n'`).
 */
export async function htmlToPlainText(html: string): Promise<string> {
  // Parse inside a separate document that has no browsing context. Assigning
  // untrusted markup to `innerHTML` of an element owned by the live document
  // (even a detached one) lets the browser fetch `<img>` resources and fire
  // inline handlers such as `onerror`; in this inert document nothing from the
  // pasted markup is loaded or executed.
  const inertDocument = document.implementation.createHTMLDocument('');
  const temp = inertDocument.createElement('div');
  temp.innerHTML = html;

  // True when the most recent thing added to the output was a line break.
  let lineBreakWasLastThingInserted = false;
  // True when the line currently being built contains non-whitespace text.
  // Bullets and indentation do not count, so `<li><p>text</p></li>` stays on one line.
  let currentLineHasText = false;

  /** Records that a line break is being emitted and returns it. */
  function insertLineBreak(): string {
    lineBreakWasLastThingInserted = true;
    currentLineHasText = false;
    return '\n';
  }

  /**
   * Converts `node` and its descendants to text.
   * `depth` is the list nesting level: `null` outside any list, 0 inside the
   * outermost `<ul>`/`<ol>`, and one more for every nested list.
   */
  function processNode(node: Node, depth: number | null = null): string {
    if (node.nodeType === Node.TEXT_NODE) {
      const textContent = node.textContent ?? '';

      // A text node that is only a newline is formatting whitespace from the
      // markup. Keep it only when it terminates a line that actually has text.
      if (textContent === '\n' && (lineBreakWasLastThingInserted || !currentLineHasText)) {
        return '';
      }

      // Only the part after the last newline belongs to the line in progress.
      const lastBreakIndex = textContent.lastIndexOf('\n');
      const tailHasText = textContent.slice(lastBreakIndex + 1).trim() !== '';
      currentLineHasText = lastBreakIndex === -1 ? currentLineHasText || tailHasText : tailHasText;
      lineBreakWasLastThingInserted = textContent.endsWith('\n');
      return textContent;
    }

    if (node.nodeType !== Node.ELEMENT_NODE) {
      // Comments and other node types contribute nothing.
      return '';
    }

    const tagName = (node as HTMLElement).tagName.toLowerCase();
    if (elementsToSkip.has(tagName)) {
      return '';
    }

    const isBlockElement = blockElements.has(tagName);
    let result = '';
    let appliedDepth = depth;

    // ----- Before the tag processing -----
    // A block element that begins in the middle of a line starts a new one, so
    // `text<div>more</div>` and a list nested directly after an item's text
    // do not run together.
    if (isBlockElement && currentLineHasText) {
      result += insertLineBreak();
    }

    if (tagName === 'br') {
      result += insertLineBreak(); // br always inserts a line break
    } else if (tagName === 'li') {
      result += '  '.repeat(appliedDepth ?? 0) + '• '; // Indent based on depth
      // The bullet is now the last thing inserted, so even an item without any
      // text is terminated by its own line break.
      lineBreakWasLastThingInserted = false;
    } else if (tagName === 'ul' || tagName === 'ol') {
      // set depth to 0 if this is the first ul/ol, otherwise increment
      appliedDepth = appliedDepth === null ? 0 : appliedDepth + 1;
    }

    // ----- Process child nodes -----
    for (const childNode of node.childNodes) {
      result += processNode(childNode, appliedDepth);
    }

    // ----- After the tag processing -----
    // If a block element is ending, make sure its content ends with a line break
    if (isBlockElement && !lineBreakWasLastThingInserted) {
      result += insertLineBreak();
    }

    return result;
  }

  // Process the entire HTML content
  const plainText = processNode(temp);
  // Clean up extra whitespace while preserving intentional line breaks
  return plainText.replace(/\n{3,}/g, '\n\n');
}

/**
 * Lower-case tag names of elements that are laid out as blocks. The HTML to
 * plain text conversion in this file gives each of them its own line(s).
 *
 * `code` is intentionally absent: it is an inline element, and treating it as
 * a block would break the line around every inline code span. Code blocks are
 * still covered through their enclosing `pre`.
 */
const blockElements = new Set([
  // Text containers and headings
  'p',
  'div',
  'blockquote',
  'pre',
  'hr',
  'address',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'hgroup',
  // Lists
  'ul',
  'ol',
  'li',
  'dl',
  'dt',
  'dd',
  // Tables (every row and cell ends up on its own line)
  'table',
  'caption',
  'thead',
  'tbody',
  'tfoot',
  'tr',
  'td',
  'th',
  // Sectioning and grouping
  'article',
  'section',
  'aside',
  'header',
  'footer',
  'nav',
  'main',
  'figure',
  'figcaption',
  // Forms and interactive containers
  'form',
  'fieldset',
  'details',
  'summary',
]);

/**
 * Lower-case tag names that `htmlToPlainText` ignores entirely: when one of
 * these elements is encountered, neither it nor anything nested inside it
 * contributes to the plain-text output.
 */
const elementsToSkip = new Set<string>([
  // Document metadata, styling and scripting
  'style', 'script', 'meta', 'link', 'head', 'title', 'noscript',
  // Embedded, graphical and media content
  'iframe', 'svg', 'canvas', 'audio', 'video', 'track',
  'embed', 'object', 'param', 'source', 'picture', 'map', 'area',
  // Document base URL
  'base',
]);
