import { DocHeading } from 'blooksy-backend';

/**
 * Parses an HTML string and lists its `h2`–`h5` headings in document order.
 *
 * @param htmlContent - HTML markup to scan; it is parsed as `text/html`.
 * @returns One `{ level, value }` entry per matched heading, where `level` is
 * the digit taken from the tag name and `value` is the element's text content
 * (kept as-is, so it may be an empty string).
 */
export const extractHeadings = (htmlContent: string): DocHeading[] => {
  const acceptedLevels = [2, 3, 4, 5];
  const parsedDocument = new DOMParser().parseFromString(htmlContent, 'text/html');
  const matchedElements = Array.from(parsedDocument.querySelectorAll('h2, h3, h4, h5'));

  const collected: DocHeading[] = [];
  for (const element of matchedElements) {
    // The heading level is the first digit of the tag name ("H3" -> 3).
    const digitMatch = /\d/.exec(element.tagName);
    if (digitMatch === null) {
      continue;
    }

    const level = Number(digitMatch[0]);
    if (!acceptedLevels.includes(level)) {
      continue;
    }

    // textContent is typed as nullable; entries without a string value are dropped.
    const value = element.textContent;
    if (typeof value === 'string') {
      collected.push({ level, value });
    }
  }

  return collected;
};

/**
 * Collects the titles of the tables or images found in an HTML string.
 *
 * Only elements whose direct parent is a `<figure>` are considered. The title
 * of such an element is the text content of the node that precedes the
 * `<figure>` among its siblings. Whitespace-only text nodes (the newlines and
 * indentation of formatted markup) are skipped while looking for that node, so
 * the result does not depend on how the HTML is laid out.
 *
 * @param tag - Which elements to look up: `'table'` or `'img'`.
 * @param htmlContent - HTML markup to scan; it is parsed as `text/html`.
 * @returns The non-blank titles in document order. Titles are returned exactly
 * as found (not trimmed); figures without a non-blank title are omitted.
 */
export const extractTableOrImgTitles = (tag: 'table' | 'img', htmlContent: string): string[] => {
  // Numeric value of the DOM's `Node.TEXT_NODE` constant.
  const TEXT_NODE = 3;

  const doc = new DOMParser().parseFromString(htmlContent, 'text/html');
  const titles: string[] = [];

  doc.querySelectorAll(tag).forEach(element => {
    const figure = element.parentElement;
    if (!figure || figure.nodeName !== 'FIGURE') {
      return;
    }

    // Walk back past formatting whitespace so that "<p>Title</p>\n<figure>"
    // resolves to the <p> rather than to the "\n" text node between them.
    let titleNode = figure.previousSibling;
    while (
      titleNode !== null &&
      titleNode.nodeType === TEXT_NODE &&
      (titleNode.textContent ?? '').trim() === ''
    ) {
      titleNode = titleNode.previousSibling;
    }

    const title = titleNode?.textContent;
    if (title && title.trim() !== '') {
      titles.push(title);
    }
  });

  return titles;
};
