import { decode } from 'html-entities';

/**
 * Lower-case English stop words that `removeStopWords` filters out.
 * Entries are grouped by initial letter purely for readability.
 */
const STOP_WORDS = new Set<string>([
  'a', 'an', 'and', 'are', 'as', 'at',
  'be', 'by',
  'for', 'from',
  'has', 'he',
  'in', 'is', 'it', 'its',
  'of', 'on',
  'that', 'the', 'to',
  'was', 'were', 'will', 'with',
]);

/**
 * Removes stop words (the entries of `STOP_WORDS`) from `text`.
 *
 * Tokens are delimited by runs of any whitespace (spaces, tabs, newlines),
 * and each token is compared case-insensitively against `STOP_WORDS`.
 * Surviving tokens are joined with a single space, so the result has no
 * leading, trailing, or doubled separators.
 *
 * Tokens are compared as a whole: punctuation attached to a word (e.g.
 * `"the,"`) is not stripped, so such a token is kept.
 *
 * @param text - Text to filter; a falsy value yields `''`.
 * @returns The remaining words separated by single spaces.
 */
export function removeStopWords(text: string): string {
  if (!text) return '';
  return text
    // Split on whitespace runs rather than a literal ' ' so tabs/newlines
    // and repeated spaces delimit words too.
    .split(/\s+/)
    // Leading/trailing whitespace produces empty tokens; drop them so they
    // do not turn into stray spaces after the join.
    .filter(word => word !== '' && !STOP_WORDS.has(word.toLowerCase()))
    .join(' ');
}

/**
 * Reduces `text` to word characters, hyphens, and single spaces.
 *
 * HTML entities are decoded first with `decode`. Then every character that
 * is not matched by `\w`, whitespace, or `-` is removed, each whitespace run
 * collapses to one space, and the result is trimmed. Note that `\w` here is
 * the ASCII class `[A-Za-z0-9_]`, so non-ASCII letters are removed as well.
 *
 * If decoding (or the cleanup of the decoded value) throws, a warning is
 * logged and the same cleanup is applied to the raw input instead.
 *
 * @param text - Text to clean; a falsy value yields `''`.
 * @returns The cleaned, trimmed text.
 */
export function sanitizeText(text: string): string {
  if (!text) return '';

  // Shared cleanup used for both the decoded text and the raw fallback.
  const stripAndCollapse = (value: string): string => {
    const allowedCharsOnly = value.replace(/[^\w\s-]/g, '');
    const singleSpaced = allowedCharsOnly.replace(/\s+/g, ' ');
    return singleSpaced.trim();
  };

  try {
    return stripAndCollapse(decode(text));
  } catch (error) {
    console.warn('Error decoding text:', error);
    return stripAndCollapse(text);
  }
}

/**
 * Shortens `text` so that the result is at most `maxLength` UTF-16 code
 * units long, appending `'...'` when something was cut off.
 *
 * - Text that already fits is returned unchanged.
 * - When `maxLength` is smaller than the ellipsis itself (< 3), the text is
 *   hard-cut to `maxLength` characters without an ellipsis, so the result
 *   never exceeds the requested length. Zero or negative values yield `''`.
 * - The cut never lands between the two halves of a surrogate pair; if it
 *   would, the whole character is dropped instead.
 *
 * @param text - Text to shorten; a falsy value yields `''`.
 * @param maxLength - Maximum length of the returned string.
 * @returns The original or shortened text.
 */
export function truncateText(text: string, maxLength: number): string {
  if (!text) return '';
  if (text.length <= maxLength) return text;

  const ellipsis = '...';
  const ellipsisFits = maxLength >= ellipsis.length;

  // Number of characters of `text` to keep; never negative.
  let cut = Math.max(
    0,
    Math.floor(ellipsisFits ? maxLength - ellipsis.length : maxLength),
  );

  // Avoid leaving a lone high surrogate (first half of e.g. an emoji).
  if (cut > 0) {
    const lastKept = text.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      cut -= 1;
    }
  }

  return text.slice(0, cut) + (ellipsisFits ? ellipsis : '');
}