// ReadMeABook/src/lib/utils/ranking-algorithm.ts

/**
 * Component: Intelligent Ranking Algorithm
 * Documentation: documentation/phase3/ranking-algorithm.md
 */

import { compareTwoStrings } from 'string-similarity';

/**
 * A single search result as returned by an indexer, before any scoring.
 * The ranking code in this file reads `indexerId`, `title`, `size`, `seeders`,
 * `publishDate`, `format`, `hasChapters` and `flags`; the remaining fields are
 * carried through to the ranked output untouched.
 */
export interface TorrentResult {
  indexer: string;
  indexerId?: number;   // Key for the indexer-priority lookup; no priority bonus is added when absent
  title: string;        // Release title; used for title/author matching and format keyword detection
  size: number;         // Compared against durationMinutes * 1024 * 1024, i.e. treated as bytes
  seeders: number;
  leechers: number;
  publishDate: Date;    // Tiebreaker between equal final scores (newest first)
  downloadUrl: string;
  infoHash?: string;
  guid: string;
  format?: 'M4B' | 'M4A' | 'MP3' | 'OTHER'; // When set, takes precedence over detection from the title
  bitrate?: string;
  hasChapters?: boolean; // Only an explicit `false` lowers the M4B format score; undefined is scored like `true`
  flags?: string[];     // Indexer flags like "Freeleech", "Internal", etc.
}

/**
 * The audiobook the user asked for; torrents are scored against these details.
 */
export interface AudiobookRequest {
  title: string;            // Text in (), [] or {} is treated as optional for the word-coverage filter
  author: string;           // May list several authors separated by ",", "&", " and " or " - "
  narrator?: string;        // Not read by the ranking code in this file
  durationMinutes?: number; // Used to judge file size; when missing the size score is a neutral 5
}

/**
 * Bonus/penalty rule applied to torrents that carry a given indexer flag.
 * Names are compared case-insensitively with surrounding whitespace trimmed.
 */
export interface IndexerFlagConfig {
  name: string;         // Flag name (e.g., "Freeleech")
  modifier: number;     // -100 to 100 (percentage of the base score added or subtracted)
}

/**
 * One bonus or penalty applied on top of a torrent's base score.
 * The ranking code in this file only emits 'indexer_priority' and
 * 'indexer_flag' entries; 'custom' is declared but never produced here.
 */
export interface BonusModifier {
  type: 'indexer_priority' | 'indexer_flag' | 'custom';
  value: number;        // Multiplier (e.g., 0.4 for 40%); negative for flag penalties
  points: number;       // Calculated bonus points from this modifier (base score * value)
  reason: string;       // Human-readable explanation
}

/**
 * Per-component view of a torrent's base score, excluding bonus modifiers.
 */
export interface ScoreBreakdown {
  formatScore: number;  // File format quality (documented max 25)
  seederScore: number;  // Seeder availability (documented max 15)
  sizeScore: number;    // Size plausibility for the requested duration (documented max 10)
  matchScore: number;   // Title/author match quality (documented max 50)
  totalScore: number;   // Sum of the four component scores
  notes: string[];      // Human-readable remarks derived from the scores
}

/**
 * A torrent result enriched with its scores and its position in the ranking.
 */
export interface RankedTorrent extends TorrentResult {
  score: number;              // Base score (0-100)
  bonusModifiers: BonusModifier[]; // Every priority/flag modifier that was applied
  bonusPoints: number;        // Sum of all bonus points
  finalScore: number;         // score + bonusPoints
  rank: number;               // 1-based position after sorting by finalScore (1 = best)
  breakdown: ScoreBreakdown;  // Component scores that make up `score`
}

/**
 * Scores indexer search results against an audiobook request and orders them
 * best-first.
 *
 * The base score is the sum of four components: format (max 25), seeders
 * (max 15), size plausibility (max 10) and title/author match (max 50).
 * Indexer-priority and indexer-flag modifiers are then applied as percentages
 * of that base score to produce the final score used for ordering.
 */
export class RankingAlgorithm {
  /** Words ignored when extracting the significant words of a title. */
  private static readonly STOP_WORDS: ReadonlySet<string> = new Set([
    'the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for',
  ]);

  /** Text that may directly follow a complete title (author, separators, tags). */
  private static readonly METADATA_MARKERS: readonly string[] = [
    ' by ', ' - ', ' [', ' (', ' {', ' :', ',',
  ];

  /**
   * Rank all torrents and return sorted by finalScore (best first)
   * @param torrents - Array of torrent results to rank
   * @param audiobook - Audiobook request details for matching
   * @param indexerPriorities - Optional map of indexerId to priority (1-25), defaults to 10
   * @param flagConfigs - Optional array of flag configurations for bonus/penalty modifiers
   * @returns A new array of ranked torrents with `rank` starting at 1; the input array is not reordered
   */
  rankTorrents(
    torrents: TorrentResult[],
    audiobook: AudiobookRequest,
    indexerPriorities?: Map<number, number>,
    flagConfigs?: IndexerFlagConfig[]
  ): RankedTorrent[] {
    // Normalise flag names once per call rather than once per torrent flag.
    // The first config wins on duplicate names, matching Array.prototype.find.
    const flagLookup = new Map<string, IndexerFlagConfig>();
    for (const cfg of flagConfigs ?? []) {
      const key = cfg.name.trim().toLowerCase();
      if (!flagLookup.has(key)) {
        flagLookup.set(key, cfg);
      }
    }

    const ranked = torrents.map((torrent): RankedTorrent => {
      // Base score (0-100) and its components come from the same code path
      // that getScoreBreakdown exposes publicly, so the two can never drift.
      const breakdown = this.getScoreBreakdown(torrent, audiobook);
      const baseScore = breakdown.totalScore;

      const bonusModifiers: BonusModifier[] = [];

      // Indexer priority bonus (default: 10/25 = 40%)
      if (torrent.indexerId !== undefined) {
        const priority = indexerPriorities?.get(torrent.indexerId) ?? 10;
        const modifier = priority / 25; // Convert 1-25 to 0.04-1.0 (4%-100%)

        bonusModifiers.push({
          type: 'indexer_priority',
          value: modifier,
          points: baseScore * modifier,
          reason: `Indexer priority ${priority}/25 (${Math.round(modifier * 100)}%)`,
        });
      }

      // Flag bonuses/penalties (case-insensitive, whitespace-trimmed matching)
      if (flagLookup.size > 0) {
        for (const torrentFlag of torrent.flags ?? []) {
          const matchingConfig = flagLookup.get(torrentFlag.trim().toLowerCase());
          if (!matchingConfig) {
            continue;
          }

          const modifier = matchingConfig.modifier / 100; // Convert -100..100 → -1.0..1.0

          bonusModifiers.push({
            type: 'indexer_flag',
            value: modifier,
            points: baseScore * modifier,
            reason: `Flag "${torrentFlag}" (${matchingConfig.modifier > 0 ? '+' : ''}${matchingConfig.modifier}%)`,
          });
        }
      }

      const bonusPoints = bonusModifiers.reduce((sum, mod) => sum + mod.points, 0);

      return {
        ...torrent,
        score: baseScore,
        bonusModifiers,
        bonusPoints,
        finalScore: baseScore + bonusPoints,
        rank: 0, // Will be assigned after sorting
        breakdown,
      };
    });

    // Sort by finalScore descending, then by publishDate descending (newest first)
    ranked.sort((a, b) => {
      if (b.finalScore !== a.finalScore) {
        return b.finalScore - a.finalScore;
      }
      return this.publishTime(b) - this.publishTime(a);
    });

    ranked.forEach((r, index) => {
      r.rank = index + 1;
    });

    return ranked;
  }

  /**
   * Get detailed scoring breakdown for a torrent
   * @param torrent - Torrent result to score
   * @param audiobook - Audiobook request the torrent is scored against
   * @returns Component scores, their sum, and human-readable notes (bonus modifiers are not included)
   */
  getScoreBreakdown(
    torrent: TorrentResult,
    audiobook: AudiobookRequest
  ): ScoreBreakdown {
    const formatScore = this.scoreFormat(torrent);
    const seederScore = this.scoreSeeders(torrent.seeders);
    const sizeScore = this.scoreSize(torrent.size, audiobook.durationMinutes);
    const matchScore = this.scoreMatch(torrent, audiobook);

    const breakdown: ScoreBreakdown = {
      formatScore,
      seederScore,
      sizeScore,
      matchScore,
      totalScore: formatScore + seederScore + sizeScore + matchScore,
      notes: [],
    };
    breakdown.notes = this.generateNotes(torrent, breakdown);

    return breakdown;
  }

  /**
   * Millisecond timestamp used as the sort tiebreaker.
   * Values that are not a valid Date at runtime (for example an ISO string or
   * a missing field) are converted where possible and otherwise treated as 0,
   * so the comparator never throws or returns NaN.
   */
  private publishTime(torrent: TorrentResult): number {
    const time = new Date(torrent.publishDate).getTime();
    return Number.isNaN(time) ? 0 : time;
  }

  /**
   * Score format quality (25 points max)
   * M4B (chapters present or unknown): 25 pts
   * M4B explicitly without chapters (hasChapters === false): 22 pts
   * M4A: 16 pts
   * MP3: 10 pts
   * Other: 3 pts
   */
  private scoreFormat(torrent: TorrentResult): number {
    const format = this.detectFormat(torrent);

    switch (format) {
      case 'M4B':
        return torrent.hasChapters !== false ? 25 : 22;
      case 'M4A':
        return 16;
      case 'MP3':
        return 10;
      default:
        return 3;
    }
  }

  /**
   * Score seeder count (15 points max)
   * Logarithmic scaling, log10(seeders + 1) * 6:
   * 0 (or negative / non-numeric) seeders: 0 points
   * 1 seeder: ~1.8 points
   * 10 seeders: ~6.2 points
   * 100 seeders: ~12 points
   * 316+ seeders: 15 points (cap)
   */
  private scoreSeeders(seeders: number): number {
    // `!(x > 0)` also rejects NaN and negative counts (e.g. -1 for "unknown"),
    // which would otherwise produce NaN or -Infinity from Math.log10.
    if (!(seeders > 0)) {
      return 0;
    }
    return Math.min(15, Math.log10(seeders + 1) * 6);
  }

  /**
   * Score size reasonableness (10 points max)
   * Expected: 1-2 MiB per minute (roughly 140-280 kbps)
   * Within the expected range: 10 points
   * Too small/large: reduced linearly by relative deviation, floored at 0
   * Unknown or non-positive duration: neutral 5 points
   */
  private scoreSize(size: number, durationMinutes?: number): number {
    // A missing, zero, negative or NaN duration gives no usable expectation.
    if (durationMinutes === undefined || !(durationMinutes > 0)) {
      return 5;
    }

    // A non-numeric size cannot be judged; score it like an empty file.
    if (Number.isNaN(size)) {
      return 0;
    }

    const bytesPerMiB = 1024 * 1024;
    const minExpected = durationMinutes * bytesPerMiB; // 1 MiB/min
    const maxExpected = durationMinutes * 2 * bytesPerMiB; // 2 MiB/min

    if (size >= minExpected && size <= maxExpected) {
      return 10;
    }

    // Relative distance from the nearest edge of the expected range
    const deviation =
      size < minExpected
        ? (minExpected - size) / minExpected
        : (size - maxExpected) / maxExpected;

    return Math.max(0, 10 - deviation * 10);
  }

  /**
   * Score title/author match quality (50 points max)
   * Title similarity: 0-35 points (heavily weighted!)
   * Author presence: 0-15 points
   * Returns 0 outright when the torrent title covers less than 80% of the
   * significant words of the requested title.
   */
  private scoreMatch(
    torrent: TorrentResult,
    audiobook: AudiobookRequest
  ): number {
    const torrentTitle = torrent.title.toLowerCase();
    const requestTitle = audiobook.title.toLowerCase();
    const requestAuthor = audiobook.author.toLowerCase();

    // Stage 1: mandatory word-coverage filter
    if (!this.passesWordCoverage(requestTitle, torrentTitle)) {
      return 0;
    }

    // Stage 2 + 3: title (0-35) and author (0-15)
    const titleScore = this.scoreTitle(requestTitle, torrentTitle);
    const authorScore = this.scoreAuthor(requestAuthor, torrentTitle);

    return Math.min(50, titleScore + authorScore);
  }

  /**
   * Split text into lowercase significant words (punctuation removed, stop words dropped).
   * Note: `\w` matches only ASCII letters, digits and underscore, so accented
   * or non-Latin characters act as separators here.
   */
  private extractWords(text: string): string[] {
    return text
      .toLowerCase()
      .replace(/[^\w\s]/g, ' ') // Remove punctuation
      .split(/\s+/)
      .filter(word => word.length > 0 && !RankingAlgorithm.STOP_WORDS.has(word));
  }

  /**
   * Check that the torrent title contains at least 80% of the required words
   * of the requested title. Text inside (), [] or {} is treated as optional
   * (e.g. "Title (Subtitle)" where the subtitle may be omitted by uploaders).
   */
  private passesWordCoverage(requestTitle: string, torrentTitle: string): boolean {
    const requiredPortion = requestTitle.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
    const requiredWords = this.extractWords(requiredPortion);

    // Title is only stop words or only optional content: nothing to enforce.
    if (requiredWords.length === 0) {
      return true;
    }

    const torrentWords = new Set(this.extractWords(torrentTitle));
    const matchedCount = requiredWords.filter(word => torrentWords.has(word)).length;

    return matchedCount / requiredWords.length >= 0.80;
  }

  /**
   * Score how well the torrent title matches the requested title (0-35).
   * Full points require the requested title to appear with no significant
   * words before it and to be followed by nothing or by a metadata marker;
   * this keeps "Title" from fully matching "Other Book Title" or
   * "Title's Secret". Everything else falls back to fuzzy similarity.
   */
  private scoreTitle(requestTitle: string, torrentTitle: string): number {
    const titleIndex = torrentTitle.indexOf(requestTitle);

    if (titleIndex !== -1) {
      const beforeTitle = torrentTitle.substring(0, titleIndex);
      const afterTitle = torrentTitle.substring(titleIndex + requestTitle.length);

      const hasNoWordsPrefix = this.extractWords(beforeTitle).length === 0;
      const hasMetadataSuffix =
        afterTitle === '' ||
        RankingAlgorithm.METADATA_MARKERS.some(marker => afterTitle.startsWith(marker));

      if (hasNoWordsPrefix && hasMetadataSuffix) {
        return 35;
      }
    }

    // No substring match, or the match is part of a longer/different title
    return compareTwoStrings(requestTitle, torrentTitle) * 35;
  }

  /**
   * Score author presence in the torrent title (0-15).
   * The requested author string is split on ",", "&", " and " and " - ";
   * fragments of two characters or fewer and the role words "translator" /
   * "narrator" are ignored. Exact substring hits earn proportional credit;
   * with no hit at all, fuzzy similarity provides partial credit.
   */
  private scoreAuthor(requestAuthor: string, torrentTitle: string): number {
    const requestAuthors = requestAuthor
      .split(/,|&| and | - /)
      .map(a => a.trim())
      .filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));

    const matchedCount = requestAuthors.filter(author => torrentTitle.includes(author)).length;

    if (matchedCount > 0) {
      return (matchedCount / requestAuthors.length) * 15;
    }

    return compareTwoStrings(requestAuthor, torrentTitle) * 15;
  }

  /**
   * Detect format from torrent title
   * An explicit `format` on the torrent wins; otherwise the upper-cased title
   * is searched for "M4B", "M4A" and "MP3" in that order.
   */
  private detectFormat(torrent: TorrentResult): 'M4B' | 'M4A' | 'MP3' | 'OTHER' {
    if (torrent.format) {
      return torrent.format;
    }

    const title = torrent.title.toUpperCase();

    if (title.includes('M4B')) return 'M4B';
    if (title.includes('M4A')) return 'M4A';
    if (title.includes('MP3')) return 'MP3';

    return 'OTHER';
  }

  /**
   * Generate human-readable notes about scoring
   * @param torrent - Torrent the notes describe
   * @param breakdown - Component scores; only sizeScore, matchScore and totalScore are read
   */
  private generateNotes(
    torrent: TorrentResult,
    breakdown: ScoreBreakdown
  ): string[] {
    const notes: string[] = [];

    // Format notes
    const format = this.detectFormat(torrent);
    if (format === 'M4B') {
      notes.push('Excellent format (M4B)');
      // Mirrors scoreFormat: chapters are assumed unless explicitly absent
      if (torrent.hasChapters !== false) {
        notes.push('Has chapter markers');
      }
    } else if (format === 'M4A') {
      notes.push('Good format (M4A)');
    } else if (format === 'MP3') {
      notes.push('Acceptable format (MP3)');
    } else {
      notes.push('Unknown or uncommon format');
    }

    // Seeder notes
    if (torrent.seeders === 0) {
      notes.push('⚠️ No seeders available');
    } else if (torrent.seeders < 5) {
      notes.push(`Low seeders (${torrent.seeders})`);
    } else if (torrent.seeders >= 50) {
      notes.push(`Excellent availability (${torrent.seeders} seeders)`);
    }

    // Size notes
    if (breakdown.sizeScore < 5) {
      notes.push('⚠️ Unusual file size');
    }

    // Match notes (match is worth up to 50 points)
    if (breakdown.matchScore < 20) {
      notes.push('⚠️ Poor title/author match');
    } else if (breakdown.matchScore < 35) {
      notes.push('⚠️ Weak title/author match');
    } else if (breakdown.matchScore >= 45) {
      notes.push('✓ Excellent title/author match');
    }

    // Overall quality assessment
    if (breakdown.totalScore >= 75) {
      notes.push('✓ Excellent choice');
    } else if (breakdown.totalScore >= 55) {
      notes.push('✓ Good choice');
    } else if (breakdown.totalScore < 35) {
      notes.push('⚠️ Consider reviewing this choice');
    }

    return notes;
  }
}

// Module-wide shared instance, created on first request
let ranker: RankingAlgorithm | null = null;

/**
 * Return the shared RankingAlgorithm, constructing it the first time it is asked for.
 */
export function getRankingAlgorithm(): RankingAlgorithm {
  return ranker ?? (ranker = new RankingAlgorithm());
}

/**
 * Convenience wrapper: ranks torrents with the shared RankingAlgorithm and
 * attaches `qualityScore` (the base score rounded to the nearest integer)
 * to every entry for UI compatibility.
 */
export function rankTorrents(
  torrents: TorrentResult[],
  audiobook: AudiobookRequest,
  indexerPriorities?: Map<number, number>,
  flagConfigs?: IndexerFlagConfig[]
): (RankedTorrent & { qualityScore: number })[] {
  const withQuality: (RankedTorrent & { qualityScore: number })[] = [];

  const rankedEntries = getRankingAlgorithm().rankTorrents(
    torrents,
    audiobook,
    indexerPriorities,
    flagConfigs
  );

  for (const entry of rankedEntries) {
    withQuality.push({ ...entry, qualityScore: Math.round(entry.score) });
  }

  return withQuality;
}