/**
 * Component: Intelligent Ranking Algorithm
 * Documentation: documentation/phase3/ranking-algorithm.md
 */

import { compareTwoStrings } from 'string-similarity';

/** A single search result as returned by an indexer. */
export interface TorrentResult {
  indexer: string;
  indexerId?: number;
  title: string;
  /** Compared against bytes-per-minute thresholds by the size scorer. */
  size: number;
  seeders: number;
  leechers: number;
  publishDate: Date;
  downloadUrl: string;
  infoHash?: string;
  guid: string;
  /** Explicit format; when absent the format is detected from the title. */
  format?: 'M4B' | 'M4A' | 'MP3' | 'OTHER';
  bitrate?: string;
  /** Only an explicit `false` is treated as "no chapters". */
  hasChapters?: boolean;
  /** Indexer flags like "Freeleech", "Internal", etc. */
  flags?: string[];
}

/** The audiobook the user asked for; torrents are matched against it. */
export interface AudiobookRequest {
  title: string;
  author: string;
  narrator?: string;
  durationMinutes?: number;
}

/** Bonus/penalty configuration for one indexer flag. */
export interface IndexerFlagConfig {
  /** Flag name (e.g., "Freeleech"). Matched case-insensitively after trimming. */
  name: string;
  /** -100 to 100 (percentage of the base score). */
  modifier: number;
}

/** One bonus (or penalty) applied on top of the base score. */
export interface BonusModifier {
  type: 'indexer_priority' | 'indexer_flag' | 'custom';
  /** Multiplier (e.g., 0.4 for 40%). */
  value: number;
  /** Calculated bonus points from this modifier (base score * value). */
  points: number;
  /** Human-readable explanation. */
  reason: string;
}

/** Per-category base scores plus human-readable notes. */
export interface ScoreBreakdown {
  formatScore: number;
  seederScore: number;
  sizeScore: number;
  matchScore: number;
  /** Sum of the four category scores. */
  totalScore: number;
  notes: string[];
}

/** A torrent result annotated with its scores and final rank. */
export interface RankedTorrent extends TorrentResult {
  /** Base score (0-100). */
  score: number;
  bonusModifiers: BonusModifier[];
  /** Sum of all bonus points. */
  bonusPoints: number;
  /** score + bonusPoints. */
  finalScore: number;
  /** 1-based position after sorting (1 = best). */
  rank: number;
  breakdown: ScoreBreakdown;
}

/** Words ignored when checking how much of the requested title a torrent covers. */
const STOP_WORDS: readonly string[] = ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for'];

/** Text that may directly follow a complete title (anything else means the title continues). */
const METADATA_MARKERS: readonly string[] = [' by ', ' - ', ' [', ' (', ' {', ' :', ','];

/** Role labels that can appear in an author string and must not be treated as authors. */
const AUTHOR_ROLE_LABELS: readonly string[] = ['translator', 'narrator'];

/** Priority assumed for an indexer that has no entry in the priorities map. */
const DEFAULT_INDEXER_PRIORITY = 10;

/** Upper bound of the indexer priority scale. */
const MAX_INDEXER_PRIORITY = 25;

/** Lower-cases `text`, replaces punctuation with spaces and returns the words not in `stopList`. */
function extractWords(text: string, stopList: readonly string[]): string[] {
  return text
    .toLowerCase()
    .replace(/[^\w\s]/g, ' ') // Remove punctuation
    .split(/\s+/)
    .filter((word) => word.length > 0 && !stopList.includes(word));
}

/**
 * Removes parenthesised/bracketed/braced segments from a title.
 * Handles patterns like "Title (Subtitle)" where the subtitle may be omitted by uploaders.
 */
function stripBracketedContent(title: string): string {
  return title.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
}

export class RankingAlgorithm {
  /**
   * Rank all torrents and return them sorted by finalScore (best first).
   * Ties are broken by publishDate, newest first. The input array is not mutated.
   *
   * @param torrents - Array of torrent results to rank
   * @param audiobook - Audiobook request details for matching
   * @param indexerPriorities - Optional map of indexerId to priority (1-25), defaults to 10
   * @param flagConfigs - Optional array of flag configurations for bonus/penalty modifiers
   * @returns New array of ranked torrents with 1-based `rank` assigned
   */
  rankTorrents(
    torrents: TorrentResult[],
    audiobook: AudiobookRequest,
    indexerPriorities?: Map<number, number>,
    flagConfigs?: IndexerFlagConfig[]
  ): RankedTorrent[] {
    const ranked = torrents.map((torrent): RankedTorrent => {
      // Base scores (0-100) and notes come from the shared breakdown routine
      const breakdown = this.getScoreBreakdown(torrent, audiobook);
      const baseScore = breakdown.totalScore;

      const bonusModifiers: BonusModifier[] = [];

      // Indexer priority bonus (default: 10/25 = 40%); only for results that carry an indexerId
      if (torrent.indexerId !== undefined) {
        const priority = indexerPriorities?.get(torrent.indexerId) ?? DEFAULT_INDEXER_PRIORITY;
        const modifier = priority / MAX_INDEXER_PRIORITY; // Convert 1-25 to 0.04-1.0 (4%-100%)
        bonusModifiers.push({
          type: 'indexer_priority',
          value: modifier,
          points: baseScore * modifier,
          reason: `Indexer priority ${priority}/25 (${Math.round(modifier * 100)}%)`,
        });
      }

      // Flag bonuses/penalties
      if (torrent.flags && torrent.flags.length > 0 && flagConfigs && flagConfigs.length > 0) {
        for (const torrentFlag of torrent.flags) {
          // Case-insensitive, whitespace-trimmed matching; first matching config wins
          const normalizedFlag = torrentFlag.trim().toLowerCase();
          const matchingConfig = flagConfigs.find(
            (cfg) => cfg.name.trim().toLowerCase() === normalizedFlag
          );
          if (!matchingConfig) {
            continue;
          }

          const modifier = matchingConfig.modifier / 100; // Convert -100 to 100 → -1.0 to 1.0
          bonusModifiers.push({
            type: 'indexer_flag',
            value: modifier,
            points: baseScore * modifier,
            reason: `Flag "${torrentFlag}" (${matchingConfig.modifier > 0 ? '+' : ''}${matchingConfig.modifier}%)`,
          });
        }
      }

      // Sum all bonus points
      const bonusPoints = bonusModifiers.reduce((sum, mod) => sum + mod.points, 0);

      return {
        ...torrent,
        score: baseScore,
        bonusModifiers,
        bonusPoints,
        finalScore: baseScore + bonusPoints,
        rank: 0, // Will be assigned after sorting
        breakdown,
      };
    });

    // Sort by finalScore descending (best first), then by publishDate descending (newest first)
    ranked.sort((a, b) => {
      if (b.finalScore !== a.finalScore) {
        return b.finalScore - a.finalScore;
      }
      return b.publishDate.getTime() - a.publishDate.getTime();
    });

    // Assign ranks
    ranked.forEach((r, index) => {
      r.rank = index + 1;
    });

    return ranked;
  }

  /**
   * Get detailed scoring breakdown for a torrent.
   *
   * @param torrent - Torrent result to score
   * @param audiobook - Audiobook request the torrent is matched against
   * @returns Category scores, their sum, and human-readable notes
   */
  getScoreBreakdown(torrent: TorrentResult, audiobook: AudiobookRequest): ScoreBreakdown {
    const formatScore = this.scoreFormat(torrent);
    const seederScore = this.scoreSeeders(torrent.seeders);
    const sizeScore = this.scoreSize(torrent.size, audiobook.durationMinutes);
    const matchScore = this.scoreMatch(torrent, audiobook);
    const totalScore = formatScore + seederScore + sizeScore + matchScore;

    return {
      formatScore,
      seederScore,
      sizeScore,
      matchScore,
      totalScore,
      notes: this.generateNotes(torrent, {
        formatScore,
        seederScore,
        sizeScore,
        matchScore,
        totalScore,
        notes: [],
      }),
    };
  }

  /**
   * Score format quality (25 points max)
   *   M4B with chapters (or chapters unknown): 25 pts
   *   M4B explicitly without chapters: 22 pts
   *   M4A: 16 pts
   *   MP3: 10 pts
   *   Other: 3 pts
   */
  private scoreFormat(torrent: TorrentResult): number {
    switch (this.detectFormat(torrent)) {
      case 'M4B':
        // Only an explicit `false` loses the chapter bonus; `undefined` keeps it
        return torrent.hasChapters !== false ? 25 : 22;
      case 'M4A':
        return 16;
      case 'MP3':
        return 10;
      default:
        return 3;
    }
  }

  /**
   * Score seeder count (15 points max)
   * Logarithmic scaling, `log10(seeders + 1) * 6` capped at 15:
   *   0 seeders: 0 points
   *   1 seeder: ~1.8 points
   *   10 seeders: ~6.2 points
   *   100 seeders: ~12 points
   *   ~316+ seeders: 15 points
   * Non-positive or NaN seeder counts score 0 so bad indexer data cannot
   * produce NaN/-Infinity totals that would corrupt the sort order.
   */
  private scoreSeeders(seeders: number): number {
    if (!(seeders > 0)) {
      return 0;
    }
    return Math.min(15, Math.log10(seeders + 1) * 6);
  }

  /**
   * Score size reasonableness (10 points max)
   * Expected: 1-2 MB per minute (64-128 kbps)
   *   Within the expected range: 10 points
   *   Too small/large: reduced linearly with the relative deviation, floor 0
   *   Duration unknown (missing, zero, negative or NaN): neutral 5 points
   *   NaN size: 0 points
   */
  private scoreSize(size: number, durationMinutes?: number): number {
    if (durationMinutes === undefined || !(durationMinutes > 0)) {
      return 5; // Neutral score if duration unknown or unusable
    }
    if (Number.isNaN(size)) {
      return 0; // Cannot judge a size we do not have; avoid propagating NaN
    }

    // Expected size: 1-2 MB per minute
    const minExpected = durationMinutes * 1024 * 1024; // 1 MB/min
    const maxExpected = durationMinutes * 2 * 1024 * 1024; // 2 MB/min

    if (size >= minExpected && size <= maxExpected) {
      return 10; // Perfect size
    }

    // Relative distance from the nearest edge of the expected range
    const deviation =
      size < minExpected
        ? (minExpected - size) / minExpected
        : (size - maxExpected) / maxExpected;

    return Math.max(0, 10 - deviation * 10);
  }

  /**
   * Score title/author match quality (50 points max)
   *   Title similarity: 0-35 points (heavily weighted!)
   *   Author presence: 0-15 points
   * Returns 0 outright when fewer than 80% of the requested title's
   * significant words (outside parentheses/brackets) appear in the torrent title.
   */
  private scoreMatch(torrent: TorrentResult, audiobook: AudiobookRequest): number {
    const torrentTitle = torrent.title.toLowerCase();
    const requestTitle = audiobook.title.toLowerCase();
    const requestAuthor = audiobook.author.toLowerCase();

    // ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
    // Only words outside parentheses/brackets are required; bracketed subtitles are optional
    const requiredWords = extractWords(stripBracketedContent(requestTitle), STOP_WORDS);

    // If the title is only stop words or only optional content, the filter is skipped
    if (requiredWords.length > 0) {
      const torrentWords = new Set(extractWords(torrentTitle, STOP_WORDS));
      const matchedCount = requiredWords.filter((word) => torrentWords.has(word)).length;
      const coverage = matchedCount / requiredWords.length;

      // HARD REQUIREMENT: Must have 80%+ coverage of REQUIRED words
      if (coverage < 0.8) {
        return 0;
      }
    }

    // ========== STAGE 2: TITLE MATCHING (0-35 points) ==========
    let isCompleteTitle = false;
    const titleIndex = torrentTitle.indexOf(requestTitle);
    if (titleIndex !== -1) {
      // Found the title, but is it the complete title or part of a longer one?
      const beforeTitle = torrentTitle.substring(0, titleIndex);
      const afterTitle = torrentTitle.substring(titleIndex + requestTitle.length);

      // Title is complete if:
      // 1. No significant words before it (not "This Inevitable Ruin" + "Dungeon Crawler Carl")
      // 2. Followed by clear metadata markers (not "'s Secret" or " Is Watching")
      const hasNoWordsPrefix = extractWords(beforeTitle, STOP_WORDS).length === 0;
      const hasMetadataSuffix =
        afterTitle === '' || METADATA_MARKERS.some((marker) => afterTitle.startsWith(marker));
      isCompleteTitle = hasNoWordsPrefix && hasMetadataSuffix;
    }

    // Complete title match → full points. Otherwise (no substring match, or the title is
    // embedded in a longer one and is likely a different book) → fuzzy similarity.
    const titleScore = isCompleteTitle
      ? 35
      : compareTwoStrings(requestTitle, torrentTitle) * 35;

    // ========== STAGE 3: AUTHOR MATCHING (0-15 points) ==========
    // Parse requested authors (split on separators, filter out roles and very short fragments)
    const requestAuthors = requestAuthor
      .split(/,|&| and | - /)
      .map((a) => a.trim())
      .filter((a) => a.length > 2 && !AUTHOR_ROLE_LABELS.includes(a));

    // Count how many authors appear in the torrent title (exact substring match)
    const authorMatchCount = requestAuthors.filter((author) =>
      torrentTitle.includes(author)
    ).length;

    const authorScore =
      authorMatchCount > 0
        ? (authorMatchCount / requestAuthors.length) * 15 // Proportional credit
        : compareTwoStrings(requestAuthor, torrentTitle) * 15; // Fuzzy partial credit

    return Math.min(50, titleScore + authorScore);
  }

  /**
   * Detect format from the explicit `format` field, falling back to keywords in the title.
   */
  private detectFormat(torrent: TorrentResult): 'M4B' | 'M4A' | 'MP3' | 'OTHER' {
    // Use explicit format if provided
    if (torrent.format) {
      return torrent.format;
    }

    // Check for format keywords in title (case-insensitive substring match)
    const title = torrent.title.toUpperCase();
    if (title.includes('M4B')) return 'M4B';
    if (title.includes('M4A')) return 'M4A';
    if (title.includes('MP3')) return 'MP3';

    // Default to OTHER if no format detected
    return 'OTHER';
  }

  /**
   * Generate human-readable notes about scoring.
   *
   * @param torrent - Torrent the notes describe
   * @param breakdown - Already-computed category scores (its `notes` field is not read)
   * @returns Notes in the order: format, seeders, size, match, overall
   */
  private generateNotes(torrent: TorrentResult, breakdown: ScoreBreakdown): string[] {
    const notes: string[] = [];

    // Format notes
    const format = this.detectFormat(torrent);
    if (format === 'M4B') {
      notes.push('Excellent format (M4B)');
      if (torrent.hasChapters !== false) {
        notes.push('Has chapter markers');
      }
    } else if (format === 'M4A') {
      notes.push('Good format (M4A)');
    } else if (format === 'MP3') {
      notes.push('Acceptable format (MP3)');
    } else {
      notes.push('Unknown or uncommon format');
    }

    // Seeder notes
    if (torrent.seeders === 0) {
      notes.push('⚠️ No seeders available');
    } else if (torrent.seeders < 5) {
      notes.push(`Low seeders (${torrent.seeders})`);
    } else if (torrent.seeders >= 50) {
      notes.push(`Excellent availability (${torrent.seeders} seeders)`);
    }

    // Size notes
    if (breakdown.sizeScore < 5) {
      notes.push('⚠️ Unusual file size');
    }

    // Match notes (now worth 50 points!)
    if (breakdown.matchScore < 20) {
      notes.push('⚠️ Poor title/author match');
    } else if (breakdown.matchScore < 35) {
      notes.push('⚠️ Weak title/author match');
    } else if (breakdown.matchScore >= 45) {
      notes.push('✓ Excellent title/author match');
    }

    // Overall quality assessment
    if (breakdown.totalScore >= 75) {
      notes.push('✓ Excellent choice');
    } else if (breakdown.totalScore >= 55) {
      notes.push('✓ Good choice');
    } else if (breakdown.totalScore < 35) {
      notes.push('⚠️ Consider reviewing this choice');
    }

    return notes;
  }
}

// Singleton instance
let ranker: RankingAlgorithm | null = null;

/** Returns the shared RankingAlgorithm instance, creating it on first use. */
export function getRankingAlgorithm(): RankingAlgorithm {
  if (!ranker) {
    ranker = new RankingAlgorithm();
  }
  return ranker;
}

/**
 * Helper function to rank torrents using the singleton instance.
 *
 * @param torrents - Array of torrent results to rank
 * @param audiobook - Audiobook request details for matching
 * @param indexerPriorities - Optional map of indexerId to priority (1-25), defaults to 10
 * @param flagConfigs - Optional array of flag configurations for bonus/penalty modifiers
 * @returns Ranked torrents, each with an added `qualityScore` (the rounded base score)
 */
export function rankTorrents(
  torrents: TorrentResult[],
  audiobook: AudiobookRequest,
  indexerPriorities?: Map<number, number>,
  flagConfigs?: IndexerFlagConfig[]
): (RankedTorrent & { qualityScore: number })[] {
  const ranked = getRankingAlgorithm().rankTorrents(
    torrents,
    audiobook,
    indexerPriorities,
    flagConfigs
  );

  // Add qualityScore field for UI compatibility (rounded score)
  return ranked.map((r) => ({
    ...r,
    qualityScore: Math.round(r.score),
  }));
}