/**
 * Component: Intelligent Ranking Algorithm
 * Documentation: documentation/phase3/ranking-algorithm.md
 */

import { compareTwoStrings } from 'string-similarity';

/** A single search result returned by an indexer (torrent or NZB/Usenet). */
export interface TorrentResult {
  indexer: string;
  indexerId?: number;
  title: string;
  /** File size in bytes (the scorers divide by 1024 * 1024 to obtain MB). */
  size: number;
  seeders?: number; // Optional for NZB/Usenet results (no seeders concept)
  leechers?: number; // Optional for NZB/Usenet results (no leechers concept)
  publishDate: Date;
  downloadUrl: string;
  infoUrl?: string; // Link to indexer's info page (for user reference)
  infoHash?: string;
  guid: string;
  format?: 'M4B' | 'M4A' | 'MP3' | 'FLAC' | 'OTHER';
  bitrate?: string;
  hasChapters?: boolean;
  flags?: string[]; // Indexer flags like "Freeleech", "Internal", etc.
  protocol?: string; // 'torrent' or 'usenet' - from Prowlarr API
}

/** The audiobook a user asked for; results are matched and scored against it. */
export interface AudiobookRequest {
  title: string;
  author: string;
  narrator?: string;
  /** Book runtime in minutes; when absent, size scoring is skipped (0 points). */
  durationMinutes?: number;
}

/** Score adjustment applied when a result carries a given indexer flag. */
export interface IndexerFlagConfig {
  name: string; // Flag name (e.g., "Freeleech")
  modifier: number; // -100 to 100 (percentage)
}

/** Optional knobs for `RankingAlgorithm.rankTorrents`. */
export interface RankTorrentsOptions {
  indexerPriorities?: Map<number, number>; // indexerId -> priority (1-25)
  flagConfigs?: IndexerFlagConfig[]; // Flag bonus configurations
  requireAuthor?: boolean; // Enforce author presence check (default: true)
  stopWords?: string[]; // Language-specific stop words for matching
  characterReplacements?: Record<string, string>; // Language-specific char replacements (e.g. ß→ss)
}

/** The ebook a user asked for when ranking indexer (torrent/NZB) results. */
export interface EbookTorrentRequest {
  title: string;
  author: string;
  preferredFormat: string; // User's preferred format (epub, pdf, etc.)
}

/** Optional knobs for `RankingAlgorithm.rankEbookTorrents`. */
export interface RankEbookTorrentsOptions {
  indexerPriorities?: Map<number, number>; // indexerId -> priority (1-25)
  flagConfigs?: IndexerFlagConfig[]; // Flag bonus configurations
  requireAuthor?: boolean; // Enforce author presence check (default: true)
  stopWords?: string[]; // Language-specific stop words for matching
  characterReplacements?: Record<string, string>; // Language-specific char replacements (e.g. ß→ss)
}

/** One bonus (or penalty) applied on top of a result's base score. */
export interface BonusModifier {
  type: 'indexer_priority' | 'indexer_flag' | 'custom';
  value: number; // Multiplier (e.g., 0.4 for 40%)
  points: number; // Calculated bonus points from this modifier
  reason: string; // Human-readable explanation
}

/** Per-category base scores for an audiobook result plus explanatory notes. */
export interface ScoreBreakdown {
  formatScore: number;
  sizeScore: number;
  seederScore: number;
  matchScore: number;
  totalScore: number;
  notes: string[];
}

/** An audiobook result annotated with its scores and final rank. */
export interface RankedTorrent extends TorrentResult {
  score: number; // Base score (0-100)
  bonusModifiers: BonusModifier[];
  bonusPoints: number; // Sum of all bonus points
  finalScore: number; // score + bonusPoints
  rank: number;
  breakdown: ScoreBreakdown;
}

/** Per-category base scores for an ebook result plus explanatory notes. */
export interface EbookScoreBreakdown {
  formatScore: number; // 0-10 points (match preferred = 10, else 0)
  sizeScore: number; // 0-15 points (inverted - smaller is better)
  seederScore: number; // 0-15 points (same as audiobooks)
  matchScore: number; // 0-60 points (same as audiobooks)
  totalScore: number;
  notes: string[];
}

/** An ebook result annotated with its scores, final rank and detected format. */
export interface RankedEbookTorrent extends TorrentResult {
  score: number; // Base score (0-100)
  bonusModifiers: BonusModifier[];
  bonusPoints: number; // Sum of all bonus points
  finalScore: number; // score + bonusPoints
  rank: number;
  breakdown: EbookScoreBreakdown;
  ebookFormat?: string; // Detected ebook format (epub, pdf, mobi, etc.)
}
/**
 * Scores and ranks indexer search results for audiobook and ebook requests.
 *
 * Base score (0-100) = format + size + seeders + title/author match.
 * Bonus points (indexer priority, indexer flags) are added on top to produce
 * the final score used for ordering.
 */
export class RankingAlgorithm {
  /**
   * Ebook format keywords paired with the pattern that detects them in a
   * lowercased title. Order matters: the first matching format wins, so
   * 'azw3' is listed before 'azw'.
   * The keyword must start the title or follow a non-alphanumeric character,
   * and must not be followed by a letter (so "mobility" is not "mobi").
   */
  private static readonly EBOOK_FORMAT_PATTERNS: ReadonlyArray<readonly [string, RegExp]> = [
    'epub',
    'pdf',
    'mobi',
    'azw3',
    'azw',
    'fb2',
    'cbz',
    'cbr',
  ].map((format) => [format, new RegExp(`(?:^|[^a-z0-9])${format}(?![a-z])`)] as const);

  /**
   * Rank all torrents and return sorted by finalScore (best first)
   * @param torrents - Array of torrent results to rank
   * @param audiobook - Audiobook request details for matching (includes durationMinutes for size scoring)
   * @param options - Optional configuration for ranking behavior
   * @returns Results of at least 20 MB, annotated with scores and 1-based ranks
   */
  rankTorrents(
    torrents: TorrentResult[],
    audiobook: AudiobookRequest,
    options: RankTorrentsOptions = {}
  ): RankedTorrent[] {
    const {
      indexerPriorities,
      flagConfigs,
      requireAuthor = true, // Safe default: require author in automatic mode
      stopWords,
      characterReplacements,
    } = options;

    const ranked = torrents
      // Filter out files < 20 MB (likely ebooks/samples)
      .filter((torrent) => torrent.size / (1024 * 1024) >= 20)
      .map((torrent): RankedTorrent => {
        // Base scores (0-100 in total)
        const formatScore = this.scoreFormat(torrent);
        const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
        const seederScore = this.scoreSeeders(torrent.seeders);
        const matchScore = this.scoreMatch(torrent, audiobook, requireAuthor, stopWords, characterReplacements);
        const baseScore = formatScore + sizeScore + seederScore + matchScore;

        const bonusModifiers = this.calculateBonusModifiers(torrent, baseScore, indexerPriorities, flagConfigs);
        const bonusPoints = bonusModifiers.reduce((sum, mod) => sum + mod.points, 0);

        const breakdown: ScoreBreakdown = {
          formatScore,
          sizeScore,
          seederScore,
          matchScore,
          totalScore: baseScore,
          notes: [],
        };
        breakdown.notes = this.generateNotes(torrent, breakdown, audiobook.durationMinutes);

        return {
          ...torrent,
          score: baseScore,
          bonusModifiers,
          bonusPoints,
          finalScore: baseScore + bonusPoints,
          rank: 0, // Will be assigned after sorting
          breakdown,
        };
      });

    return this.sortAndAssignRanks(ranked);
  }

  /**
   * Get detailed scoring breakdown for a torrent
   * @param torrent - Result to score
   * @param audiobook - Audiobook request details for matching
   * @param requireAuthor - Reject results without a confident author match (default: true)
   * @param options - Optional language-specific matching settings; pass the same
   *   stopWords/characterReplacements given to rankTorrents() to get the same match score
   */
  getScoreBreakdown(
    torrent: TorrentResult,
    audiobook: AudiobookRequest,
    requireAuthor: boolean = true,
    options: Pick<RankTorrentsOptions, 'stopWords' | 'characterReplacements'> = {}
  ): ScoreBreakdown {
    const formatScore = this.scoreFormat(torrent);
    const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
    const seederScore = this.scoreSeeders(torrent.seeders);
    const matchScore = this.scoreMatch(
      torrent,
      audiobook,
      requireAuthor,
      options.stopWords,
      options.characterReplacements
    );

    const breakdown: ScoreBreakdown = {
      formatScore,
      sizeScore,
      seederScore,
      matchScore,
      totalScore: formatScore + sizeScore + seederScore + matchScore,
      notes: [],
    };
    breakdown.notes = this.generateNotes(torrent, breakdown, audiobook.durationMinutes);
    return breakdown;
  }

  /**
   * Build the bonus modifiers (indexer priority and indexer flags) for one result.
   * Each modifier contributes `baseScore * value` points; flag modifiers may be negative.
   */
  private calculateBonusModifiers(
    torrent: TorrentResult,
    baseScore: number,
    indexerPriorities?: Map<number, number>,
    flagConfigs?: IndexerFlagConfig[]
  ): BonusModifier[] {
    const bonusModifiers: BonusModifier[] = [];

    // Indexer priority bonus (default: 10/25 = 40%)
    if (torrent.indexerId !== undefined) {
      const priority = indexerPriorities?.get(torrent.indexerId) ?? 10;
      const modifier = priority / 25; // Convert 1-25 to 0.04-1.0 (4%-100%)
      bonusModifiers.push({
        type: 'indexer_priority',
        value: modifier,
        points: baseScore * modifier,
        reason: `Indexer priority ${priority}/25 (${Math.round(modifier * 100)}%)`,
      });
    }

    // Flag bonuses/penalties
    if (torrent.flags && torrent.flags.length > 0 && flagConfigs && flagConfigs.length > 0) {
      for (const torrentFlag of torrent.flags) {
        // Case-insensitive, whitespace-trimmed matching
        const wanted = torrentFlag.trim().toLowerCase();
        const matchingConfig = flagConfigs.find((cfg) => cfg.name.trim().toLowerCase() === wanted);
        if (!matchingConfig) {
          continue;
        }
        const modifier = matchingConfig.modifier / 100; // Convert -100..100 → -1.0..1.0
        bonusModifiers.push({
          type: 'indexer_flag',
          value: modifier,
          points: baseScore * modifier,
          reason: `Flag "${torrentFlag}" (${matchingConfig.modifier > 0 ? '+' : ''}${matchingConfig.modifier}%)`,
        });
      }
    }

    return bonusModifiers;
  }

  /**
   * Sort by finalScore descending (best first), break ties by publishDate
   * descending (newest first), then assign 1-based ranks. Sorts in place.
   */
  private sortAndAssignRanks<T extends { finalScore: number; publishDate: Date; rank: number }>(
    ranked: T[]
  ): T[] {
    ranked.sort((a, b) =>
      b.finalScore !== a.finalScore
        ? b.finalScore - a.finalScore
        : b.publishDate.getTime() - a.publishDate.getTime()
    );
    ranked.forEach((entry, index) => {
      entry.rank = index + 1;
    });
    return ranked;
  }

  /**
   * Score format quality (10 points max)
   * M4B (chapters present or unknown): 10 pts
   * M4B explicitly without chapters (hasChapters === false): 9 pts
   * FLAC: 7 pts (lossless audio)
   * M4A: 6 pts
   * MP3: 4 pts
   * Other: 1 pt
   */
  private scoreFormat(torrent: TorrentResult): number {
    switch (this.detectFormat(torrent)) {
      case 'M4B':
        return torrent.hasChapters !== false ? 10 : 9;
      case 'FLAC':
        return 7;
      case 'M4A':
        return 6;
      case 'MP3':
        return 4;
      default:
        return 1;
    }
  }

  /**
   * Score file size quality (15 points max) from the MB-per-minute ratio.
   *
   * @param torrent - Torrent result with size in bytes
   * @param runtimeMinutes - Book runtime in minutes
   * @returns 0-15 points based on MB/min ratio
   *
   * Algorithm:
   * - >= 1.0 MB/min → 15 points
   * - Linear scaling below 1.0 MB/min (0.5 MB/min = 7.5 points)
   * - 0 points if runtime or size is missing, non-finite or not positive
   *
   * Note: Files < 20 MB are pre-filtered in rankTorrents()
   */
  private scoreSize(torrent: TorrentResult, runtimeMinutes: number | undefined): number {
    // Graceful degradation: no usable runtime data = no size scoring
    if (runtimeMinutes === undefined || !Number.isFinite(runtimeMinutes) || runtimeMinutes <= 0) {
      return 0;
    }

    const sizeMB = torrent.size / (1024 * 1024);
    const mbPerMin = sizeMB / runtimeMinutes;
    if (!Number.isFinite(mbPerMin) || mbPerMin <= 0) {
      return 0;
    }

    return Math.min(15, mbPerMin * 15);
  }

  /**
   * Score seeder count (15 points max)
   * Logarithmic scaling, min(15, log10(seeders + 1) * 6):
   *   0 (or negative) seeders: 0 points
   *   9 seeders: 6 points
   *   99 seeders: 12 points
   *   316+ seeders: 15 points
   *
   * Missing seeder counts (NZB/Usenet results) receive the full 15 points.
   */
  private scoreSeeders(seeders: number | undefined): number {
    // Undefined/null/NaN (NZB results) - full score since Usenet has no seeders concept
    if (seeders === undefined || seeders === null || isNaN(seeders)) {
      return 15;
    }

    // Non-positive counts would otherwise yield -Infinity/NaN from log10
    if (seeders <= 0) {
      return 0;
    }

    return Math.min(15, Math.log10(seeders + 1) * 6);
  }

  /**
   * Normalize text for matching by handling CamelCase and punctuation separators
   *   "VirginaEvans TheCorrespondent" → "virgina evans the correspondent"
   *   "Twelve.Months-Jim.Butcher" → "twelve months jim butcher"
   *   "Author_Name_Book" → "author name book"
   *
   * @param text - Raw title or author text
   * @param characterReplacements - Literal substring replacements applied after
   *   lowercasing (e.g. ß→ss); keys are NOT interpreted as regular expressions
   */
  private normalizeForMatching(text: string, characterReplacements?: Record<string, string>): string {
    // Split CamelCase FIRST (before lowercasing): "TheCorrespondent" → "The Correspondent"
    let result = text.replace(/([a-z])([A-Z])/g, '$1 $2').toLowerCase();

    // Apply language-specific character replacements before NFD (e.g. ß→ss).
    // split/join replaces every occurrence literally, so keys such as "." or "?"
    // are safe; an empty key is skipped because it would match between every character.
    if (characterReplacements) {
      for (const [from, to] of Object.entries(characterReplacements)) {
        if (from.length === 0) {
          continue;
        }
        result = result.split(from).join(to);
      }
    }

    return result
      // NFD normalization, then drop combining marks: "é" → "e"
      .normalize('NFD')
      .replace(/[\u0300-\u036f]/g, '')
      // Replace underscores with spaces (must be explicit since \w includes _)
      .replace(/_/g, ' ')
      // Replace other punctuation/separators with spaces (preserves apostrophes in contractions)
      .replace(/[^\w\s']/g, ' ')
      // Collapse multiple spaces
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Split a raw author string into individual normalized author names.
   * Splits on ",", "&", " and " and " - "; drops fragments of 2 characters or
   * fewer and the role words "translator"/"narrator".
   */
  private parseAuthors(rawAuthor: string, characterReplacements?: Record<string, string>): string[] {
    return rawAuthor
      .toLowerCase()
      .replace(/\s+/g, ' ')
      .trim()
      .split(/,|&| and | - /)
      .map((name) => name.trim())
      .filter((name) => name.length > 2 && !['translator', 'narrator'].includes(name))
      .map((name) => this.normalizeForMatching(name, characterReplacements));
  }

  /**
   * Reduce a raw request title to its required portion, normalized.
   * Content in (), [] or {} and anything after the first colon is treated as
   * optional (subtitles, appended series names) and removed.
   *   "The Finest Edge of Twilight: Dungeons & Dragons" → "the finest edge of twilight"
   */
  private extractRequiredTitle(rawTitle: string, characterReplacements?: Record<string, string>): string {
    // Work on the raw (lowercased) title: brackets/colons are gone after normalization
    let requiredRaw = rawTitle
      .toLowerCase()
      .replace(/[(\[{][^)\]}]+[)\]}]/g, ' ')
      .trim();

    const colonIndex = requiredRaw.indexOf(':');
    if (colonIndex > 0 && colonIndex < requiredRaw.length - 1) {
      requiredRaw = requiredRaw.substring(0, colonIndex).trim();
    }

    return this.normalizeForMatching(requiredRaw, characterReplacements);
  }

  /**
   * Decide whether `titleToMatch` appears in the torrent title as a complete
   * title rather than as part of a longer one.
   *
   * Complete means:
   *  1. Acceptable prefix: no significant words before it, OR a separator
   *     (-, :, —) directly before it, OR an author name in the prefix.
   *  2. Acceptable suffix: nothing after it, OR a metadata marker, OR an
   *     author name directly after it.
   *
   * @param torrentTitle - Normalized torrent title
   * @param torrentTitleOriginal - Lowercased, whitespace-collapsed torrent title
   *   (still contains the brackets/colons that normalization removes)
   * @param titleToMatch - Normalized request title candidate
   * @param normalizedAuthors - Normalized author names
   * @param extractWords - Returns the significant (non-stop) words of a text
   */
  private isCompleteTitleMatch(
    torrentTitle: string,
    torrentTitleOriginal: string,
    titleToMatch: string,
    normalizedAuthors: string[],
    extractWords: (text: string) => string[]
  ): boolean {
    const titleIndex = torrentTitle.indexOf(titleToMatch);
    if (titleIndex === -1) {
      return false;
    }

    const beforeTitle = torrentTitle.substring(0, titleIndex);
    const afterTitle = torrentTitle.substring(titleIndex + titleToMatch.length);

    // Locate the title inside the ORIGINAL string via its first/last significant
    // word, so markers removed by normalization can still be inspected.
    const titleWords = titleToMatch.split(/\s+/).filter((w) => w.length > 2);
    let beforeTitleOriginal = '';
    let afterTitleOriginal = '';
    if (titleWords.length > 0) {
      const firstTitleWord = titleWords[0];
      const lastTitleWord = titleWords[titleWords.length - 1];

      const firstWordIdx = torrentTitleOriginal.indexOf(firstTitleWord);
      if (firstWordIdx !== -1) {
        beforeTitleOriginal = torrentTitleOriginal.substring(0, firstWordIdx).trimEnd();
      }

      const lastWordIdx = torrentTitleOriginal.lastIndexOf(lastTitleWord);
      if (lastWordIdx !== -1) {
        afterTitleOriginal = torrentTitleOriginal.substring(lastWordIdx + lastTitleWord.length);
      }
    }

    // ---- Suffix: must look like metadata, not a continuation of the title ----
    const metadataMarkers = [' by ', ' - ', ' [', ' (', ' {', ' :', ','];
    const trimmedAfter = afterTitle.trim();
    // Handles space-separated formats like "Title Author Year"
    const afterStartsWithAuthor = normalizedAuthors.some(
      (author) => author.length > 2 && trimmedAfter.startsWith(author)
    );
    const hasMetadataSuffix =
      afterTitle === '' ||
      metadataMarkers.some((marker) => afterTitle.startsWith(marker)) ||
      metadataMarkers.some((marker) => afterTitleOriginal.startsWith(marker)) ||
      afterStartsWithAuthor;

    // ---- Prefix: clean, structured ("Author - Series - 01 - Title"), or author ----
    const separators = ['-', ':', '—'];
    const precedingText = beforeTitle.trimEnd();
    const hasNoWordsPrefix = extractWords(beforeTitle).length === 0;
    const titlePrecededBySeparator = separators.some(
      (sep) => precedingText.endsWith(sep) || beforeTitleOriginal.endsWith(sep)
    );
    const authorInPrefix = normalizedAuthors.some(
      (author) => author.length > 2 && beforeTitle.includes(author)
    );
    const hasAcceptablePrefix = hasNoWordsPrefix || titlePrecededBySeparator || authorInPrefix;

    return hasAcceptablePrefix && hasMetadataSuffix;
  }

  /**
   * Score title/author match quality (60 points max)
   * Title: 0-45 points (complete title match = 45, otherwise fuzzy similarity * 45)
   * Author: 0-15 points
   *
   * Returns 0 outright when fewer than 80% of the required title words appear
   * in the torrent title, or when `requireAuthor` is true and no author is
   * found with high confidence.
   */
  private scoreMatch(
    torrent: TorrentResult,
    audiobook: AudiobookRequest,
    requireAuthor: boolean = true,
    customStopWords?: string[],
    characterReplacements?: Record<string, string>
  ): number {
    // Normalize for matching (handles CamelCase, punctuation separators, diacritics)
    const torrentTitle = this.normalizeForMatching(torrent.title, characterReplacements);
    const requestTitle = this.normalizeForMatching(audiobook.title, characterReplacements);

    // Authors are split on the RAW string (commas still present), then normalized
    const normalizedAuthors = this.parseAuthors(audiobook.author, characterReplacements);
    const requestAuthorNormalized = normalizedAuthors.join(' ');

    // ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
    // Use provided language-specific stop words, or fall back to English defaults
    const stopWords = new Set(customStopWords ?? ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for']);
    const extractWords = (text: string): string[] =>
      this.normalizeForMatching(text, characterReplacements)
        .split(/\s+/)
        .filter((word) => word.length > 0 && !stopWords.has(word));

    // Only words outside brackets / before a colon are required
    const requiredTitle = this.extractRequiredTitle(audiobook.title, characterReplacements);
    const requiredWords = extractWords(requiredTitle);
    const torrentWords = extractWords(torrentTitle);

    // If the title is only stop words / optional content there is nothing to check
    if (requiredWords.length > 0) {
      const matchedCount = requiredWords.filter((word) => torrentWords.includes(word)).length;
      // HARD REQUIREMENT: Must have 80%+ coverage of REQUIRED words
      if (matchedCount / requiredWords.length < 0.8) {
        return 0;
      }
    }

    // ========== STAGE 1.5: AUTHOR PRESENCE CHECK (OPTIONAL) ==========
    // Only enforced in automatic mode (requireAuthor: true)
    if (requireAuthor && !this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors)) {
      return 0;
    }

    // ========== STAGE 2: TITLE MATCHING (0-45 points) ==========
    // Original torrent title (lowercased only) keeps [ ] ( ) : for marker detection
    const torrentTitleOriginal = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();

    // Try the full title first, then the required-only title if it differs
    const titlesToTry = requiredTitle !== requestTitle ? [requestTitle, requiredTitle] : [requestTitle];

    const hasCompleteTitle = titlesToTry.some((candidate) =>
      this.isCompleteTitleMatch(torrentTitle, torrentTitleOriginal, candidate, normalizedAuthors, extractWords)
    );

    // Complete match → full points; otherwise best fuzzy similarity as fallback
    const titleScore = hasCompleteTitle
      ? 45
      : Math.max(...titlesToTry.map((candidate) => compareTwoStrings(candidate, torrentTitle))) * 45;

    // ========== STAGE 3: AUTHOR MATCHING (0-15 points) ==========
    const exactAuthorMatches = normalizedAuthors.filter((author) => torrentTitle.includes(author)).length;
    const authorScore =
      exactAuthorMatches > 0
        ? (exactAuthorMatches / normalizedAuthors.length) * 15 // proportional credit
        : compareTwoStrings(requestAuthorNormalized, torrentTitle) * 15; // fuzzy partial credit

    return Math.min(60, titleScore + authorScore);
  }

  /**
   * Check if author is present in torrent title with high confidence
   * Uses pre-parsed and normalized authors array
   *
   * @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
   * @param normalizedAuthors - Array of normalized author names (roles already filtered)
   * @returns true if at least ONE author is present with high confidence
   */
  private checkAuthorPresenceWithParsed(torrentTitle: string, normalizedAuthors: string[]): boolean {
    return normalizedAuthors.some((author) => {
      // Check 1: Exact substring match (both sides are normalized)
      if (torrentTitle.includes(author)) {
        return true;
      }

      // Check 2: High fuzzy similarity (≥ 0.85)
      if (compareTwoStrings(author, torrentTitle) >= 0.85) {
        return true;
      }

      // Check 3: First and last name components both present within 30 chars
      // Handles: "Sanderson, Brandon" vs "Brandon Sanderson", middle initials
      const words = author.split(/\s+/).filter((w) => w.length > 1);
      if (words.length >= 2) {
        const firstIdx = torrentTitle.indexOf(words[0]);
        const lastIdx = torrentTitle.indexOf(words[words.length - 1]);
        if (firstIdx !== -1 && lastIdx !== -1 && Math.abs(lastIdx - firstIdx) <= 30) {
          return true;
        }
      }

      return false;
    });
  }

  /**
   * Check if author is present in torrent title with high confidence
   * Handles variations: middle initials, spacing, punctuation, name order
   *
   * @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
   * @param requestAuthor - Raw author string (parsed and normalized internally,
   *   with the same rules scoreMatch() uses)
   * @returns true if at least ONE author is present with high confidence
   */
  private checkAuthorPresence(
    torrentTitle: string,
    requestAuthor: string,
    characterReplacements?: Record<string, string>
  ): boolean {
    const normalizedAuthors = this.parseAuthors(requestAuthor, characterReplacements);
    return this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors);
  }

  /**
   * Detect audio format: the explicit `format` field if set, otherwise the
   * first of M4B, M4A, MP3, FLAC found in the title, otherwise 'OTHER'.
   */
  private detectFormat(torrent: TorrentResult): 'M4B' | 'M4A' | 'MP3' | 'FLAC' | 'OTHER' {
    // Use explicit format if provided
    if (torrent.format) {
      return torrent.format;
    }

    const title = torrent.title.toUpperCase();
    for (const keyword of ['M4B', 'M4A', 'MP3', 'FLAC'] as const) {
      if (title.includes(keyword)) {
        return keyword;
      }
    }

    return 'OTHER';
  }

  /**
   * Append the notes shared by audiobook and ebook results:
   * seeder availability, title/author match quality, overall assessment.
   */
  private appendSharedNotes(
    notes: string[],
    seeders: number | undefined,
    matchScore: number,
    totalScore: number
  ): void {
    // Seeder notes (skip for NZB/Usenet results which don't have seeders)
    if (seeders !== undefined && seeders !== null && !isNaN(seeders)) {
      if (seeders === 0) {
        notes.push('⚠️ No seeders available');
      } else if (seeders < 5) {
        notes.push(`Low seeders (${seeders})`);
      } else if (seeders >= 50) {
        notes.push(`Excellent availability (${seeders} seeders)`);
      }
    }

    // Match notes (match is worth up to 60 points)
    if (matchScore < 24) {
      notes.push('⚠️ Poor title/author match');
    } else if (matchScore < 42) {
      notes.push('⚠️ Weak title/author match');
    } else if (matchScore >= 54) {
      notes.push('✓ Excellent title/author match');
    }

    // Overall quality assessment
    if (totalScore >= 75) {
      notes.push('✓ Excellent choice');
    } else if (totalScore >= 55) {
      notes.push('✓ Good choice');
    } else if (totalScore < 35) {
      notes.push('⚠️ Consider reviewing this choice');
    }
  }

  /**
   * Generate human-readable notes about scoring
   * (format, size-per-minute quality, seeders, match quality, overall).
   */
  private generateNotes(
    torrent: TorrentResult,
    breakdown: ScoreBreakdown,
    runtimeMinutes?: number
  ): string[] {
    const notes: string[] = [];

    // Format notes
    switch (this.detectFormat(torrent)) {
      case 'M4B':
        notes.push('Excellent format (M4B)');
        if (torrent.hasChapters !== false) {
          notes.push('Has chapter markers');
        }
        break;
      case 'FLAC':
        notes.push('Lossless format (FLAC)');
        break;
      case 'M4A':
        notes.push('Good format (M4A)');
        break;
      case 'MP3':
        notes.push('Acceptable format (MP3)');
        break;
      default:
        notes.push('Unknown or uncommon format');
    }

    // Size notes
    if (runtimeMinutes && runtimeMinutes > 0) {
      const mbPerMin = torrent.size / (1024 * 1024) / runtimeMinutes;
      if (mbPerMin >= 1.5) {
        notes.push('✓ Premium quality (high bitrate)');
      } else if (mbPerMin >= 1.0) {
        notes.push('✓ High quality');
      } else if (mbPerMin >= 0.5) {
        notes.push('Standard quality');
      } else if (mbPerMin >= 0.3) {
        notes.push('⚠️ Low quality (low bitrate)');
      } else {
        notes.push('⚠️ Very low quality - may be ebook');
      }
    }

    this.appendSharedNotes(notes, torrent.seeders, breakdown.matchScore, breakdown.totalScore);
    return notes;
  }

  // =========================================================================
  // EBOOK TORRENT RANKING (for indexer results)
  // Reuses scoreMatch() and scoreSeeders() from audiobook ranking
  // Uses ebook-specific format and size scoring
  // =========================================================================

  /**
   * Rank ebook torrents from indexers
   * Reuses title/author matching and seeder scoring from audiobook ranking
   * Uses ebook-specific format scoring (10 pts for match, 0 otherwise)
   * Uses inverted size scoring (smaller = better, > 20MB filtered)
   *
   * @param torrents - Array of torrent results from Prowlarr
   * @param ebook - Ebook request details (title, author, preferredFormat)
   * @param options - Optional configuration for ranking behavior
   * @returns Results of at most 20 MB, annotated with scores and 1-based ranks
   */
  rankEbookTorrents(
    torrents: TorrentResult[],
    ebook: EbookTorrentRequest,
    options: RankEbookTorrentsOptions = {}
  ): RankedEbookTorrent[] {
    const {
      indexerPriorities,
      flagConfigs,
      requireAuthor = true, // Safe default: require author in automatic mode
      stopWords,
      characterReplacements,
    } = options;

    const ranked = torrents
      // Filter out files > 20 MB (too large for ebooks)
      .filter((torrent) => torrent.size / (1024 * 1024) <= 20)
      .map((torrent): RankedEbookTorrent => {
        const detectedFormat = this.detectEbookFormat(torrent);

        // Base scores (0-100 in total)
        const formatScore = this.scoreEbookFormat(torrent, ebook.preferredFormat);
        const sizeScore = this.scoreEbookSize(torrent);
        const seederScore = this.scoreSeeders(torrent.seeders);
        const matchScore = this.scoreMatch(
          torrent,
          { title: ebook.title, author: ebook.author },
          requireAuthor,
          stopWords,
          characterReplacements
        );
        const baseScore = formatScore + sizeScore + seederScore + matchScore;

        // Bonus modifiers (same as audiobooks)
        const bonusModifiers = this.calculateBonusModifiers(torrent, baseScore, indexerPriorities, flagConfigs);
        const bonusPoints = bonusModifiers.reduce((sum, mod) => sum + mod.points, 0);

        const breakdown: EbookScoreBreakdown = {
          formatScore,
          sizeScore,
          seederScore,
          matchScore,
          totalScore: baseScore,
          notes: [],
        };
        breakdown.notes = this.generateEbookNotes(torrent, breakdown, ebook.preferredFormat);

        return {
          ...torrent,
          score: baseScore,
          bonusModifiers,
          bonusPoints,
          finalScore: baseScore + bonusPoints,
          rank: 0, // Will be assigned after sorting
          breakdown,
          ebookFormat: detectedFormat !== 'unknown' ? detectedFormat : undefined,
        };
      });

    return this.sortAndAssignRanks(ranked);
  }

  /**
   * Score ebook format (10 points max)
   * Full points when the detected format equals the preferred format
   * (case-insensitive), 0 otherwise
   */
  private scoreEbookFormat(torrent: TorrentResult, preferredFormat: string): number {
    return this.detectEbookFormat(torrent) === preferredFormat.toLowerCase() ? 10 : 0;
  }

  /**
   * Score ebook file size (15 points max, inverted - smaller is better)
   * < 5 MB = 15 pts (full)
   * 5-15 MB = 10 pts
   * 15-20 MB = 5 pts
   * > 20 MB = 0 pts (normally filtered out before scoring)
   */
  private scoreEbookSize(torrent: TorrentResult): number {
    const sizeMB = torrent.size / (1024 * 1024);

    if (sizeMB < 5) {
      return 15; // Optimal size for ebooks
    }
    if (sizeMB <= 15) {
      return 10; // Acceptable, may have images
    }
    if (sizeMB <= 20) {
      return 5; // Large but within limit
    }
    return 0; // Safety net: should have been filtered
  }

  /**
   * Detect ebook format from torrent title
   * Recognises the keyword as a standalone token in any position:
   * ".epub", "(epub)", "[epub]", " epub", "_epub", or at the very end.
   * A keyword embedded in a longer word ("mobility") is not a match.
   *
   * @returns The lowercase format keyword, or 'unknown'
   */
  private detectEbookFormat(torrent: TorrentResult): string {
    const title = torrent.title.toLowerCase();

    for (const [format, pattern] of RankingAlgorithm.EBOOK_FORMAT_PATTERNS) {
      if (pattern.test(title) || title.endsWith(format)) {
        return format;
      }
    }

    return 'unknown';
  }

  /**
   * Generate human-readable notes for ebook scoring
   * (format, file size, seeders, match quality, overall).
   */
  private generateEbookNotes(
    torrent: TorrentResult,
    breakdown: EbookScoreBreakdown,
    preferredFormat: string
  ): string[] {
    const notes: string[] = [];

    // Format notes
    const detectedFormat = this.detectEbookFormat(torrent);
    if (breakdown.formatScore === 10) {
      notes.push(`✓ Preferred format (${detectedFormat.toUpperCase()})`);
    } else if (detectedFormat !== 'unknown') {
      notes.push(`Different format (${detectedFormat.toUpperCase()}, wanted ${preferredFormat.toUpperCase()})`);
    } else {
      notes.push('⚠️ Unknown format');
    }

    // Size notes
    const sizeMB = torrent.size / (1024 * 1024);
    if (sizeMB < 5) {
      notes.push('✓ Optimal file size');
    } else if (sizeMB <= 15) {
      notes.push('Good file size (may have images)');
    } else if (sizeMB <= 20) {
      notes.push('⚠️ Large file size');
    }

    // Seeder, match and overall notes use the same thresholds as audiobooks
    this.appendSharedNotes(notes, torrent.seeders, breakdown.matchScore, breakdown.totalScore);
    return notes;
  }
}

// =========================================================================
// EBOOK RANKING (simplified algorithm for ebook search results)
// =========================================================================

/** A single ebook search result (Anna's Archive or Prowlarr). */
export interface EbookResult {
  md5: string;
  title: string;
  author: string;
  format: string; // epub, pdf, mobi, etc.
  fileSize?: number; // in bytes
  downloadUrls: string[];
  source: 'annas_archive' | 'prowlarr'; // Source of the result
  indexerId?: number; // Prowlarr indexer ID (if applicable)
}

/** The ebook a user asked for when ranking ebook search results. */
export interface EbookRequest {
  title: string;
  author: string;
  preferredFormat: string; // User's preferred format (epub, pdf, etc.)
}
} export interface RankedEbook extends EbookResult { score: number; // Total score (0-100) rank: number; breakdown: { formatScore: number; // 0-40 points sizeScore: number; // 0-30 points (inverted - smaller is better) sourceScore: number; // 0-30 points (Anna's Archive priority) notes: string[]; }; } /** * Rank ebook search results * Scoring priorities (inverted from audiobooks): * - Format match: 40 points (matching preferred format) * - Size: 30 points (smaller files = better, inverted from audiobooks) * - Source: 30 points (Anna's Archive priority for reliability) */ export function rankEbooks( results: EbookResult[], request: EbookRequest ): RankedEbook[] { const preferredFormat = request.preferredFormat.toLowerCase(); const ranked = results.map((result): RankedEbook => { const notes: string[] = []; // ========== FORMAT SCORING (0-40 points) ========== // Exact format match gets full points // Similar formats get partial credit let formatScore = 0; const resultFormat = result.format.toLowerCase(); if (resultFormat === preferredFormat) { formatScore = 40; notes.push(`✓ Preferred format (${result.format.toUpperCase()})`); } else { // Partial credit for compatible formats const ebookFormatGroups = [ ['epub', 'kepub'], // EPUB family ['mobi', 'azw', 'azw3'], // Kindle family ['pdf'], // PDF standalone ['fb2', 'fb2.zip'], // FB2 family ['cbz', 'cbr'], // Comic formats ]; const preferredGroup = ebookFormatGroups.find(g => g.includes(preferredFormat)); const resultGroup = ebookFormatGroups.find(g => g.includes(resultFormat)); if (preferredGroup && resultGroup && preferredGroup === resultGroup) { formatScore = 30; // Same family notes.push(`Similar format (${result.format.toUpperCase()})`); } else if (resultFormat === 'epub') { formatScore = 25; // EPUB is universally convertible notes.push(`Convertible format (${result.format.toUpperCase()})`); } else if (resultFormat === 'pdf') { formatScore = 15; // PDF is common but less flexible notes.push(`PDF format (less 
flexible)`); } else { formatScore = 10; // Other formats notes.push(`Different format (${result.format.toUpperCase()})`); } } // ========== SIZE SCORING (0-30 points, inverted) ========== // For ebooks, smaller files are generally better (cleaner, no bloat) // Typical ebook sizes: 0.5-5 MB (good), 5-20 MB (has images), 20+ MB (may have issues) let sizeScore = 0; if (result.fileSize !== undefined && result.fileSize > 0) { const sizeMB = result.fileSize / (1024 * 1024); if (sizeMB <= 2) { sizeScore = 30; // Ideal size notes.push('✓ Optimal file size'); } else if (sizeMB <= 5) { sizeScore = 25; // Good size notes.push('Good file size'); } else if (sizeMB <= 15) { sizeScore = 20; // Has images, acceptable notes.push('Larger file (may have images)'); } else if (sizeMB <= 50) { sizeScore = 10; // Large, possibly bloated notes.push('⚠️ Large file size'); } else { sizeScore = 5; // Very large, suspicious notes.push('⚠️ Very large file (may include extras)'); } } else { // No size info - give middle score sizeScore = 15; notes.push('File size unknown'); } // ========== SOURCE SCORING (0-30 points) ========== // Anna's Archive is the primary reliable source // Future: Prowlarr indexers will get configurable priority let sourceScore = 0; if (result.source === 'annas_archive') { sourceScore = 30; // Full points for Anna's Archive notes.push('✓ Anna\'s Archive (reliable)'); } else if (result.source === 'prowlarr') { // Future: Use indexer priority from config sourceScore = 15; // Base score for Prowlarr results notes.push('Prowlarr indexer'); } const totalScore = formatScore + sizeScore + sourceScore; return { ...result, score: totalScore, rank: 0, // Will be assigned after sorting breakdown: { formatScore, sizeScore, sourceScore, notes, }, }; }); // Sort by score descending ranked.sort((a, b) => b.score - a.score); // Assign ranks ranked.forEach((r, index) => { r.rank = index + 1; }); return ranked; } // Singleton instance let ranker: RankingAlgorithm | null = null; export 
function getRankingAlgorithm(): RankingAlgorithm { if (!ranker) { ranker = new RankingAlgorithm(); } return ranker; } /** * Helper function to rank torrents using the singleton instance * * @param torrents - Array of torrent results to rank * @param audiobook - Audiobook request details * @param options - Optional ranking configuration * @returns Ranked torrents with quality scores */ export function rankTorrents( torrents: TorrentResult[], audiobook: AudiobookRequest, options?: RankTorrentsOptions ): (RankedTorrent & { qualityScore: number })[]; /** * Helper function to rank torrents using the singleton instance (legacy signature) * @deprecated Use options object instead */ export function rankTorrents( torrents: TorrentResult[], audiobook: AudiobookRequest, indexerPriorities?: Map, flagConfigs?: IndexerFlagConfig[] ): (RankedTorrent & { qualityScore: number })[]; export function rankTorrents( torrents: TorrentResult[], audiobook: AudiobookRequest, optionsOrPriorities?: RankTorrentsOptions | Map, flagConfigs?: IndexerFlagConfig[] ): (RankedTorrent & { qualityScore: number })[] { const algorithm = getRankingAlgorithm(); // Handle both new options object and legacy parameters let options: RankTorrentsOptions; if (optionsOrPriorities instanceof Map) { // Legacy call: rankTorrents(torrents, audiobook, priorities, flags) options = { indexerPriorities: optionsOrPriorities, flagConfigs, requireAuthor: true // Safe default }; } else { // New call: rankTorrents(torrents, audiobook, options) options = optionsOrPriorities || {}; } const ranked = algorithm.rankTorrents(torrents, audiobook, options); // Add qualityScore field for UI compatibility (rounded score) return ranked.map((r) => ({ ...r, qualityScore: Math.round(r.score), })); } /** * Helper function to rank ebook torrents using the singleton instance * * @param torrents - Array of torrent results from Prowlarr * @param ebook - Ebook request details (title, author, preferredFormat) * @param options - Optional ranking 
configuration * @returns Ranked ebook torrents with quality scores */ export function rankEbookTorrents( torrents: TorrentResult[], ebook: EbookTorrentRequest, options?: RankEbookTorrentsOptions ): (RankedEbookTorrent & { qualityScore: number })[] { const algorithm = getRankingAlgorithm(); const ranked = algorithm.rankEbookTorrents(torrents, ebook, options || {}); // Add qualityScore field for UI compatibility (rounded score) return ranked.map((r) => ({ ...r, qualityScore: Math.round(r.score), })); }