mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 04:40:09 +00:00
307b63fab4
Refactors admin settings to use a new IndexersTab and card-based indexer management UI, supporting category selection and improved configuration. Updates backend and API routes to handle indexer categories, propagate ASIN for better search scoring, and group indexers by categories to optimize Prowlarr searches. Enhances documentation to clarify non-terminal request matching and auto-completion behavior. Adds new reusable components for indexer management and category selection.
582 lines
19 KiB
TypeScript
582 lines
19 KiB
TypeScript
/**
|
|
* Component: Intelligent Ranking Algorithm
|
|
* Documentation: documentation/phase3/ranking-algorithm.md
|
|
*/
|
|
|
|
import { compareTwoStrings } from 'string-similarity';
|
|
|
|
export interface TorrentResult {
|
|
indexer: string;
|
|
indexerId?: number;
|
|
title: string;
|
|
size: number;
|
|
seeders?: number; // Optional for NZB/Usenet results (no seeders concept)
|
|
leechers?: number; // Optional for NZB/Usenet results (no leechers concept)
|
|
publishDate: Date;
|
|
downloadUrl: string;
|
|
infoUrl?: string; // Link to indexer's info page (for user reference)
|
|
infoHash?: string;
|
|
guid: string;
|
|
format?: 'M4B' | 'M4A' | 'MP3' | 'OTHER';
|
|
bitrate?: string;
|
|
hasChapters?: boolean;
|
|
flags?: string[]; // Indexer flags like "Freeleech", "Internal", etc.
|
|
protocol?: string; // 'torrent' or 'usenet' - from Prowlarr API
|
|
}
|
|
|
|
export interface AudiobookRequest {
|
|
title: string;
|
|
author: string;
|
|
narrator?: string;
|
|
durationMinutes?: number;
|
|
}
|
|
|
|
export interface IndexerFlagConfig {
|
|
name: string; // Flag name (e.g., "Freeleech")
|
|
modifier: number; // -100 to 100 (percentage)
|
|
}
|
|
|
|
export interface BonusModifier {
|
|
type: 'indexer_priority' | 'indexer_flag' | 'custom';
|
|
value: number; // Multiplier (e.g., 0.4 for 40%)
|
|
points: number; // Calculated bonus points from this modifier
|
|
reason: string; // Human-readable explanation
|
|
}
|
|
|
|
export interface ScoreBreakdown {
|
|
formatScore: number;
|
|
sizeScore: number;
|
|
seederScore: number;
|
|
matchScore: number;
|
|
totalScore: number;
|
|
notes: string[];
|
|
}
|
|
|
|
export interface RankedTorrent extends TorrentResult {
|
|
score: number; // Base score (0-100)
|
|
bonusModifiers: BonusModifier[];
|
|
bonusPoints: number; // Sum of all bonus points
|
|
finalScore: number; // score + bonusPoints
|
|
rank: number;
|
|
breakdown: ScoreBreakdown;
|
|
}
|
|
|
|
export class RankingAlgorithm {
|
|
/**
|
|
* Rank all torrents and return sorted by finalScore (best first)
|
|
* @param torrents - Array of torrent results to rank
|
|
* @param audiobook - Audiobook request details for matching (includes durationMinutes for size scoring)
|
|
* @param indexerPriorities - Optional map of indexerId to priority (1-25), defaults to 10
|
|
* @param flagConfigs - Optional array of flag configurations for bonus/penalty modifiers
|
|
*/
|
|
rankTorrents(
|
|
torrents: TorrentResult[],
|
|
audiobook: AudiobookRequest,
|
|
indexerPriorities?: Map<number, number>,
|
|
flagConfigs?: IndexerFlagConfig[]
|
|
): RankedTorrent[] {
|
|
// Filter out files < 20 MB (likely ebooks/samples)
|
|
const filteredTorrents = torrents.filter((torrent) => {
|
|
const sizeMB = torrent.size / (1024 * 1024);
|
|
return sizeMB >= 20;
|
|
});
|
|
|
|
const ranked = filteredTorrents.map((torrent) => {
|
|
// Calculate base scores (0-100)
|
|
const formatScore = this.scoreFormat(torrent);
|
|
const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
|
|
const seederScore = this.scoreSeeders(torrent.seeders);
|
|
const matchScore = this.scoreMatch(torrent, audiobook);
|
|
|
|
const baseScore = formatScore + sizeScore + seederScore + matchScore;
|
|
|
|
// Calculate bonus modifiers
|
|
const bonusModifiers: BonusModifier[] = [];
|
|
|
|
// Indexer priority bonus (default: 10/25 = 40%)
|
|
if (torrent.indexerId !== undefined) {
|
|
const priority = indexerPriorities?.get(torrent.indexerId) ?? 10;
|
|
const modifier = priority / 25; // Convert 1-25 to 0.04-1.0 (4%-100%)
|
|
const points = baseScore * modifier;
|
|
|
|
bonusModifiers.push({
|
|
type: 'indexer_priority',
|
|
value: modifier,
|
|
points: points,
|
|
reason: `Indexer priority ${priority}/25 (${Math.round(modifier * 100)}%)`,
|
|
});
|
|
}
|
|
|
|
// Flag bonuses/penalties
|
|
if (torrent.flags && torrent.flags.length > 0 && flagConfigs && flagConfigs.length > 0) {
|
|
torrent.flags.forEach(torrentFlag => {
|
|
// Case-insensitive, whitespace-trimmed matching
|
|
const matchingConfig = flagConfigs.find(cfg =>
|
|
cfg.name.trim().toLowerCase() === torrentFlag.trim().toLowerCase()
|
|
);
|
|
|
|
if (matchingConfig) {
|
|
const modifier = matchingConfig.modifier / 100; // Convert -100 to 100 → -1.0 to 1.0
|
|
const points = baseScore * modifier;
|
|
|
|
bonusModifiers.push({
|
|
type: 'indexer_flag',
|
|
value: modifier,
|
|
points: points,
|
|
reason: `Flag "${torrentFlag}" (${matchingConfig.modifier > 0 ? '+' : ''}${matchingConfig.modifier}%)`,
|
|
});
|
|
}
|
|
});
|
|
}
|
|
|
|
// Sum all bonus points
|
|
const bonusPoints = bonusModifiers.reduce((sum, mod) => sum + mod.points, 0);
|
|
|
|
// Calculate final score
|
|
const finalScore = baseScore + bonusPoints;
|
|
|
|
return {
|
|
...torrent,
|
|
score: baseScore,
|
|
bonusModifiers,
|
|
bonusPoints,
|
|
finalScore,
|
|
rank: 0, // Will be assigned after sorting
|
|
breakdown: {
|
|
formatScore,
|
|
sizeScore,
|
|
seederScore,
|
|
matchScore,
|
|
totalScore: baseScore,
|
|
notes: this.generateNotes(torrent, {
|
|
formatScore,
|
|
sizeScore,
|
|
seederScore,
|
|
matchScore,
|
|
totalScore: baseScore,
|
|
notes: [],
|
|
}, audiobook.durationMinutes),
|
|
},
|
|
};
|
|
});
|
|
|
|
// Sort by finalScore descending (best first), then by publishDate descending (newest first) for tiebreakers
|
|
ranked.sort((a, b) => {
|
|
// Primary: sort by final score
|
|
if (b.finalScore !== a.finalScore) {
|
|
return b.finalScore - a.finalScore;
|
|
}
|
|
// Tiebreaker: sort by publishDate (newest first)
|
|
return b.publishDate.getTime() - a.publishDate.getTime();
|
|
});
|
|
|
|
// Assign ranks
|
|
ranked.forEach((r, index) => {
|
|
r.rank = index + 1;
|
|
});
|
|
|
|
return ranked;
|
|
}
|
|
|
|
/**
|
|
* Get detailed scoring breakdown for a torrent
|
|
*/
|
|
getScoreBreakdown(
|
|
torrent: TorrentResult,
|
|
audiobook: AudiobookRequest
|
|
): ScoreBreakdown {
|
|
const formatScore = this.scoreFormat(torrent);
|
|
const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
|
|
const seederScore = this.scoreSeeders(torrent.seeders);
|
|
const matchScore = this.scoreMatch(torrent, audiobook);
|
|
const totalScore = formatScore + sizeScore + seederScore + matchScore;
|
|
|
|
return {
|
|
formatScore,
|
|
sizeScore,
|
|
seederScore,
|
|
matchScore,
|
|
totalScore,
|
|
notes: this.generateNotes(torrent, {
|
|
formatScore,
|
|
sizeScore,
|
|
seederScore,
|
|
matchScore,
|
|
totalScore,
|
|
notes: [],
|
|
}, audiobook.durationMinutes),
|
|
};
|
|
}
|
|
|
|
/**
|
|
* Score format quality (10 points max)
|
|
* Reduced from 25 to make room for data-driven size scoring
|
|
* M4B with chapters: 10 pts
|
|
* M4B without chapters: 9 pts
|
|
* M4A: 6 pts
|
|
* MP3: 4 pts
|
|
* Other: 1 pt
|
|
*/
|
|
private scoreFormat(torrent: TorrentResult): number {
|
|
const format = this.detectFormat(torrent);
|
|
|
|
switch (format) {
|
|
case 'M4B':
|
|
return torrent.hasChapters !== false ? 10 : 9;
|
|
case 'M4A':
|
|
return 6;
|
|
case 'MP3':
|
|
return 4;
|
|
default:
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Score file size quality (15 points max)
|
|
* Uses book runtime and file size to validate correct file type
|
|
* Filters out ebooks and ranks audiobook quality
|
|
*
|
|
* @param torrent - Torrent result with size in bytes
|
|
* @param runtimeMinutes - Book runtime in minutes from Audnexus
|
|
* @returns 0-15 points based on MB/min ratio
|
|
*
|
|
* Algorithm:
|
|
* - >= 1.0 MB/min → 15/15 points (high quality baseline)
|
|
* - Linear scaling below 1.0 MB/min
|
|
* - 0 points if no runtime data (graceful degradation)
|
|
*
|
|
* Note: Files < 20 MB are pre-filtered in rankTorrents()
|
|
*/
|
|
private scoreSize(torrent: TorrentResult, runtimeMinutes: number | undefined): number {
|
|
// Graceful degradation: no runtime data = no size scoring
|
|
if (!runtimeMinutes || runtimeMinutes === 0) {
|
|
return 0;
|
|
}
|
|
|
|
const sizeMB = torrent.size / (1024 * 1024);
|
|
const mbPerMin = sizeMB / runtimeMinutes;
|
|
|
|
// High quality baseline: 1.0 MB/min or higher gets full points
|
|
// This is ~64 kbps MP3 equivalent
|
|
if (mbPerMin >= 1.0) {
|
|
return 15;
|
|
}
|
|
|
|
// Linear scaling below baseline
|
|
// 0.5 MB/min = 7.5 points
|
|
// 0.3 MB/min = 4.5 points
|
|
return mbPerMin * 15;
|
|
}
|
|
|
|
/**
|
|
* Score seeder count (15 points max)
|
|
* Logarithmic scaling:
|
|
* 1 seeder: 0 points
|
|
* 10 seeders: 6 points
|
|
* 100 seeders: 12 points
|
|
* 1000+ seeders: 15 points
|
|
*
|
|
* Note: NZB/Usenet results don't have seeders concept - centralized servers provide guaranteed availability
|
|
*/
|
|
private scoreSeeders(seeders: number | undefined): number {
|
|
// Handle undefined/null (NZB results) - give full score since Usenet has centralized availability
|
|
if (seeders === undefined || seeders === null || isNaN(seeders)) {
|
|
return 15; // Full score - Usenet doesn't need seeders, content is on centralized servers
|
|
}
|
|
|
|
if (seeders === 0) return 0;
|
|
return Math.min(15, Math.log10(seeders + 1) * 6);
|
|
}
|
|
|
|
|
|
/**
|
|
* Score title/author match quality (60 points max)
|
|
* Title similarity: 0-45 points (heavily weighted!)
|
|
* Author presence: 0-15 points
|
|
*/
|
|
private scoreMatch(
|
|
torrent: TorrentResult,
|
|
audiobook: AudiobookRequest
|
|
): number {
|
|
// Normalize whitespace (multiple spaces → single space) for consistent matching
|
|
const torrentTitle = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
const requestTitle = audiobook.title.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
const requestAuthor = audiobook.author.toLowerCase().replace(/\s+/g, ' ').trim();
|
|
|
|
// ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
|
|
// Extract significant words (filter out common stop words)
|
|
const stopWords = ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for'];
|
|
|
|
const extractWords = (text: string, stopList: string[]): string[] => {
|
|
return text
|
|
.toLowerCase()
|
|
.replace(/[^\w\s]/g, ' ') // Remove punctuation
|
|
.split(/\s+/)
|
|
.filter(word => word.length > 0 && !stopList.includes(word));
|
|
};
|
|
|
|
// Separate required words (outside parentheses/brackets) from optional words (inside)
|
|
// This handles common patterns like "Title (Subtitle)" where subtitle may be omitted
|
|
const separateRequiredOptional = (title: string): { required: string; optional: string } => {
|
|
// Extract content in parentheses/brackets as optional
|
|
const optionalPattern = /[(\[{]([^)\]}]+)[)\]}]/g;
|
|
const optionalMatches: string[] = [];
|
|
let match;
|
|
|
|
while ((match = optionalPattern.exec(title)) !== null) {
|
|
optionalMatches.push(match[1]);
|
|
}
|
|
|
|
// Remove parenthetical/bracketed content to get required portion
|
|
const required = title.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
|
|
const optional = optionalMatches.join(' ');
|
|
|
|
return { required, optional };
|
|
};
|
|
|
|
const { required: requiredTitle, optional: optionalTitle } = separateRequiredOptional(requestTitle);
|
|
|
|
// Extract words from required portion only for coverage check
|
|
const requiredWords = extractWords(requiredTitle, stopWords);
|
|
const torrentWords = extractWords(torrentTitle, stopWords);
|
|
|
|
// Calculate word coverage: how many REQUIRED words appear in TORRENT
|
|
if (requiredWords.length === 0) {
|
|
// Edge case: title is only stop words or only optional content, skip filter
|
|
// Fall through to normal scoring
|
|
} else {
|
|
const matchedWords = requiredWords.filter(word => torrentWords.includes(word));
|
|
const coverage = matchedWords.length / requiredWords.length;
|
|
|
|
// HARD REQUIREMENT: Must have 80%+ coverage of REQUIRED words
|
|
if (coverage < 0.80) {
|
|
// Automatic rejection - doesn't contain enough of the requested words
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// ========== STAGE 2: TITLE MATCHING (0-35 points) ==========
|
|
let titleScore = 0;
|
|
|
|
// Try matching with full title first, then fall back to required title (without parentheses)
|
|
const titlesToTry = [requestTitle];
|
|
if (requiredTitle !== requestTitle) {
|
|
titlesToTry.push(requiredTitle); // Add required-only version if different
|
|
}
|
|
|
|
let bestMatch = false;
|
|
for (const titleToMatch of titlesToTry) {
|
|
if (torrentTitle.includes(titleToMatch)) {
|
|
// Found the title, but is it the complete title or part of a longer one?
|
|
const titleIndex = torrentTitle.indexOf(titleToMatch);
|
|
const beforeTitle = torrentTitle.substring(0, titleIndex);
|
|
const afterTitle = torrentTitle.substring(titleIndex + titleToMatch.length);
|
|
|
|
// Extract significant words BEFORE the matched title
|
|
const beforeWords = extractWords(beforeTitle, stopWords);
|
|
|
|
// Title is complete if:
|
|
// 1. Acceptable prefix (no words, OR structured metadata like "Author - Series - ")
|
|
// 2. Followed by clear metadata markers (not "'s Secret" or " Is Watching")
|
|
const metadataMarkers = [' by ', ' - ', ' [', ' (', ' {', ' :', ','];
|
|
|
|
// Check if afterTitle starts with author name (handles space-separated format like "Title Author Year")
|
|
const afterStartsWithAuthor = requestAuthor.length > 2 &&
|
|
afterTitle.trim().startsWith(requestAuthor);
|
|
|
|
const hasMetadataSuffix = afterTitle === '' ||
|
|
metadataMarkers.some(marker => afterTitle.startsWith(marker)) ||
|
|
afterStartsWithAuthor;
|
|
|
|
// Check prefix validity:
|
|
// - No words before = clean match
|
|
// - Title preceded by separator (` - `, `: `) = structured metadata (Author - Series - Title)
|
|
// - Author name in prefix = author attribution before title
|
|
const hasNoWordsPrefix = beforeWords.length === 0;
|
|
|
|
// Check if title is immediately preceded by a metadata separator
|
|
// This handles "Author - Series - 01 - Title" patterns
|
|
const precedingText = beforeTitle.trimEnd();
|
|
const titlePrecededBySeparator =
|
|
precedingText.endsWith('-') ||
|
|
precedingText.endsWith(':') ||
|
|
precedingText.endsWith('—');
|
|
|
|
// Check if author name appears in the prefix
|
|
// This handles "Author Name - Title" patterns
|
|
const authorInPrefix = requestAuthor.length > 2 &&
|
|
beforeTitle.includes(requestAuthor);
|
|
|
|
const hasAcceptablePrefix =
|
|
hasNoWordsPrefix ||
|
|
titlePrecededBySeparator ||
|
|
authorInPrefix;
|
|
|
|
const isCompleteTitle = hasAcceptablePrefix && hasMetadataSuffix;
|
|
|
|
if (isCompleteTitle) {
|
|
// Complete title match → full points
|
|
titleScore = 45;
|
|
bestMatch = true;
|
|
break; // Found a good match, stop trying
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!bestMatch) {
|
|
// No complete match found, use fuzzy similarity as fallback
|
|
// Try against full title first, then required title
|
|
const fuzzyScores = titlesToTry.map(title => compareTwoStrings(title, torrentTitle));
|
|
titleScore = Math.max(...fuzzyScores) * 45;
|
|
}
|
|
|
|
// ========== STAGE 3: AUTHOR MATCHING (0-15 points) ==========
|
|
// Parse requested authors (split on separators, filter out roles)
|
|
const requestAuthors = requestAuthor
|
|
.split(/,|&| and | - /)
|
|
.map(a => a.trim())
|
|
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
|
|
|
|
// Check how many authors appear in torrent title (exact substring match)
|
|
const authorMatches = requestAuthors.filter(author =>
|
|
torrentTitle.includes(author)
|
|
);
|
|
|
|
let authorScore = 0;
|
|
if (authorMatches.length > 0) {
|
|
// Exact substring match → proportional credit
|
|
authorScore = (authorMatches.length / requestAuthors.length) * 15;
|
|
} else {
|
|
// No exact match → use fuzzy similarity for partial credit
|
|
authorScore = compareTwoStrings(requestAuthor, torrentTitle) * 15;
|
|
}
|
|
|
|
return Math.min(60, titleScore + authorScore);
|
|
}
|
|
|
|
/**
|
|
* Detect format from torrent title
|
|
*/
|
|
private detectFormat(torrent: TorrentResult): 'M4B' | 'M4A' | 'MP3' | 'OTHER' {
|
|
// Use explicit format if provided
|
|
if (torrent.format) {
|
|
return torrent.format;
|
|
}
|
|
|
|
const title = torrent.title.toUpperCase();
|
|
|
|
// Check for format keywords in title
|
|
if (title.includes('M4B')) return 'M4B';
|
|
if (title.includes('M4A')) return 'M4A';
|
|
if (title.includes('MP3')) return 'MP3';
|
|
|
|
// Default to OTHER if no format detected
|
|
return 'OTHER';
|
|
}
|
|
|
|
/**
|
|
* Generate human-readable notes about scoring
|
|
*/
|
|
private generateNotes(
|
|
torrent: TorrentResult,
|
|
breakdown: ScoreBreakdown,
|
|
runtimeMinutes?: number
|
|
): string[] {
|
|
const notes: string[] = [];
|
|
|
|
// Format notes
|
|
const format = this.detectFormat(torrent);
|
|
if (format === 'M4B') {
|
|
notes.push('Excellent format (M4B)');
|
|
if (torrent.hasChapters !== false) {
|
|
notes.push('Has chapter markers');
|
|
}
|
|
} else if (format === 'M4A') {
|
|
notes.push('Good format (M4A)');
|
|
} else if (format === 'MP3') {
|
|
notes.push('Acceptable format (MP3)');
|
|
} else {
|
|
notes.push('Unknown or uncommon format');
|
|
}
|
|
|
|
// Size notes
|
|
if (runtimeMinutes && runtimeMinutes > 0) {
|
|
const sizeMB = torrent.size / (1024 * 1024);
|
|
const mbPerMin = sizeMB / runtimeMinutes;
|
|
|
|
if (mbPerMin >= 1.5) {
|
|
notes.push('✓ Premium quality (high bitrate)');
|
|
} else if (mbPerMin >= 1.0) {
|
|
notes.push('✓ High quality');
|
|
} else if (mbPerMin >= 0.5) {
|
|
notes.push('Standard quality');
|
|
} else if (mbPerMin >= 0.3) {
|
|
notes.push('⚠️ Low quality (low bitrate)');
|
|
} else {
|
|
notes.push('⚠️ Very low quality - may be ebook');
|
|
}
|
|
}
|
|
|
|
// Seeder notes (skip for NZB/Usenet results which don't have seeders)
|
|
if (torrent.seeders !== undefined && torrent.seeders !== null && !isNaN(torrent.seeders)) {
|
|
if (torrent.seeders === 0) {
|
|
notes.push('⚠️ No seeders available');
|
|
} else if (torrent.seeders < 5) {
|
|
notes.push(`Low seeders (${torrent.seeders})`);
|
|
} else if (torrent.seeders >= 50) {
|
|
notes.push(`Excellent availability (${torrent.seeders} seeders)`);
|
|
}
|
|
}
|
|
|
|
// Match notes (now worth 60 points!)
|
|
if (breakdown.matchScore < 24) {
|
|
notes.push('⚠️ Poor title/author match');
|
|
} else if (breakdown.matchScore < 42) {
|
|
notes.push('⚠️ Weak title/author match');
|
|
} else if (breakdown.matchScore >= 54) {
|
|
notes.push('✓ Excellent title/author match');
|
|
}
|
|
|
|
// Overall quality assessment
|
|
if (breakdown.totalScore >= 75) {
|
|
notes.push('✓ Excellent choice');
|
|
} else if (breakdown.totalScore >= 55) {
|
|
notes.push('✓ Good choice');
|
|
} else if (breakdown.totalScore < 35) {
|
|
notes.push('⚠️ Consider reviewing this choice');
|
|
}
|
|
|
|
return notes;
|
|
}
|
|
}
|
|
|
|
// Singleton instance
|
|
let ranker: RankingAlgorithm | null = null;
|
|
|
|
export function getRankingAlgorithm(): RankingAlgorithm {
|
|
if (!ranker) {
|
|
ranker = new RankingAlgorithm();
|
|
}
|
|
return ranker;
|
|
}
|
|
|
|
/**
|
|
* Helper function to rank torrents using the singleton instance
|
|
*/
|
|
export function rankTorrents(
|
|
torrents: TorrentResult[],
|
|
audiobook: AudiobookRequest,
|
|
indexerPriorities?: Map<number, number>,
|
|
flagConfigs?: IndexerFlagConfig[]
|
|
): (RankedTorrent & { qualityScore: number })[] {
|
|
const algorithm = getRankingAlgorithm();
|
|
const ranked = algorithm.rankTorrents(torrents, audiobook, indexerPriorities, flagConfigs);
|
|
|
|
// Add qualityScore field for UI compatibility (rounded score)
|
|
return ranked.map((r) => ({
|
|
...r,
|
|
qualityScore: Math.round(r.score),
|
|
}));
|
|
}
|