mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 21:00:09 +00:00
5d8ac2f73d
Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Adds src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replaces AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, ebook scraper, processors, and API routes to use getLanguageForRegion so Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
1306 lines
46 KiB
TypeScript
1306 lines
46 KiB
TypeScript
/**
|
|
* Component: Intelligent Ranking Algorithm
|
|
* Documentation: documentation/phase3/ranking-algorithm.md
|
|
*/
|
|
|
|
import { compareTwoStrings } from 'string-similarity';
|
|
|
|
/**
 * A single search result returned by an indexer (torrent or NZB/Usenet).
 * This is the raw input to the ranking methods of RankingAlgorithm.
 */
export interface TorrentResult {
  /** Name of the indexer that produced the result. */
  indexer: string;
  /** Numeric indexer id; used to look up the indexer priority bonus. */
  indexerId?: number;
  /** Release title as listed by the indexer. */
  title: string;
  /** Size in bytes (ranking divides by 1024 * 1024 to obtain MB). */
  size: number;
  /** Optional for NZB/Usenet results (no seeders concept). */
  seeders?: number;
  /** Optional for NZB/Usenet results (no leechers concept). */
  leechers?: number;
  /** Publication date; used as the tiebreaker (newest first) when scores are equal. */
  publishDate: Date;
  /** URL the release is downloaded from. */
  downloadUrl: string;
  /** Link to indexer's info page (for user reference). */
  infoUrl?: string;
  /** Torrent info hash, when available. */
  infoHash?: string;
  /** Identifier of the result. */
  guid: string;
  /** Explicit audio format; when absent the format is detected from the title. */
  format?: 'M4B' | 'M4A' | 'MP3' | 'FLAC' | 'OTHER';
  /** Bitrate label, when known. */
  bitrate?: string;
  /** Only an explicit `false` lowers the M4B format score. */
  hasChapters?: boolean;
  /** Indexer flags like "Freeleech", "Internal", etc. */
  flags?: string[];
  /** 'torrent' or 'usenet' - from Prowlarr API. */
  protocol?: string;
}
|
|
|
|
/**
 * The audiobook a user asked for; results are matched against it.
 */
export interface AudiobookRequest {
  /** Requested title (may contain a parenthesised or colon-separated subtitle). */
  title: string;
  /** Requested author(s); several names may be joined by ",", "&", " and " or " - ". */
  author: string;
  /** Narrator name, when known. */
  narrator?: string;
  /** Runtime in minutes; enables size scoring (no runtime = 0 size points). */
  durationMinutes?: number;
}
|
|
|
|
/**
 * Bonus or penalty applied to results that carry a given indexer flag.
 */
export interface IndexerFlagConfig {
  /** Flag name (e.g., "Freeleech"); compared case-insensitively after trimming. */
  name: string;
  /** -100 to 100 (percentage of the base score added as bonus points). */
  modifier: number;
}
|
|
|
|
/**
 * Optional tuning knobs for RankingAlgorithm.rankTorrents().
 */
export interface RankTorrentsOptions {
  /** indexerId -> priority (1-25). */
  indexerPriorities?: Map<number, number>;
  /** Flag bonus configurations. */
  flagConfigs?: IndexerFlagConfig[];
  /** Enforce author presence check (default: true). */
  requireAuthor?: boolean;
  /** Language-specific stop words for matching. */
  stopWords?: string[];
  /** Language-specific char replacements (e.g. ß→ss). */
  characterReplacements?: Record<string, string>;
}
|
|
|
|
/**
 * The ebook a user asked for; indexer results are matched against it.
 */
export interface EbookTorrentRequest {
  /** Requested title. */
  title: string;
  /** Requested author(s). */
  author: string;
  /** User's preferred format (epub, pdf, etc.). */
  preferredFormat: string;
}
|
|
|
|
/**
 * Optional tuning knobs for RankingAlgorithm.rankEbookTorrents().
 */
export interface RankEbookTorrentsOptions {
  /** indexerId -> priority (1-25). */
  indexerPriorities?: Map<number, number>;
  /** Flag bonus configurations. */
  flagConfigs?: IndexerFlagConfig[];
  /** Enforce author presence check (default: true). */
  requireAuthor?: boolean;
  /** Language-specific stop words for matching. */
  stopWords?: string[];
  /** Language-specific char replacements (e.g. ß→ss). */
  characterReplacements?: Record<string, string>;
}
|
|
|
|
/**
 * One bonus (or penalty) applied on top of a result's base score.
 */
export interface BonusModifier {
  /** Where the modifier comes from. */
  type: 'indexer_priority' | 'indexer_flag' | 'custom';
  /** Multiplier (e.g., 0.4 for 40%). */
  value: number;
  /** Calculated bonus points from this modifier. */
  points: number;
  /** Human-readable explanation. */
  reason: string;
}
|
|
|
|
/**
 * Per-component base score of an audiobook result.
 */
export interface ScoreBreakdown {
  /** Format quality points (10 max). */
  formatScore: number;
  /** File size quality points (15 max). */
  sizeScore: number;
  /** Seeder availability points (15 max). */
  seederScore: number;
  /** Title/author match points (60 max). */
  matchScore: number;
  /** Sum of the four component scores. */
  totalScore: number;
  /** Human-readable remarks about the scoring. */
  notes: string[];
}
|
|
|
|
/**
 * An audiobook result enriched with its scores and final rank.
 */
export interface RankedTorrent extends TorrentResult {
  /** Base score (0-100). */
  score: number;
  /** Every bonus/penalty that was applied. */
  bonusModifiers: BonusModifier[];
  /** Sum of all bonus points. */
  bonusPoints: number;
  /** score + bonusPoints. */
  finalScore: number;
  /** 1-based position after sorting (1 = best). */
  rank: number;
  /** Component scores and notes behind `score`. */
  breakdown: ScoreBreakdown;
}
|
|
|
|
/**
 * Per-component base score of an ebook result.
 */
export interface EbookScoreBreakdown {
  /** 0-10 points (match preferred = 10, else 0). */
  formatScore: number;
  /** 0-15 points (inverted - smaller is better). */
  sizeScore: number;
  /** 0-15 points (same as audiobooks). */
  seederScore: number;
  /** 0-60 points (same as audiobooks). */
  matchScore: number;
  /** Overall base score of the result. */
  totalScore: number;
  /** Human-readable remarks about the scoring. */
  notes: string[];
}
|
|
|
|
/**
 * An ebook result enriched with its scores and final rank.
 */
export interface RankedEbookTorrent extends TorrentResult {
  /** Base score (0-100). */
  score: number;
  /** Every bonus/penalty that was applied. */
  bonusModifiers: BonusModifier[];
  /** Sum of all bonus points. */
  bonusPoints: number;
  /** score + bonusPoints. */
  finalScore: number;
  /** Position of the result in the ranked list. */
  rank: number;
  /** Component scores and notes behind `score`. */
  breakdown: EbookScoreBreakdown;
  /** Detected ebook format (epub, pdf, mobi, etc.). */
  ebookFormat?: string;
}
|
|
|
|
export class RankingAlgorithm {
|
|
/**
 * Score every audiobook result and return them ordered best-first.
 *
 * Results smaller than 20 MB are dropped (likely ebooks/samples). Each
 * remaining result gets a base score (format + size + seeders + match),
 * then percentage bonuses derived from that base score:
 *  - indexer priority: priority/25 of the base score (priority defaults to 10
 *    when the indexer has no configured priority); only applied when the
 *    result carries an `indexerId`
 *  - indexer flags: modifier/100 of the base score for every result flag that
 *    matches a configured flag name (case-insensitive, trimmed)
 *
 * @param torrents - Results to rank
 * @param audiobook - Request to match against (durationMinutes drives size scoring)
 * @param options - Optional ranking configuration
 * @returns Ranked results sorted by finalScore descending, ties broken by
 *          newest publishDate, with `rank` starting at 1
 */
rankTorrents(
  torrents: TorrentResult[],
  audiobook: AudiobookRequest,
  options: RankTorrentsOptions = {}
): RankedTorrent[] {
  const {
    indexerPriorities,
    flagConfigs,
    requireAuthor = true, // Safe default: require author in automatic mode
    stopWords,
    characterReplacements,
  } = options;

  const bytesPerMB = 1024 * 1024;
  const minimumSizeMB = 20;
  const ranked: RankedTorrent[] = [];

  for (const torrent of torrents) {
    // Skip anything that is not at least 20 MB (likely ebooks/samples)
    if (!(torrent.size / bytesPerMB >= minimumSizeMB)) {
      continue;
    }

    // Base score components (0-100 in total)
    const scores: ScoreBreakdown = {
      formatScore: this.scoreFormat(torrent),
      sizeScore: this.scoreSize(torrent, audiobook.durationMinutes),
      seederScore: this.scoreSeeders(torrent.seeders),
      matchScore: this.scoreMatch(torrent, audiobook, requireAuthor, stopWords, characterReplacements),
      totalScore: 0,
      notes: [],
    };
    const baseScore = scores.formatScore + scores.sizeScore + scores.seederScore + scores.matchScore;
    scores.totalScore = baseScore;

    const bonusModifiers: BonusModifier[] = [];

    // Indexer priority bonus (default: 10/25 = 40%)
    if (torrent.indexerId !== undefined) {
      const priority = indexerPriorities?.get(torrent.indexerId) ?? 10;
      const share = priority / 25; // 1-25 maps to 0.04-1.0 (4%-100%)
      bonusModifiers.push({
        type: 'indexer_priority',
        value: share,
        points: baseScore * share,
        reason: `Indexer priority ${priority}/25 (${Math.round(share * 100)}%)`,
      });
    }

    // Flag bonuses/penalties
    if (flagConfigs && flagConfigs.length > 0) {
      for (const torrentFlag of torrent.flags ?? []) {
        // Case-insensitive, whitespace-trimmed matching
        const wanted = torrentFlag.trim().toLowerCase();
        const config = flagConfigs.find((cfg) => cfg.name.trim().toLowerCase() === wanted);
        if (!config) {
          continue;
        }

        const share = config.modifier / 100; // -100..100 maps to -1.0..1.0
        const sign = config.modifier > 0 ? '+' : '';
        bonusModifiers.push({
          type: 'indexer_flag',
          value: share,
          points: baseScore * share,
          reason: `Flag "${torrentFlag}" (${sign}${config.modifier}%)`,
        });
      }
    }

    // Sum all bonus points
    let bonusPoints = 0;
    for (const modifier of bonusModifiers) {
      bonusPoints = bonusPoints + modifier.points;
    }

    ranked.push({
      ...torrent,
      score: baseScore,
      bonusModifiers,
      bonusPoints,
      finalScore: baseScore + bonusPoints,
      rank: 0, // Assigned after sorting
      breakdown: {
        ...scores,
        notes: this.generateNotes(torrent, scores, audiobook.durationMinutes),
      },
    });
  }

  // Best final score first; equal scores fall back to newest publishDate first
  ranked.sort((a, b) =>
    b.finalScore !== a.finalScore
      ? b.finalScore - a.finalScore
      : b.publishDate.getTime() - a.publishDate.getTime()
  );

  // Assign 1-based ranks
  for (let position = 0; position < ranked.length; position++) {
    ranked[position].rank = position + 1;
  }

  return ranked;
}
|
|
|
|
/**
 * Get detailed scoring breakdown for a single torrent.
 *
 * Computes the same four base-score components that rankTorrents() uses
 * (format, size, seeders, match), without any bonus modifiers.
 *
 * @param torrent - Result to score
 * @param audiobook - Request to match against (durationMinutes drives size scoring)
 * @param requireAuthor - Enforce the author presence check (default: true)
 * @param stopWords - Language-specific stop words for matching; when omitted
 *                    scoreMatch() falls back to its English defaults
 * @param characterReplacements - Language-specific character replacements
 *                                (e.g. ß→ss) applied during normalization
 * @returns Component scores, their sum, and human-readable notes
 */
getScoreBreakdown(
  torrent: TorrentResult,
  audiobook: AudiobookRequest,
  requireAuthor: boolean = true,
  stopWords?: string[],
  characterReplacements?: Record<string, string>
): ScoreBreakdown {
  const formatScore = this.scoreFormat(torrent);
  const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
  const seederScore = this.scoreSeeders(torrent.seeders);
  // Forward the language parameters so the breakdown agrees with rankTorrents()
  const matchScore = this.scoreMatch(torrent, audiobook, requireAuthor, stopWords, characterReplacements);
  const totalScore = formatScore + sizeScore + seederScore + matchScore;

  const breakdown: ScoreBreakdown = {
    formatScore,
    sizeScore,
    seederScore,
    matchScore,
    totalScore,
    notes: [],
  };

  return {
    ...breakdown,
    notes: this.generateNotes(torrent, breakdown, audiobook.durationMinutes),
  };
}
|
|
|
|
/**
 * Score format quality (10 points max).
 *
 * Points per detected format:
 *  - M4B: 10 pts, or 9 pts when hasChapters is explicitly false
 *  - FLAC: 7 pts (lossless audio)
 *  - M4A: 6 pts
 *  - MP3: 4 pts
 *  - anything else: 1 pt
 *
 * @param torrent - Result whose format is detected via detectFormat()
 * @returns Format points in the range 1-10
 */
private scoreFormat(torrent: TorrentResult): number {
  const detected = this.detectFormat(torrent);

  if (detected === 'M4B') {
    // Unknown chapter state (undefined) is treated like "has chapters"
    return torrent.hasChapters === false ? 9 : 10;
  }
  if (detected === 'FLAC') {
    return 7;
  }
  if (detected === 'M4A') {
    return 6;
  }
  if (detected === 'MP3') {
    return 4;
  }
  return 1;
}
|
|
|
|
/**
 * Score file size quality (15 points max).
 *
 * Relates the file size to the book runtime so that files far too small for
 * an audiobook of that length score poorly.
 *
 * Algorithm:
 *  - >= 1.0 MB/min → 15 points (high quality baseline)
 *  - below 1.0 MB/min → linear scaling (0.5 MB/min = 7.5, 0.3 MB/min = 4.5)
 *  - 0 points if there is no usable runtime (missing, zero, negative or
 *    non-finite) or the size is not a positive finite number
 *
 * Note: Files < 20 MB are pre-filtered in rankTorrents()
 *
 * @param torrent - Torrent result with size in bytes
 * @param runtimeMinutes - Book runtime in minutes
 * @returns 0-15 points based on MB/min ratio
 */
private scoreSize(torrent: TorrentResult, runtimeMinutes: number | undefined): number {
  // Graceful degradation: no usable runtime data = no size scoring
  if (runtimeMinutes === undefined || !Number.isFinite(runtimeMinutes) || runtimeMinutes <= 0) {
    return 0;
  }

  const sizeMB = torrent.size / (1024 * 1024);
  const mbPerMin = sizeMB / runtimeMinutes;

  // A NaN or negative size must not leak NaN/negative points into the total
  if (!Number.isFinite(mbPerMin) || mbPerMin <= 0) {
    return 0;
  }

  // 1.0 MB/min (1024 * 1024 * 8 bits / 60 s ≈ 140 kbps) or more earns full
  // points; anything below scales linearly.
  return Math.min(15, mbPerMin * 15);
}
|
|
|
|
/**
 * Score seeder count (15 points max).
 *
 * Logarithmic scaling, `log10(seeders + 1) * 6` capped at 15:
 *  - 0 (or fewer) seeders: 0 points
 *  - 1 seeder: ~1.8 points
 *  - 10 seeders: ~6.2 points
 *  - 100 seeders: ~12.0 points
 *  - 316+ seeders: 15 points
 *
 * Note: NZB/Usenet results don't have a seeders concept - centralized
 * servers provide availability, so a missing count earns the full score.
 *
 * @param seeders - Seeder count, or undefined for NZB/Usenet results
 * @returns 0-15 points
 */
private scoreSeeders(seeders: number | undefined): number {
  // Missing count (NZB results): full score, Usenet doesn't need seeders
  if (seeders == null || isNaN(seeders)) {
    return 15;
  }

  // Zero seeders score nothing. Negative counts are treated the same way:
  // log10 of a non-positive argument would yield NaN or -Infinity.
  if (seeders <= 0) {
    return 0;
  }

  return Math.min(15, Math.log10(seeders + 1) * 6);
}
|
|
|
|
|
|
/**
 * Normalize text for matching by handling CamelCase, diacritics and
 * punctuation separators.
 *
 *   "VirginaEvans TheCorrespondent" → "virgina evans the correspondent"
 *   "Twelve.Months-Jim.Butcher" → "twelve months jim butcher"
 *   "Author_Name_Book" → "author name book"
 *   "Señor Café" → "senor cafe"
 *
 * Steps, in order: split CamelCase, lowercase, apply characterReplacements,
 * strip combining diacritics (NFD), turn underscores and punctuation (except
 * apostrophes) into spaces, collapse whitespace, trim.
 *
 * @param text - Raw text to normalize
 * @param characterReplacements - Language-specific replacements (e.g. ß→ss).
 *        Keys are treated as literal text and are matched against the
 *        already-lowercased input; empty keys are ignored.
 * @returns The normalized, lowercase, single-space-separated text
 */
private normalizeForMatching(text: string, characterReplacements?: Record<string, string>): string {
  let result = text
    // Split CamelCase FIRST (before lowercasing): "TheCorrespondent" → "The Correspondent"
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .toLowerCase();

  // Apply language-specific character replacements before NFD (e.g. ß→ss).
  // split/join replaces every literal occurrence, so keys containing regex
  // metacharacters (".", "(", "+", ...) cannot be misread as patterns.
  if (characterReplacements) {
    for (const [from, to] of Object.entries(characterReplacements)) {
      if (from.length === 0) {
        continue;
      }
      result = result.split(from).join(to);
    }
  }

  return result
    // NFD normalization: decompose accented chars, then drop the combining
    // marks, e.g. "über" → "uber", "señor" → "senor", "café" → "cafe"
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    // Replace underscores with spaces (must be explicit since \w includes _)
    .replace(/_/g, ' ')
    // Replace other punctuation/separators with spaces (preserves apostrophes in contractions)
    .replace(/[^\w\s']/g, ' ')
    // Collapse multiple spaces
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
|
|
/**
 * Score title/author match quality (60 points max).
 *  - Title similarity: 0-45 points (heavily weighted!)
 *  - Author presence: 0-15 points
 *
 * Stages:
 *  1. Word coverage filter: at least 80% of the significant words of the
 *     REQUIRED part of the requested title (text outside brackets and before
 *     a colon) must appear in the torrent title, otherwise the score is 0.
 *  1.5 Author presence check (only when requireAuthor is true): at least one
 *     requested author must be found with high confidence, otherwise 0.
 *  2. Title matching: 45 points when the requested title appears as a
 *     complete title (acceptable prefix and metadata-like suffix), otherwise
 *     fuzzy similarity * 45.
 *  3. Author matching: share of requested authors found as substrings * 15,
 *     or fuzzy similarity * 15 when none is found.
 *
 * @param torrent - Result whose title is inspected
 * @param audiobook - Requested title and author(s)
 * @param requireAuthor - Reject results without a confident author match
 * @param customStopWords - Language-specific stop words; English defaults
 *                          are used when omitted
 * @param characterReplacements - Language-specific character replacements
 *                                passed through to normalizeForMatching()
 * @returns Match points in the range 0-60
 */
private scoreMatch(
  torrent: TorrentResult,
  audiobook: AudiobookRequest,
  requireAuthor: boolean = true,
  customStopWords?: string[],
  characterReplacements?: Record<string, string>
): number {
  // Normalize for matching (handles CamelCase, punctuation separators, diacritics)
  const torrentTitle = this.normalizeForMatching(torrent.title, characterReplacements);
  const requestTitle = this.normalizeForMatching(audiobook.title, characterReplacements);

  // Parse authors from the RAW string first (commas are needed for splitting),
  // drop role labels, then normalize each author individually.
  const normalizedAuthors = audiobook.author
    .toLowerCase()
    .replace(/\s+/g, ' ')
    .trim()
    .split(/,|&| and | - /)
    .map((a) => a.trim())
    .filter((a) => a.length > 2 && !['translator', 'narrator'].includes(a))
    .map((a) => this.normalizeForMatching(a, characterReplacements))
    // An author that normalizes to '' (punctuation only) would match every
    // title via includes(''), so it must not take part in matching.
    .filter((a) => a.length > 0);
  // Combined normalized author string for fuzzy matching
  const requestAuthorNormalized = normalizedAuthors.join(' ');

  // ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
  // Use provided language-specific stop words, or fall back to English defaults
  const stopWords = customStopWords ?? ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for'];

  // Significant words of a text: normalized, split on whitespace, stop words removed
  const extractWords = (text: string): string[] =>
    this.normalizeForMatching(text, characterReplacements)
      .split(/\s+/)
      .filter((word) => word.length > 0 && !stopWords.includes(word));

  // Required part of the requested title. Derived from the ORIGINAL title
  // because brackets/colons are removed by normalization:
  //   "Title (Subtitle)"    → subtitle may be omitted by uploaders
  //   "Title: Series Name"  → Audible commonly appends series names after a colon
  let requiredRaw = audiobook.title
    .toLowerCase()
    .replace(/[(\[{][^)\]}]+[)\]}]/g, ' ')
    .trim();
  const colonIndex = requiredRaw.indexOf(':');
  if (colonIndex > 0 && colonIndex < requiredRaw.length - 1) {
    requiredRaw = requiredRaw.substring(0, colonIndex).trim();
  }
  const requiredTitle = this.normalizeForMatching(requiredRaw, characterReplacements);

  // Coverage: how many REQUIRED words appear in the TORRENT title.
  // A title made only of stop words / optional content skips the filter.
  const requiredWords = extractWords(requiredTitle);
  if (requiredWords.length > 0) {
    const torrentWords = extractWords(torrentTitle);
    const matchedCount = requiredWords.filter((word) => torrentWords.includes(word)).length;

    // HARD REQUIREMENT: Must have 80%+ coverage of REQUIRED words
    if (matchedCount / requiredWords.length < 0.80) {
      return 0;
    }
  }

  // ========== STAGE 1.5: AUTHOR PRESENCE CHECK (OPTIONAL) ==========
  // Only enforced in automatic mode (requireAuthor: true);
  // interactive search (requireAuthor: false) shows all results
  if (requireAuthor && !this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors)) {
    // No high-confidence author match → reject to prevent wrong-author matches
    return 0;
  }

  // ========== STAGE 2: TITLE MATCHING (0-45 points) ==========
  let titleScore = 0;

  // Keep original torrent title (lowercased only) for metadata marker detection:
  // markers like [ ] ( ) : are removed by normalization but needed for suffix validation
  const torrentTitleOriginal = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();

  // Try the full title first, then the required title (without optional parts)
  const titlesToTry = [requestTitle];
  if (requiredTitle !== requestTitle) {
    titlesToTry.push(requiredTitle);
  }

  // Text that may legitimately follow a complete title
  const metadataMarkers = [' by ', ' - ', ' [', ' (', ' {', ' :', ','];
  const separators = ['-', ':', '—'];

  let bestMatch = false;
  for (const titleToMatch of titlesToTry) {
    if (!torrentTitle.includes(titleToMatch)) {
      continue;
    }

    // Found the title, but is it the complete title or part of a longer one?
    const titleIndex = torrentTitle.indexOf(titleToMatch);
    const beforeTitle = torrentTitle.substring(0, titleIndex);
    const afterTitle = torrentTitle.substring(titleIndex + titleToMatch.length);

    // Locate the title inside the ORIGINAL string through its first/last
    // significant word, so markers removed by normalization can be inspected.
    const titleWords = titleToMatch.split(/\s+/).filter((w) => w.length > 2);
    let afterTitleOriginal = '';
    let beforeTitleOriginal = '';
    if (titleWords.length > 0) {
      const lastTitleWord = titleWords[titleWords.length - 1];
      const lastWordIdxOriginal = torrentTitleOriginal.lastIndexOf(lastTitleWord);
      if (lastWordIdxOriginal !== -1) {
        afterTitleOriginal = torrentTitleOriginal.substring(lastWordIdxOriginal + lastTitleWord.length);
      }

      const firstTitleWord = titleWords[0];
      const firstWordIdxOriginal = torrentTitleOriginal.indexOf(firstTitleWord);
      if (firstWordIdxOriginal !== -1) {
        beforeTitleOriginal = torrentTitleOriginal.substring(0, firstWordIdxOriginal).trimEnd();
      }
    }

    // --- Suffix: must look like metadata, not like more title words ---
    // (rejects continuations such as "'s Secret" or " Is Watching")
    // An author directly after the title covers "Title Author Year" layouts.
    const afterStartsWithAuthor = normalizedAuthors.some(
      (author) => author.length > 2 && afterTitle.trim().startsWith(author)
    );
    const hasMetadataSuffix =
      afterTitle === '' ||
      metadataMarkers.some((marker) => afterTitle.startsWith(marker)) ||
      metadataMarkers.some((marker) => afterTitleOriginal.startsWith(marker)) ||
      afterStartsWithAuthor;

    // --- Prefix: acceptable when ... ---
    // - no significant words precede the title (clean match)
    const hasNoWordsPrefix = extractWords(beforeTitle).length === 0;
    // - the title directly follows a separator ("Author - Series - 01 - Title");
    //   checked in both the normalized and the original string
    const precedingText = beforeTitle.trimEnd();
    const titlePrecededBySeparator = separators.some(
      (sep) => precedingText.endsWith(sep) || beforeTitleOriginal.endsWith(sep)
    );
    // - an author name appears in the prefix ("Author Name - Title")
    const authorInPrefix = normalizedAuthors.some(
      (author) => author.length > 2 && beforeTitle.includes(author)
    );
    const hasAcceptablePrefix = hasNoWordsPrefix || titlePrecededBySeparator || authorInPrefix;

    if (hasAcceptablePrefix && hasMetadataSuffix) {
      // Complete title match → full points
      titleScore = 45;
      bestMatch = true;
      break;
    }
  }

  if (!bestMatch) {
    // No complete match found: best fuzzy similarity over the candidate titles
    const fuzzyScores = titlesToTry.map((title) => compareTwoStrings(title, torrentTitle));
    titleScore = Math.max(...fuzzyScores) * 45;
  }

  // ========== STAGE 3: AUTHOR MATCHING (0-15 points) ==========
  // Count the authors that appear in the torrent title (exact substring match)
  const authorMatches = normalizedAuthors.filter((author) => torrentTitle.includes(author));

  let authorScore = 0;
  if (authorMatches.length > 0) {
    // Exact substring match → proportional credit
    authorScore = (authorMatches.length / normalizedAuthors.length) * 15;
  } else {
    // No exact match → use fuzzy similarity for partial credit
    authorScore = compareTwoStrings(requestAuthorNormalized, torrentTitle) * 15;
  }

  return Math.min(60, titleScore + authorScore);
}
|
|
|
|
/**
 * Check if an author is present in the torrent title with high confidence.
 * Uses a pre-parsed and normalized authors array.
 *
 * An author counts as present when any of these holds:
 *  1. the author string is a substring of the title
 *  2. fuzzy similarity between author and title is >= 0.85
 *  3. the first and last name components both occur in the title, with
 *     their first occurrences at most 30 characters apart
 *
 * Empty author strings never count as present.
 *
 * @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
 * @param normalizedAuthors - Array of normalized author names (roles already filtered)
 * @returns true if at least ONE author is present with high confidence
 */
private checkAuthorPresenceWithParsed(torrentTitle: string, normalizedAuthors: string[]): boolean {
  // At least ONE author must match with high confidence
  return normalizedAuthors.some((author) => {
    // includes('') is true for every title, so an empty author would
    // silently disable the whole check.
    if (author.length === 0) {
      return false;
    }

    // Check 1: Exact substring match (both sides are normalized)
    if (torrentTitle.includes(author)) {
      return true;
    }

    // Check 2: High fuzzy similarity (≥ 0.85)
    // Intended for variants such as "J.K. Rowling" vs "J. K. Rowling" vs "JK Rowling"
    if (compareTwoStrings(author, torrentTitle) >= 0.85) {
      return true;
    }

    // Check 3: Core name components (first + last name present within 30 chars)
    // Handles: "Sanderson, Brandon" vs "Brandon Sanderson"
    // Handles: "Brandon R. Sanderson" vs "Brandon Sanderson"
    const words = author.split(/\s+/).filter((w) => w.length > 1);
    if (words.length < 2) {
      return false;
    }

    const firstIdx = torrentTitle.indexOf(words[0]);
    const lastIdx = torrentTitle.indexOf(words[words.length - 1]);

    // Both components present and reasonably close?
    return firstIdx !== -1 && lastIdx !== -1 && Math.abs(lastIdx - firstIdx) <= 30;
  });
}
|
|
|
|
/**
 * Check if an author is present in the torrent title with high confidence.
 * Parses the raw author string, then delegates to checkAuthorPresenceWithParsed().
 *
 * Parsing: split on ",", "&", " and " (any case) or " - ", trim, drop
 * entries of 2 characters or fewer and the role labels "translator" /
 * "narrator" (any case), normalize each remaining name, drop names that
 * normalize to an empty string.
 *
 * @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
 * @param requestAuthor - Raw author string (will be parsed and normalized internally)
 * @param characterReplacements - Language-specific character replacements
 *                                passed through to normalizeForMatching()
 * @returns true if at least ONE author is present with high confidence
 */
private checkAuthorPresence(torrentTitle: string, requestAuthor: string, characterReplacements?: Record<string, string>): boolean {
  const roleLabels = ['translator', 'narrator'];

  const normalizedAuthors = requestAuthor
    // Case-insensitive so " And " separates authors as well
    .split(/,|&| and | - /i)
    .map((a) => a.trim())
    // The raw string is not lowercased here (normalizeForMatching needs the
    // original case to split CamelCase), so compare role labels case-insensitively
    .filter((a) => a.length > 2 && !roleLabels.includes(a.toLowerCase()))
    .map((a) => this.normalizeForMatching(a, characterReplacements))
    // A name that normalizes to '' would match every title via includes('')
    .filter((a) => a.length > 0);

  return this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors);
}
|
|
|
|
/**
 * Detect the audio format of a result.
 *
 * An explicit `torrent.format` wins. Otherwise the upper-cased title is
 * searched for the keywords M4B, M4A, MP3 and FLAC, in that order, and the
 * first keyword found is returned.
 *
 * @param torrent - Result to inspect
 * @returns The detected format, or 'OTHER' when no keyword is found
 */
private detectFormat(torrent: TorrentResult): 'M4B' | 'M4A' | 'MP3' | 'FLAC' | 'OTHER' {
  // Use explicit format if provided
  if (torrent.format) {
    return torrent.format;
  }

  // Keyword order matters: the first keyword present in the title wins
  const keywords = ['M4B', 'M4A', 'MP3', 'FLAC'] as const;
  const upperTitle = torrent.title.toUpperCase();

  for (const keyword of keywords) {
    if (upperTitle.includes(keyword)) {
      return keyword;
    }
  }

  // Default to OTHER if no format detected
  return 'OTHER';
}
|
|
|
|
/**
 * Generate human-readable notes about scoring.
 *
 * Notes are appended in a fixed order: format, size (only with a positive
 * runtime), seeders (only when a seeder count exists), title/author match,
 * overall assessment.
 *
 * @param torrent - Result the notes describe
 * @param breakdown - Component scores; matchScore and totalScore are read
 * @param runtimeMinutes - Book runtime in minutes, used for the MB/min size note
 * @returns The list of notes (a new array)
 */
private generateNotes(
  torrent: TorrentResult,
  breakdown: ScoreBreakdown,
  runtimeMinutes?: number
): string[] {
  const notes: string[] = [];

  // Format notes
  switch (this.detectFormat(torrent)) {
    case 'M4B':
      notes.push('Excellent format (M4B)');
      if (torrent.hasChapters !== false) {
        notes.push('Has chapter markers');
      }
      break;
    case 'FLAC':
      notes.push('Lossless format (FLAC)');
      break;
    case 'M4A':
      notes.push('Good format (M4A)');
      break;
    case 'MP3':
      notes.push('Acceptable format (MP3)');
      break;
    default:
      notes.push('Unknown or uncommon format');
      break;
  }

  // Size notes, based on MB per minute of runtime
  if (runtimeMinutes && runtimeMinutes > 0) {
    const mbPerMin = torrent.size / (1024 * 1024) / runtimeMinutes;

    let sizeNote = '⚠️ Very low quality - may be ebook';
    if (mbPerMin >= 1.5) {
      sizeNote = '✓ Premium quality (high bitrate)';
    } else if (mbPerMin >= 1.0) {
      sizeNote = '✓ High quality';
    } else if (mbPerMin >= 0.5) {
      sizeNote = 'Standard quality';
    } else if (mbPerMin >= 0.3) {
      sizeNote = '⚠️ Low quality (low bitrate)';
    }
    notes.push(sizeNote);
  }

  // Seeder notes (skip for NZB/Usenet results which don't have seeders)
  const seeders = torrent.seeders;
  if (seeders != null && !isNaN(seeders)) {
    if (seeders === 0) {
      notes.push('⚠️ No seeders available');
    } else if (seeders < 5) {
      notes.push(`Low seeders (${seeders})`);
    } else if (seeders >= 50) {
      notes.push(`Excellent availability (${seeders} seeders)`);
    }
  }

  // Match notes (match is worth up to 60 points)
  const match = breakdown.matchScore;
  if (match < 24) {
    notes.push('⚠️ Poor title/author match');
  } else if (match < 42) {
    notes.push('⚠️ Weak title/author match');
  } else if (match >= 54) {
    notes.push('✓ Excellent title/author match');
  }

  // Overall quality assessment
  const total = breakdown.totalScore;
  if (total >= 75) {
    notes.push('✓ Excellent choice');
  } else if (total >= 55) {
    notes.push('✓ Good choice');
  } else if (total < 35) {
    notes.push('⚠️ Consider reviewing this choice');
  }

  return notes;
}
|
|
|
|
// =========================================================================
|
|
// EBOOK TORRENT RANKING (for indexer results)
|
|
// Reuses scoreMatch() and scoreSeeders() from audiobook ranking
|
|
// Uses ebook-specific format and size scoring
|
|
// =========================================================================
|
|
|
|
/**
 * Rank ebook torrents returned by indexers.
 *
 * Torrents larger than 20 MB are dropped before scoring. Every remaining
 * torrent gets a base score that is the sum of four parts: ebook format,
 * file size, seeders and title/author match. Indexer-priority and
 * indexer-flag bonuses are each computed as a fraction of that base score
 * and added on top to give the final score.
 *
 * @param torrents - Torrent results to rank
 * @param ebook - Requested ebook (title, author, preferredFormat)
 * @param options - Optional ranking configuration; `requireAuthor` defaults to true
 * @returns Scored torrents ordered by final score (highest first), ties broken
 *          by most recent publish date, with 1-based `rank` assigned
 */
rankEbookTorrents(
  torrents: TorrentResult[],
  ebook: EbookTorrentRequest,
  options: RankEbookTorrentsOptions = {}
): RankedEbookTorrent[] {
  // Safe default: the author must match unless the caller opts out
  const { requireAuthor = true } = options;
  const priorityByIndexer = options.indexerPriorities;
  const configuredFlags = options.flagConfigs;

  const bytesPerMB = 1024 * 1024;
  const maxEbookSizeMB = 20;

  // Anything above the size cap is considered too large to be an ebook
  const candidates = torrents.filter(
    (candidate) => candidate.size / bytesPerMB <= maxEbookSizeMB
  );

  const ranked = candidates.map((candidate) => {
    const detectedFormat = this.detectEbookFormat(candidate);

    // Base score components; match and seeder scoring are shared with audiobooks
    const formatScore = this.scoreEbookFormat(candidate, ebook.preferredFormat);
    const sizeScore = this.scoreEbookSize(candidate);
    const seederScore = this.scoreSeeders(candidate.seeders);
    const matchScore = this.scoreMatch(
      candidate,
      { title: ebook.title, author: ebook.author },
      requireAuthor,
      options.stopWords,
      options.characterReplacements
    );
    const baseScore = formatScore + sizeScore + seederScore + matchScore;

    const bonusModifiers: BonusModifier[] = [];

    // Indexer priority: 1-25 maps to 4%-100% of the base score (default 10 => 40%)
    if (candidate.indexerId !== undefined) {
      const priority = priorityByIndexer?.get(candidate.indexerId) ?? 10;
      const fraction = priority / 25;
      bonusModifiers.push({
        type: 'indexer_priority',
        value: fraction,
        points: baseScore * fraction,
        reason: `Indexer priority ${priority}/25 (${Math.round(fraction * 100)}%)`,
      });
    }

    // Flag bonuses/penalties: configured percentage of the base score,
    // matched against the torrent's flags ignoring case and padding
    if (candidate.flags?.length && configuredFlags?.length) {
      for (const flag of candidate.flags) {
        const wanted = flag.trim().toLowerCase();
        const config = configuredFlags.find(
          (cfg) => cfg.name.trim().toLowerCase() === wanted
        );
        if (!config) {
          continue;
        }

        const fraction = config.modifier / 100;
        bonusModifiers.push({
          type: 'indexer_flag',
          value: fraction,
          points: baseScore * fraction,
          reason: `Flag "${flag}" (${config.modifier > 0 ? '+' : ''}${config.modifier}%)`,
        });
      }
    }

    let bonusPoints = 0;
    for (const bonus of bonusModifiers) {
      bonusPoints += bonus.points;
    }

    const scores = {
      formatScore,
      sizeScore,
      seederScore,
      matchScore,
      totalScore: baseScore,
    };

    return {
      ...candidate,
      score: baseScore,
      bonusModifiers,
      bonusPoints,
      finalScore: baseScore + bonusPoints,
      rank: 0, // placeholder until the list is sorted
      breakdown: {
        ...scores,
        notes: this.generateEbookNotes(
          candidate,
          { ...scores, notes: [] },
          ebook.preferredFormat
        ),
      },
      ebookFormat: detectedFormat !== 'unknown' ? detectedFormat : undefined,
    };
  });

  // Best final score first; equal scores fall back to the newest upload
  ranked.sort((a, b) =>
    a.finalScore === b.finalScore
      ? b.publishDate.getTime() - a.publishDate.getTime()
      : b.finalScore - a.finalScore
  );

  for (let position = 0; position < ranked.length; position++) {
    ranked[position].rank = position + 1;
  }

  return ranked;
}
|
|
|
|
/**
 * Score the ebook format of a torrent (10 points max).
 *
 * All-or-nothing: 10 points when the format detected from the torrent title
 * equals the lowercased preferred format, 0 for anything else (including an
 * undetected format).
 *
 * @param torrent - Torrent whose title is inspected for a format
 * @param preferredFormat - Requested format, compared case-insensitively
 * @returns 10 on a match, otherwise 0
 */
private scoreEbookFormat(torrent: TorrentResult, preferredFormat: string): number {
  const found = this.detectEbookFormat(torrent);
  const wanted = preferredFormat.toLowerCase();
  return found === wanted ? 10 : 0;
}
|
|
|
|
/**
 * Score ebook file size (15 points max; smaller files score higher).
 *
 *   under 5 MB   -> 15
 *   up to 15 MB  -> 10
 *   up to 20 MB  -> 5
 *   anything else -> 0
 *
 * @param torrent - Torrent whose `size` (bytes) is scored
 * @returns Size score between 0 and 15
 */
private scoreEbookSize(torrent: TorrentResult): number {
  const megabytes = torrent.size / (1024 * 1024);

  if (megabytes < 5) return 15; // typical text-only ebook
  if (megabytes <= 15) return 10; // acceptable, may include images
  if (megabytes <= 20) return 5; // large but still within the cap

  // Larger files are expected to be filtered out earlier; score 0 as a fallback
  return 0;
}
|
|
|
|
/**
 * Detect the ebook format advertised in a torrent title.
 *
 * The lowercased title is checked against each known format keyword in
 * priority order (epub, pdf, mobi, azw3, azw, fb2, cbz, cbr) and the first
 * keyword that appears as a standalone token wins. A keyword is standalone
 * when it is not preceded by a letter or digit and is not followed by a
 * letter (an optional plural "s" is tolerated). So "book.epub", "(epub)",
 * "[epub]", "book epub", "book_epub", "book-epub", "epub3" and "50 epubs"
 * are all recognised, while ordinary words that merely contain a keyword
 * ("Mobile", "Epublishing") are not mistaken for a format.
 *
 * @param torrent - Torrent whose title is inspected
 * @returns The matched format keyword in lowercase, or 'unknown' if none is found
 */
private detectEbookFormat(torrent: TorrentResult): string {
  const title = torrent.title.toLowerCase();

  // Order matters: the first hit is returned, and 'azw3' must precede 'azw'
  const formats = ['epub', 'pdf', 'mobi', 'azw3', 'azw', 'fb2', 'cbz', 'cbr'];

  for (const format of formats) {
    // Keywords are plain alphanumerics, so they can be embedded unescaped.
    // Leading boundary: start of title or any non-alphanumeric character.
    // Trailing boundary: optional plural "s", then anything but a letter.
    const standaloneKeyword = new RegExp(`(?:^|[^a-z0-9])${format}s?(?![a-z])`);
    if (standaloneKeyword.test(title)) {
      return format;
    }
  }

  return 'unknown';
}
|
|
|
|
/**
 * Build the human-readable notes that accompany an ebook torrent's score.
 *
 * Notes are appended in a fixed order: format, file size, seeders,
 * title/author match, then an overall verdict. The match and overall
 * thresholds mirror the ones used for audiobook notes.
 *
 * @param torrent - Torrent being described (title, size and seeders are read)
 * @param breakdown - Scores already computed for this torrent
 * @param preferredFormat - Requested format, shown when the torrent differs
 * @returns Notes in display order
 */
private generateEbookNotes(
  torrent: TorrentResult,
  breakdown: EbookScoreBreakdown,
  preferredFormat: string
): string[] {
  const notes: string[] = [];

  // --- Format ---
  const format = this.detectEbookFormat(torrent);
  if (breakdown.formatScore === 10) {
    notes.push(`✓ Preferred format (${format.toUpperCase()})`);
  } else if (format === 'unknown') {
    notes.push('⚠️ Unknown format');
  } else {
    notes.push(`Different format (${format.toUpperCase()}, wanted ${preferredFormat.toUpperCase()})`);
  }

  // --- File size (same bands as the size score) ---
  const megabytes = torrent.size / (1024 * 1024);
  if (megabytes < 5) {
    notes.push('✓ Optimal file size');
  } else if (megabytes <= 15) {
    notes.push('Good file size (may have images)');
  } else if (megabytes <= 20) {
    notes.push('⚠️ Large file size');
  }

  // --- Seeders (skipped when the count is missing or not a number) ---
  const seeders = torrent.seeders;
  if (seeders != null && !isNaN(seeders)) {
    if (seeders === 0) {
      notes.push('⚠️ No seeders available');
    } else if (seeders < 5) {
      notes.push(`Low seeders (${seeders})`);
    } else if (seeders >= 50) {
      notes.push(`Excellent availability (${seeders} seeders)`);
    }
  }

  // --- Title/author match ---
  const match = breakdown.matchScore;
  if (match < 24) {
    notes.push('⚠️ Poor title/author match');
  } else if (match < 42) {
    notes.push('⚠️ Weak title/author match');
  } else if (match >= 54) {
    notes.push('✓ Excellent title/author match');
  }

  // --- Overall verdict ---
  const total = breakdown.totalScore;
  if (total >= 75) {
    notes.push('✓ Excellent choice');
  } else if (total >= 55) {
    notes.push('✓ Good choice');
  } else if (total < 35) {
    notes.push('⚠️ Consider reviewing this choice');
  }

  return notes;
}
|
|
}
|
|
|
|
// =========================================================================
|
|
// EBOOK RANKING (simplified algorithm for ebook search results)
|
|
// =========================================================================
|
|
|
|
/**
 * A single ebook search hit, before ranking.
 */
export interface EbookResult {
  md5: string;
  title: string;
  author: string;
  /** File format: epub, pdf, mobi, etc. */
  format: string;
  /** File size in bytes, when known */
  fileSize?: number;
  downloadUrls: string[];
  /** Where the result came from */
  source: 'annas_archive' | 'prowlarr';
  /** Prowlarr indexer ID (if applicable) */
  indexerId?: number;
}
|
|
|
|
/**
 * The ebook a user asked for, used as the ranking reference.
 */
export interface EbookRequest {
  title: string;
  author: string;
  /** User's preferred format (epub, pdf, etc.) */
  preferredFormat: string;
}
|
|
|
|
/**
 * An ebook search result annotated with its ranking outcome.
 */
export interface RankedEbook extends EbookResult {
  /** Total score (0-100) */
  score: number;
  rank: number;
  breakdown: {
    /** 0-40 points */
    formatScore: number;
    /** 0-30 points (inverted - smaller is better) */
    sizeScore: number;
    /** 0-30 points (Anna's Archive priority) */
    sourceScore: number;
    notes: string[];
  };
}
|
|
|
|
/**
 * Rank ebook search results against a request.
 *
 * Each result can earn up to 100 points:
 * - Format (0-40): 40 for the preferred format, 30 for a format in the same
 *   family, otherwise 25 for EPUB, 15 for PDF and 10 for anything else
 * - Size (0-30): smaller files score higher; a missing or non-positive size
 *   gets a neutral 15
 * - Source (0-30): 30 for Anna's Archive, 15 for Prowlarr
 *
 * @param results - Search results to score
 * @param request - Requested ebook; only `preferredFormat` influences the score
 * @returns A new array ordered by score (highest first) with 1-based `rank`
 */
export function rankEbooks(
  results: EbookResult[],
  request: EbookRequest
): RankedEbook[] {
  const wantedFormat = request.preferredFormat.toLowerCase();

  // Formats within one family are treated as near-equivalents
  const formatFamilies = [
    ['epub', 'kepub'], // EPUB family
    ['mobi', 'azw', 'azw3'], // Kindle family
    ['pdf'], // PDF standalone
    ['fb2', 'fb2.zip'], // FB2 family
    ['cbz', 'cbr'], // Comic formats
  ];
  const familyOf = (format: string): string[] | undefined =>
    formatFamilies.find((family) => family.includes(format));
  const wantedFamily = familyOf(wantedFormat);

  const ranked = results.map((result): RankedEbook => {
    const notes: string[] = [];

    // ---------- Format (0-40) ----------
    const actualFormat = result.format.toLowerCase();
    const formatLabel = result.format.toUpperCase();
    let formatScore: number;

    if (actualFormat === wantedFormat) {
      formatScore = 40;
      notes.push(`✓ Preferred format (${formatLabel})`);
    } else if (wantedFamily !== undefined && wantedFamily === familyOf(actualFormat)) {
      formatScore = 30; // same family as the preferred format
      notes.push(`Similar format (${formatLabel})`);
    } else if (actualFormat === 'epub') {
      formatScore = 25; // EPUB is universally convertible
      notes.push(`Convertible format (${formatLabel})`);
    } else if (actualFormat === 'pdf') {
      formatScore = 15; // common but less flexible
      notes.push('PDF format (less flexible)');
    } else {
      formatScore = 10;
      notes.push(`Different format (${formatLabel})`);
    }

    // ---------- Size (0-30, smaller is better) ----------
    let sizeScore: number;
    const bytes = result.fileSize;

    if (bytes === undefined || !(bytes > 0)) {
      // No usable size information: stay neutral
      sizeScore = 15;
      notes.push('File size unknown');
    } else {
      const megabytes = bytes / (1024 * 1024);
      if (megabytes <= 2) {
        sizeScore = 30;
        notes.push('✓ Optimal file size');
      } else if (megabytes <= 5) {
        sizeScore = 25;
        notes.push('Good file size');
      } else if (megabytes <= 15) {
        sizeScore = 20; // probably contains images
        notes.push('Larger file (may have images)');
      } else if (megabytes <= 50) {
        sizeScore = 10; // possibly bloated
        notes.push('⚠️ Large file size');
      } else {
        sizeScore = 5; // suspiciously large
        notes.push('⚠️ Very large file (may include extras)');
      }
    }

    // ---------- Source (0-30) ----------
    let sourceScore = 0;
    switch (result.source) {
      case 'annas_archive':
        sourceScore = 30;
        notes.push('✓ Anna\'s Archive (reliable)');
        break;
      case 'prowlarr':
        // Fixed base score for now; per-indexer priority is not applied here
        sourceScore = 15;
        notes.push('Prowlarr indexer');
        break;
    }

    return {
      ...result,
      score: formatScore + sizeScore + sourceScore,
      rank: 0, // placeholder until the list is sorted
      breakdown: { formatScore, sizeScore, sourceScore, notes },
    };
  });

  // Highest score first
  ranked.sort((a, b) => b.score - a.score);

  for (let position = 0; position < ranked.length; position++) {
    ranked[position].rank = position + 1;
  }

  return ranked;
}
|
|
|
|
// Shared instance, created on first request
let ranker: RankingAlgorithm | null = null;

/**
 * Return the module-wide RankingAlgorithm, constructing it on the first call
 * and handing back the same object on every later call.
 */
export function getRankingAlgorithm(): RankingAlgorithm {
  if (ranker === null) {
    ranker = new RankingAlgorithm();
  }
  return ranker;
}
|
|
|
|
/**
 * Rank audiobook torrents using the shared RankingAlgorithm instance.
 *
 * @param torrents - Array of torrent results to rank
 * @param audiobook - Audiobook request details
 * @param options - Optional ranking configuration
 * @returns Ranked torrents, each with an added `qualityScore` (the rounded `score`)
 */
export function rankTorrents(
  torrents: TorrentResult[],
  audiobook: AudiobookRequest,
  options?: RankTorrentsOptions
): (RankedTorrent & { qualityScore: number })[];

/**
 * Rank audiobook torrents using the shared RankingAlgorithm instance
 * (legacy positional signature).
 * @deprecated Use options object instead
 */
export function rankTorrents(
  torrents: TorrentResult[],
  audiobook: AudiobookRequest,
  indexerPriorities?: Map<number, number>,
  flagConfigs?: IndexerFlagConfig[]
): (RankedTorrent & { qualityScore: number })[];

export function rankTorrents(
  torrents: TorrentResult[],
  audiobook: AudiobookRequest,
  optionsOrPriorities?: RankTorrentsOptions | Map<number, number>,
  flagConfigs?: IndexerFlagConfig[]
): (RankedTorrent & { qualityScore: number })[] {
  const algorithm = getRankingAlgorithm();

  // A Map in third position means the legacy call shape
  // rankTorrents(torrents, audiobook, priorities, flags); fold it into an
  // options object, requiring the author as the safe default.
  const options: RankTorrentsOptions =
    optionsOrPriorities instanceof Map
      ? {
          indexerPriorities: optionsOrPriorities,
          flagConfigs,
          requireAuthor: true,
        }
      : optionsOrPriorities || {};

  // qualityScore is kept for UI compatibility
  return algorithm
    .rankTorrents(torrents, audiobook, options)
    .map((entry) => ({
      ...entry,
      qualityScore: Math.round(entry.score),
    }));
}
|
|
|
|
/**
 * Rank ebook torrents using the shared RankingAlgorithm instance.
 *
 * @param torrents - Array of torrent results from Prowlarr
 * @param ebook - Ebook request details (title, author, preferredFormat)
 * @param options - Optional ranking configuration; an empty object is used when omitted
 * @returns Ranked ebook torrents, each with an added `qualityScore` (the rounded `score`)
 */
export function rankEbookTorrents(
  torrents: TorrentResult[],
  ebook: EbookTorrentRequest,
  options?: RankEbookTorrentsOptions
): (RankedEbookTorrent & { qualityScore: number })[] {
  const effectiveOptions = options || {};

  // qualityScore is kept for UI compatibility
  return getRankingAlgorithm()
    .rankEbookTorrents(torrents, ebook, effectiveOptions)
    .map((entry) => ({
      ...entry,
      qualityScore: Math.round(entry.score),
    }));
}
|