mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 12:50:09 +00:00
Implement file hash-based library matching and remove fuzzy ASIN matching
Adds file hash-based matching for Audiobookshelf library items to ensure 100% accurate ASIN assignment for RMAB-organized content. Removes fuzzy matching from library availability checks, making all matching ASIN-only to eliminate false positives and race conditions. Updates database schema, processors, and matcher utilities; adds new tests and documentation for the new matching strategy. Removes obsolete scripts, Dockerfile, and related tests; updates docker-compose for test environments.
This commit is contained in:
@@ -3,11 +3,10 @@
|
||||
* Documentation: documentation/integrations/audible.md
|
||||
*
|
||||
* Real-time matching between Audible books and library backends (Plex or Audiobookshelf).
|
||||
* Supports ASIN, ISBN, and fuzzy title/author matching.
|
||||
* ASIN-only matching for library availability checks (exact matches only).
|
||||
*/
|
||||
|
||||
import { prisma } from '@/lib/db';
|
||||
import { compareTwoStrings } from 'string-similarity';
|
||||
import { LibraryItem } from '@/lib/services/library';
|
||||
import { RMABLogger } from './logger';
|
||||
|
||||
@@ -28,43 +27,13 @@ export interface AudiobookMatchResult {
|
||||
author: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize audiobook title for matching by removing common suffixes/prefixes
|
||||
* that don't affect the core title identity.
|
||||
*/
|
||||
function normalizeTitle(title: string): string {
|
||||
let normalized = title.toLowerCase().trim();
|
||||
|
||||
// Remove common parenthetical additions (case-insensitive)
|
||||
normalized = normalized.replace(/\s*\(unabridged\)\s*/gi, ' ');
|
||||
normalized = normalized.replace(/\s*\(abridged\)\s*/gi, ' ');
|
||||
normalized = normalized.replace(/\s*\(full cast\)\s*/gi, ' ');
|
||||
normalized = normalized.replace(/\s*\(full-cast edition\)\s*/gi, ' ');
|
||||
normalized = normalized.replace(/\s*\(dramatized\)\s*/gi, ' ');
|
||||
normalized = normalized.replace(/\s*\(narrated by[^)]*\)\s*/gi, ' ');
|
||||
|
||||
// Remove common subtitle patterns
|
||||
normalized = normalized.replace(/:\s*a novel\s*$/gi, '');
|
||||
normalized = normalized.replace(/:\s*a thriller\s*$/gi, '');
|
||||
normalized = normalized.replace(/:\s*a memoir\s*$/gi, '');
|
||||
|
||||
// Remove book number suffixes (but keep them in main title if they're significant)
|
||||
// Only remove if they're clearly series indicators at the end
|
||||
normalized = normalized.replace(/,?\s*book\s+\d+\s*$/gi, '');
|
||||
normalized = normalized.replace(/:\s*book\s+\d+\s*$/gi, '');
|
||||
|
||||
// Clean up extra whitespace
|
||||
normalized = normalized.replace(/\s+/g, ' ').trim();
|
||||
|
||||
return normalized;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find a matching audiobook in the Plex library for a given Audible audiobook.
|
||||
*
|
||||
* Matching logic (in order of priority):
|
||||
* 1. **ASIN in plexGuid** - Check if any Plex book's GUID contains the Audible ASIN (100% match)
|
||||
* 2. **Fuzzy matching** - Normalized title/author string similarity with 70% threshold
|
||||
* Matching logic (ASIN-only, exact matches):
|
||||
* 1. **ASIN in dedicated field** - Check if plexLibrary.asin matches (100% confidence)
|
||||
* 2. **ASIN in plexGuid** - Check if Plex GUID contains the Audible ASIN (backward compatibility)
|
||||
* 3. **No match** - Return null (no fuzzy fallback)
|
||||
*
|
||||
* @param audiobook - Audible audiobook to match
|
||||
* @returns Matched Plex library item or null
|
||||
@@ -72,25 +41,22 @@ function normalizeTitle(title: string): string {
|
||||
export async function findPlexMatch(
|
||||
audiobook: AudiobookMatchInput
|
||||
): Promise<AudiobookMatchResult | null> {
|
||||
// Query plex_library for potential matches
|
||||
// IMPORTANT: Search by TITLE ONLY (not author) because Plex often has narrator as author
|
||||
const titleSearchLength = Math.min(20, audiobook.title.length);
|
||||
// Query plex_library directly by ASIN (indexed O(1) lookup)
|
||||
// Check both dedicated asin field and plexGuid for backward compatibility
|
||||
const plexBooks = await prisma.plexLibrary.findMany({
|
||||
where: {
|
||||
title: {
|
||||
contains: audiobook.title.substring(0, titleSearchLength),
|
||||
mode: 'insensitive',
|
||||
},
|
||||
OR: [
|
||||
{ asin: audiobook.asin },
|
||||
{ plexGuid: { contains: audiobook.asin } },
|
||||
],
|
||||
},
|
||||
select: {
|
||||
plexGuid: true,
|
||||
plexRatingKey: true,
|
||||
title: true,
|
||||
author: true,
|
||||
asin: true, // Include ASIN field for direct matching
|
||||
isbn: true, // Include ISBN field for additional matching
|
||||
asin: true,
|
||||
},
|
||||
take: 20,
|
||||
});
|
||||
|
||||
// Build match result for logging
|
||||
@@ -107,9 +73,9 @@ export async function findPlexMatch(
|
||||
result: null,
|
||||
};
|
||||
|
||||
// If no candidates found, log and return null
|
||||
// If no ASIN matches found, log and return null
|
||||
if (plexBooks.length === 0) {
|
||||
matchResult.matchType = 'no_candidates';
|
||||
matchResult.matchType = 'no_asin_match';
|
||||
logger.debug('Matcher result', { MATCHER: matchResult });
|
||||
return null;
|
||||
}
|
||||
@@ -147,116 +113,8 @@ export async function findPlexMatch(
|
||||
}
|
||||
}
|
||||
|
||||
// FILTER OUT candidates with wrong ASINs (check both dedicated field and plexGuid)
|
||||
const ASIN_PATTERN = /[A-Z0-9]{10}/g;
|
||||
const rejectedAsins: string[] = [];
|
||||
const validCandidates = plexBooks.filter((plexBook) => {
|
||||
// Check dedicated ASIN field first (more reliable)
|
||||
if (plexBook.asin) {
|
||||
if (plexBook.asin.toLowerCase() !== audiobook.asin.toLowerCase()) {
|
||||
rejectedAsins.push(plexBook.asin);
|
||||
return false; // Wrong ASIN in dedicated field - reject
|
||||
}
|
||||
return true; // Correct ASIN in dedicated field - keep
|
||||
}
|
||||
|
||||
// Fall back to checking plexGuid for legacy Plex data
|
||||
if (!plexBook.plexGuid) return true;
|
||||
const asinsInGuid = plexBook.plexGuid.match(ASIN_PATTERN);
|
||||
if (!asinsInGuid || asinsInGuid.length === 0) return true;
|
||||
|
||||
const hasOurAsin = asinsInGuid.some(asin => asin === audiobook.asin);
|
||||
const hasOtherAsins = asinsInGuid.some(asin => asin !== audiobook.asin);
|
||||
|
||||
if (hasOtherAsins && !hasOurAsin) {
|
||||
rejectedAsins.push(...asinsInGuid);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
matchResult.asinFiltering = {
|
||||
beforeCount: plexBooks.length,
|
||||
afterCount: validCandidates.length,
|
||||
rejectedAsins: rejectedAsins.length > 0 ? rejectedAsins : undefined,
|
||||
};
|
||||
|
||||
if (validCandidates.length === 0) {
|
||||
matchResult.matchType = 'asin_filtered_all';
|
||||
logger.debug('Matcher result', { MATCHER: matchResult });
|
||||
return null;
|
||||
}
|
||||
|
||||
// Normalize the Audible title
|
||||
const normalizedAudibleTitle = normalizeTitle(audiobook.title);
|
||||
|
||||
// PRIORITY 2: Perform fuzzy matching
|
||||
const candidates = validCandidates.map((plexBook) => {
|
||||
const normalizedPlexTitle = normalizeTitle(plexBook.title);
|
||||
const titleScore = compareTwoStrings(normalizedAudibleTitle, normalizedPlexTitle);
|
||||
const authorScore = compareTwoStrings(
|
||||
audiobook.author.toLowerCase(),
|
||||
plexBook.author.toLowerCase()
|
||||
);
|
||||
|
||||
let narratorScore = 0;
|
||||
let usedNarratorMatch = false;
|
||||
if (audiobook.narrator) {
|
||||
narratorScore = compareTwoStrings(
|
||||
audiobook.narrator.toLowerCase(),
|
||||
plexBook.author.toLowerCase()
|
||||
);
|
||||
usedNarratorMatch = narratorScore > authorScore;
|
||||
}
|
||||
|
||||
const personScore = usedNarratorMatch ? narratorScore : authorScore;
|
||||
const overallScore = titleScore * 0.7 + personScore * 0.3;
|
||||
|
||||
return {
|
||||
plexBook,
|
||||
titleScore,
|
||||
authorScore,
|
||||
narratorScore,
|
||||
usedNarratorMatch,
|
||||
score: overallScore
|
||||
};
|
||||
});
|
||||
|
||||
// Sort by score descending
|
||||
candidates.sort((a, b) => b.score - a.score);
|
||||
const bestMatch = candidates[0];
|
||||
|
||||
// Add best match details to result
|
||||
matchResult.bestCandidate = {
|
||||
plexTitle: bestMatch.plexBook.title,
|
||||
plexAuthor: bestMatch.plexBook.author,
|
||||
plexGuid: bestMatch.plexBook.plexGuid,
|
||||
scores: {
|
||||
title: Math.round(bestMatch.titleScore * 100),
|
||||
author: Math.round(bestMatch.authorScore * 100),
|
||||
narrator: audiobook.narrator ? Math.round(bestMatch.narratorScore * 100) : null,
|
||||
usedMatch: bestMatch.usedNarratorMatch ? 'narrator' : 'author',
|
||||
overall: Math.round(bestMatch.score * 100),
|
||||
},
|
||||
threshold: 70,
|
||||
};
|
||||
|
||||
// Accept match if score >= 70%
|
||||
if (bestMatch && bestMatch.score >= 0.7) {
|
||||
matchResult.matchType = 'fuzzy';
|
||||
matchResult.matched = true;
|
||||
matchResult.result = {
|
||||
plexGuid: bestMatch.plexBook.plexGuid,
|
||||
plexTitle: bestMatch.plexBook.title,
|
||||
plexAuthor: bestMatch.plexBook.author,
|
||||
confidence: Math.round(bestMatch.score * 100),
|
||||
};
|
||||
logger.debug('Matcher result', { MATCHER: matchResult });
|
||||
return bestMatch.plexBook;
|
||||
}
|
||||
|
||||
// No match found
|
||||
matchResult.matchType = 'fuzzy_below_threshold';
|
||||
// No exact match found (shouldn't happen given the query, but defensive)
|
||||
matchResult.matchType = 'no_exact_match';
|
||||
logger.debug('Matcher result', { MATCHER: matchResult });
|
||||
return null;
|
||||
}
|
||||
@@ -384,10 +242,10 @@ function normalizeISBN(isbn: string): string {
|
||||
* Generic audiobook matching function that works with LibraryItem interface.
|
||||
* Works with any library backend (Plex, Audiobookshelf, etc.)
|
||||
*
|
||||
* Matching priority:
|
||||
* Matching priority (ASIN-only, exact matches):
|
||||
* 1. Exact ASIN match (100% confidence)
|
||||
* 2. Exact ISBN match (95% confidence)
|
||||
* 3. Fuzzy title/author match (70%+ threshold)
|
||||
* 3. No match - Return null (no fuzzy fallback)
|
||||
*
|
||||
* @param request - Audiobook request details
|
||||
* @param libraryItems - Items from library backend
|
||||
@@ -430,49 +288,15 @@ export function matchAudiobook(
|
||||
}
|
||||
}
|
||||
|
||||
// 3. Fuzzy title/author match
|
||||
const normalizedRequestTitle = normalizeTitle(request.title);
|
||||
const normalizedRequestAuthor = request.author.toLowerCase();
|
||||
|
||||
const candidates = libraryItems.map(item => {
|
||||
const normalizedItemTitle = normalizeTitle(item.title);
|
||||
const normalizedItemAuthor = item.author.toLowerCase();
|
||||
|
||||
const titleScore = compareTwoStrings(normalizedRequestTitle, normalizedItemTitle);
|
||||
const authorScore = compareTwoStrings(normalizedRequestAuthor, normalizedItemAuthor);
|
||||
|
||||
// Weighted average: title is more important
|
||||
const overallScore = titleScore * 0.7 + authorScore * 0.3;
|
||||
|
||||
return { item, titleScore, authorScore, score: overallScore };
|
||||
});
|
||||
|
||||
// Sort by score and get best match
|
||||
candidates.sort((a, b) => b.score - a.score);
|
||||
const bestMatch = candidates[0];
|
||||
|
||||
// Accept if score >= 70%
|
||||
if (bestMatch && bestMatch.score >= 0.7) {
|
||||
logger.debug('Generic matcher result', {
|
||||
matchType: 'fuzzy',
|
||||
input: { title: request.title, author: request.author },
|
||||
matched: { title: bestMatch.item.title, author: bestMatch.item.author },
|
||||
scores: {
|
||||
title: Math.round(bestMatch.titleScore * 100),
|
||||
author: Math.round(bestMatch.authorScore * 100),
|
||||
overall: Math.round(bestMatch.score * 100)
|
||||
},
|
||||
confidence: Math.round(bestMatch.score * 100)
|
||||
});
|
||||
return bestMatch.item;
|
||||
}
|
||||
|
||||
// No match found
|
||||
// No match found (no ASIN/ISBN match, no fuzzy fallback)
|
||||
logger.debug('Generic matcher result', {
|
||||
matchType: 'no_match',
|
||||
input: { title: request.title, author: request.author },
|
||||
bestScore: bestMatch ? Math.round(bestMatch.score * 100) : 0,
|
||||
threshold: 70
|
||||
matchType: 'no_asin_isbn_match',
|
||||
input: {
|
||||
title: request.title,
|
||||
author: request.author,
|
||||
asin: request.asin || 'none',
|
||||
isbn: request.isbn || 'none'
|
||||
},
|
||||
});
|
||||
|
||||
return null;
|
||||
|
||||
@@ -10,7 +10,7 @@ import { exec, spawn } from 'child_process';
|
||||
import { promisify } from 'util';
|
||||
import path from 'path';
|
||||
import fs from 'fs/promises';
|
||||
import { JobLogger } from './job-logger';
|
||||
import { RMABLogger } from './logger';
|
||||
|
||||
const execPromise = promisify(exec);
|
||||
|
||||
@@ -79,7 +79,7 @@ export interface MergeResult {
|
||||
* This is more permissive and catches edge cases where filenames don't match patterns
|
||||
* but metadata (track numbers) provides correct ordering.
|
||||
*/
|
||||
export async function detectChapterFiles(files: string[], logger?: JobLogger): Promise<boolean> {
|
||||
export async function detectChapterFiles(files: string[], logger?: RMABLogger): Promise<boolean> {
|
||||
// Need at least 3 files to consider as multi-chapter audiobook
|
||||
// (2 files might be "Book" + "Credits", so require 3+)
|
||||
if (files.length < 3) {
|
||||
@@ -285,7 +285,7 @@ function detectBookTitle(files: { titleMetadata?: string }[]): string | null {
|
||||
*/
|
||||
export async function analyzeChapterFiles(
|
||||
filePaths: string[],
|
||||
logger?: JobLogger
|
||||
logger?: RMABLogger
|
||||
): Promise<ChapterFile[]> {
|
||||
await logger?.info(`Analyzing ${filePaths.length} chapter files...`);
|
||||
|
||||
@@ -484,7 +484,7 @@ async function executeFFmpegWithProgress(
|
||||
command: string,
|
||||
timeout: number,
|
||||
expectedDuration: number, // milliseconds
|
||||
logger?: JobLogger
|
||||
logger?: RMABLogger
|
||||
): Promise<void> {
|
||||
return new Promise((resolve, reject) => {
|
||||
// Parse the command to extract args (remove 'ffmpeg' and handle quotes)
|
||||
@@ -532,7 +532,7 @@ async function executeFFmpegWithProgress(
|
||||
const speed = speedMatch ? parseFloat(speedMatch[1]) : null;
|
||||
|
||||
const speedInfo = speed ? ` (${speed.toFixed(1)}x realtime)` : '';
|
||||
logger?.info(`Encoding progress: ${progressPercent}%${speedInfo} - ${formatDuration(currentTimeMs)} / ${formatDuration(expectedDuration)}`).catch(() => {});
|
||||
logger?.info(`Encoding progress: ${progressPercent}%${speedInfo} - ${formatDuration(currentTimeMs)} / ${formatDuration(expectedDuration)}`);
|
||||
|
||||
lastProgressLog = Date.now();
|
||||
lastProgressPercent = progressPercent;
|
||||
@@ -546,7 +546,7 @@ async function executeFFmpegWithProgress(
|
||||
if (code === 0) {
|
||||
// Check stderr for errors even if exit code is 0
|
||||
if (stderrBuffer.includes('Error') || stderrBuffer.includes('Invalid')) {
|
||||
logger?.warn(`FFmpeg completed but reported issues: ${stderrBuffer.substring(stderrBuffer.lastIndexOf('Error'), stderrBuffer.lastIndexOf('Error') + 200)}`).catch(() => {});
|
||||
logger?.warn(`FFmpeg completed but reported issues: ${stderrBuffer.substring(stderrBuffer.lastIndexOf('Error'), stderrBuffer.lastIndexOf('Error') + 200)}`);
|
||||
}
|
||||
resolve();
|
||||
} else {
|
||||
@@ -574,7 +574,7 @@ async function executeFFmpegWithProgress(
|
||||
export async function mergeChapters(
|
||||
chapters: ChapterFile[],
|
||||
options: MergeOptions,
|
||||
logger?: JobLogger
|
||||
logger?: RMABLogger
|
||||
): Promise<MergeResult> {
|
||||
if (chapters.length === 0) {
|
||||
await logger?.error('Chapter merge failed: No chapters provided');
|
||||
@@ -806,7 +806,7 @@ export async function mergeChapters(
|
||||
async function validateMergedFile(
|
||||
outputPath: string,
|
||||
expectedDuration: number, // milliseconds
|
||||
logger?: JobLogger
|
||||
logger?: RMABLogger
|
||||
): Promise<{ valid: boolean; error?: string; actualDuration?: number }> {
|
||||
try {
|
||||
await logger?.info('Validating merged file...');
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
import fs from 'fs/promises';
|
||||
import path from 'path';
|
||||
import axios from 'axios';
|
||||
import { createJobLogger, JobLogger } from './job-logger';
|
||||
import { tagMultipleFiles, checkFfmpegAvailable } from './metadata-tagger';
|
||||
import { RMABLogger } from './logger';
|
||||
|
||||
@@ -73,7 +72,7 @@ export class FileOrganizer {
|
||||
loggerConfig?: LoggerConfig
|
||||
): Promise<OrganizationResult> {
|
||||
// Create logger if config provided
|
||||
const logger = loggerConfig ? createJobLogger(loggerConfig.jobId, loggerConfig.context) : null;
|
||||
const logger = loggerConfig ? RMABLogger.forJob(loggerConfig.jobId, loggerConfig.context) : null;
|
||||
|
||||
const result: OrganizationResult = {
|
||||
success: false,
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* File Hash Utility
|
||||
* Documentation: documentation/fixes/file-hash-matching.md
|
||||
*
|
||||
* Generates deterministic hashes of audio file collections for accurate library matching.
|
||||
* Used to match RMAB-organized audiobooks with Audiobookshelf library items.
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
import path from 'path';
|
||||
|
||||
/**
|
||||
* Supported audio file extensions for hash generation
|
||||
*/
|
||||
const AUDIO_EXTENSIONS = ['.m4b', '.m4a', '.mp3', '.mp4', '.aa', '.aax'];
|
||||
|
||||
/**
|
||||
* Generates a SHA256 hash of audio filenames for library matching.
|
||||
*
|
||||
* Process:
|
||||
* 1. Extract basenames from file paths
|
||||
* 2. Filter to supported audio extensions
|
||||
* 3. Normalize to lowercase
|
||||
* 4. Sort alphabetically
|
||||
* 5. Generate SHA256 hash
|
||||
*
|
||||
* @param filePaths - Array of absolute or relative file paths
|
||||
* @returns 64-character hex string (SHA256 hash) or empty string if no audio files
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const hash = generateFilesHash([
|
||||
* '/path/to/Chapter 01.mp3',
|
||||
* '/path/to/Chapter 02.mp3',
|
||||
* '/path/to/cover.jpg' // Filtered out (not audio)
|
||||
* ]);
|
||||
* // Returns: "abc123def456..." (64 chars)
|
||||
* ```
|
||||
*/
|
||||
export function generateFilesHash(filePaths: string[]): string {
|
||||
if (!filePaths || filePaths.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// Extract basenames and filter to audio files only
|
||||
const audioBasenames = filePaths
|
||||
.map((filePath) => path.basename(filePath))
|
||||
.filter((basename) => {
|
||||
const ext = path.extname(basename).toLowerCase();
|
||||
return AUDIO_EXTENSIONS.includes(ext);
|
||||
})
|
||||
.map((basename) => basename.toLowerCase()) // Normalize case
|
||||
.sort(); // Sort alphabetically for deterministic hash
|
||||
|
||||
// No audio files found
|
||||
if (audioBasenames.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// Generate SHA256 hash
|
||||
const hash = crypto
|
||||
.createHash('sha256')
|
||||
.update(JSON.stringify(audioBasenames))
|
||||
.digest('hex');
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates if a hash string is a valid SHA256 hash
|
||||
*/
|
||||
export function isValidHash(hash: string): boolean {
|
||||
return /^[a-f0-9]{64}$/i.test(hash);
|
||||
}
|
||||
@@ -1,66 +0,0 @@
|
||||
/**
|
||||
* Component: Job Logger Utility (Backward Compatibility)
|
||||
* Documentation: documentation/backend/services/jobs.md
|
||||
*
|
||||
* @deprecated Use RMABLogger.forJob() directly for new code.
|
||||
* This file provides backward compatibility for existing processors.
|
||||
*
|
||||
* Migration example:
|
||||
* ```typescript
|
||||
* // Before (deprecated)
|
||||
* const logger = jobId ? createJobLogger(jobId, 'Context') : null;
|
||||
* await logger?.info('message');
|
||||
*
|
||||
* // After (preferred)
|
||||
* import { RMABLogger } from './logger';
|
||||
* const logger = RMABLogger.forJob(jobId, 'Context');
|
||||
* logger.info('message'); // No await needed!
|
||||
* ```
|
||||
*/
|
||||
|
||||
import { RMABLogger, LogMetadata } from './logger';
|
||||
|
||||
export type LogLevel = 'info' | 'warn' | 'error';
|
||||
|
||||
/**
|
||||
* @deprecated Use RMABLogger.forJob() directly
|
||||
*/
|
||||
export class JobLogger {
|
||||
private logger: RMABLogger;
|
||||
|
||||
constructor(jobId: string, context: string) {
|
||||
this.logger = RMABLogger.forJob(jobId, context);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log info message
|
||||
* @deprecated Returns Promise for backward compat but is actually synchronous
|
||||
*/
|
||||
async info(message: string, metadata?: LogMetadata): Promise<void> {
|
||||
this.logger.info(message, metadata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log warning message
|
||||
* @deprecated Returns Promise for backward compat but is actually synchronous
|
||||
*/
|
||||
async warn(message: string, metadata?: LogMetadata): Promise<void> {
|
||||
this.logger.warn(message, metadata);
|
||||
}
|
||||
|
||||
/**
|
||||
* Log error message
|
||||
* @deprecated Returns Promise for backward compat but is actually synchronous
|
||||
*/
|
||||
async error(message: string, metadata?: LogMetadata): Promise<void> {
|
||||
this.logger.error(message, metadata);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a job logger instance
|
||||
* @deprecated Use RMABLogger.forJob() directly
|
||||
*/
|
||||
export function createJobLogger(jobId: string, context: string): JobLogger {
|
||||
return new JobLogger(jobId, context);
|
||||
}
|
||||
@@ -36,6 +36,12 @@ export interface IndexerFlagConfig {
|
||||
modifier: number; // -100 to 100 (percentage)
|
||||
}
|
||||
|
||||
export interface RankTorrentsOptions {
|
||||
indexerPriorities?: Map<number, number>; // indexerId -> priority (1-25)
|
||||
flagConfigs?: IndexerFlagConfig[]; // Flag bonus configurations
|
||||
requireAuthor?: boolean; // Enforce author presence check (default: true)
|
||||
}
|
||||
|
||||
export interface BonusModifier {
|
||||
type: 'indexer_priority' | 'indexer_flag' | 'custom';
|
||||
value: number; // Multiplier (e.g., 0.4 for 40%)
|
||||
@@ -66,15 +72,18 @@ export class RankingAlgorithm {
|
||||
* Rank all torrents and return sorted by finalScore (best first)
|
||||
* @param torrents - Array of torrent results to rank
|
||||
* @param audiobook - Audiobook request details for matching (includes durationMinutes for size scoring)
|
||||
* @param indexerPriorities - Optional map of indexerId to priority (1-25), defaults to 10
|
||||
* @param flagConfigs - Optional array of flag configurations for bonus/penalty modifiers
|
||||
* @param options - Optional configuration for ranking behavior
|
||||
*/
|
||||
rankTorrents(
|
||||
torrents: TorrentResult[],
|
||||
audiobook: AudiobookRequest,
|
||||
indexerPriorities?: Map<number, number>,
|
||||
flagConfigs?: IndexerFlagConfig[]
|
||||
options: RankTorrentsOptions = {}
|
||||
): RankedTorrent[] {
|
||||
const {
|
||||
indexerPriorities,
|
||||
flagConfigs,
|
||||
requireAuthor = true // Safe default: require author in automatic mode
|
||||
} = options;
|
||||
// Filter out files < 20 MB (likely ebooks/samples)
|
||||
const filteredTorrents = torrents.filter((torrent) => {
|
||||
const sizeMB = torrent.size / (1024 * 1024);
|
||||
@@ -86,7 +95,7 @@ export class RankingAlgorithm {
|
||||
const formatScore = this.scoreFormat(torrent);
|
||||
const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
|
||||
const seederScore = this.scoreSeeders(torrent.seeders);
|
||||
const matchScore = this.scoreMatch(torrent, audiobook);
|
||||
const matchScore = this.scoreMatch(torrent, audiobook, requireAuthor);
|
||||
|
||||
const baseScore = formatScore + sizeScore + seederScore + matchScore;
|
||||
|
||||
@@ -183,12 +192,13 @@ export class RankingAlgorithm {
|
||||
*/
|
||||
getScoreBreakdown(
|
||||
torrent: TorrentResult,
|
||||
audiobook: AudiobookRequest
|
||||
audiobook: AudiobookRequest,
|
||||
requireAuthor: boolean = true
|
||||
): ScoreBreakdown {
|
||||
const formatScore = this.scoreFormat(torrent);
|
||||
const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
|
||||
const seederScore = this.scoreSeeders(torrent.seeders);
|
||||
const matchScore = this.scoreMatch(torrent, audiobook);
|
||||
const matchScore = this.scoreMatch(torrent, audiobook, requireAuthor);
|
||||
const totalScore = formatScore + sizeScore + seederScore + matchScore;
|
||||
|
||||
return {
|
||||
@@ -297,7 +307,8 @@ export class RankingAlgorithm {
|
||||
*/
|
||||
private scoreMatch(
|
||||
torrent: TorrentResult,
|
||||
audiobook: AudiobookRequest
|
||||
audiobook: AudiobookRequest,
|
||||
requireAuthor: boolean = true
|
||||
): number {
|
||||
// Normalize whitespace (multiple spaces → single space) for consistent matching
|
||||
const torrentTitle = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();
|
||||
@@ -356,6 +367,14 @@ export class RankingAlgorithm {
|
||||
}
|
||||
}
|
||||
|
||||
// ========== STAGE 1.5: AUTHOR PRESENCE CHECK (OPTIONAL) ==========
|
||||
// Only enforced in automatic mode (requireAuthor: true)
|
||||
// Interactive search (requireAuthor: false) shows all results
|
||||
if (requireAuthor && !this.checkAuthorPresence(torrentTitle, requestAuthor)) {
|
||||
// No high-confidence author match → reject to prevent wrong-author matches
|
||||
return 0;
|
||||
}
|
||||
|
||||
// ========== STAGE 2: TITLE MATCHING (0-35 points) ==========
|
||||
let titleScore = 0;
|
||||
|
||||
@@ -455,6 +474,60 @@ export class RankingAlgorithm {
|
||||
return Math.min(60, titleScore + authorScore);
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if author is present in torrent title with high confidence
|
||||
* Handles variations: middle initials, spacing, punctuation, name order
|
||||
*
|
||||
* @param torrentTitle - Normalized torrent title (lowercase)
|
||||
* @param requestAuthor - Normalized author name (lowercase)
|
||||
* @returns true if at least ONE author is present with high confidence
|
||||
*/
|
||||
private checkAuthorPresence(torrentTitle: string, requestAuthor: string): boolean {
|
||||
// Parse multiple authors (same logic as Stage 3 author matching)
|
||||
const authors = requestAuthor
|
||||
.split(/,|&| and | - /)
|
||||
.map(a => a.trim())
|
||||
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
|
||||
|
||||
// At least ONE author must match with high confidence
|
||||
return authors.some(author => {
|
||||
// Check 1: Exact substring match
|
||||
if (torrentTitle.includes(author)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check 2: High fuzzy similarity (≥ 0.85)
|
||||
// Handles: "J.K. Rowling" vs "J. K. Rowling" vs "JK Rowling"
|
||||
// Also handles: "Dennis E. Taylor" vs "Dennis Taylor"
|
||||
const similarity = compareTwoStrings(author, torrentTitle);
|
||||
if (similarity >= 0.85) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check 3: Core name components (first + last name present within 30 chars)
|
||||
// Handles: "Sanderson, Brandon" vs "Brandon Sanderson"
|
||||
// Handles: "Brandon R. Sanderson" vs "Brandon Sanderson"
|
||||
const words = author.split(/\s+/).filter(w => w.length > 1);
|
||||
if (words.length >= 2) {
|
||||
const firstName = words[0];
|
||||
const lastName = words[words.length - 1];
|
||||
|
||||
const firstIdx = torrentTitle.indexOf(firstName);
|
||||
const lastIdx = torrentTitle.indexOf(lastName);
|
||||
|
||||
// Both components present and reasonably close?
|
||||
if (firstIdx !== -1 && lastIdx !== -1) {
|
||||
const distance = Math.abs(lastIdx - firstIdx);
|
||||
if (distance <= 30) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Detect format from torrent title
|
||||
*/
|
||||
@@ -563,15 +636,52 @@ export function getRankingAlgorithm(): RankingAlgorithm {
|
||||
|
||||
/**
|
||||
* Helper function to rank torrents using the singleton instance
|
||||
*
|
||||
* @param torrents - Array of torrent results to rank
|
||||
* @param audiobook - Audiobook request details
|
||||
* @param options - Optional ranking configuration
|
||||
* @returns Ranked torrents with quality scores
|
||||
*/
|
||||
export function rankTorrents(
|
||||
torrents: TorrentResult[],
|
||||
audiobook: AudiobookRequest,
|
||||
options?: RankTorrentsOptions
|
||||
): (RankedTorrent & { qualityScore: number })[];
|
||||
|
||||
/**
|
||||
* Helper function to rank torrents using the singleton instance (legacy signature)
|
||||
* @deprecated Use options object instead
|
||||
*/
|
||||
export function rankTorrents(
|
||||
torrents: TorrentResult[],
|
||||
audiobook: AudiobookRequest,
|
||||
indexerPriorities?: Map<number, number>,
|
||||
flagConfigs?: IndexerFlagConfig[]
|
||||
): (RankedTorrent & { qualityScore: number })[];
|
||||
|
||||
export function rankTorrents(
|
||||
torrents: TorrentResult[],
|
||||
audiobook: AudiobookRequest,
|
||||
optionsOrPriorities?: RankTorrentsOptions | Map<number, number>,
|
||||
flagConfigs?: IndexerFlagConfig[]
|
||||
): (RankedTorrent & { qualityScore: number })[] {
|
||||
const algorithm = getRankingAlgorithm();
|
||||
const ranked = algorithm.rankTorrents(torrents, audiobook, indexerPriorities, flagConfigs);
|
||||
|
||||
// Handle both new options object and legacy parameters
|
||||
let options: RankTorrentsOptions;
|
||||
if (optionsOrPriorities instanceof Map) {
|
||||
// Legacy call: rankTorrents(torrents, audiobook, priorities, flags)
|
||||
options = {
|
||||
indexerPriorities: optionsOrPriorities,
|
||||
flagConfigs,
|
||||
requireAuthor: true // Safe default
|
||||
};
|
||||
} else {
|
||||
// New call: rankTorrents(torrents, audiobook, options)
|
||||
options = optionsOrPriorities || {};
|
||||
}
|
||||
|
||||
const ranked = algorithm.rankTorrents(torrents, audiobook, options);
|
||||
|
||||
// Add qualityScore field for UI compatibility (rounded score)
|
||||
return ranked.map((r) => ({
|
||||
|
||||
Reference in New Issue
Block a user