Implement file hash-based library matching and remove fuzzy ASIN matching

Adds file hash-based matching for Audiobookshelf library items to ensure 100% accurate ASIN assignment for RMAB-organized content. Removes fuzzy matching from library availability checks, making all matching ASIN-only to eliminate false positives and race conditions. Updates database schema, processors, and matcher utilities; adds new tests and documentation for the new matching strategy. Removes obsolete scripts, Dockerfile, and related tests; updates docker-compose for test environments.
This commit is contained in:
kikootwo
2026-01-28 10:32:14 -05:00
parent 497849f427
commit a97979358f
111 changed files with 6571 additions and 1426 deletions
@@ -67,6 +67,7 @@ export async function processDownloadTorrent(payload: DownloadTorrentPayload): P
data: {
requestId,
indexerName: torrent.indexer,
indexerId: torrent.indexerId, // Store indexer ID for configuration lookup
downloadClient: 'sabnzbd',
downloadClientId,
torrentName: torrent.title,
@@ -131,6 +132,7 @@ export async function processDownloadTorrent(payload: DownloadTorrentPayload): P
data: {
requestId,
indexerName: torrent.indexer,
indexerId: torrent.indexerId, // Store indexer ID for configuration lookup
downloadClient: 'qbittorrent',
downloadClientId,
torrentName: torrent.title,
-191
View File
@@ -1,191 +0,0 @@
/**
* Component: Match Library Job Processor
* Documentation: documentation/phase3/README.md
*
* DEPRECATED: This processor is deprecated. Matching is now handled by scan_library job.
* Kept for backwards compatibility but should not be used in new code.
*/
import { MatchPlexPayload } from '../services/job-queue.service';
import { prisma } from '../db';
import { getLibraryService } from '../services/library';
import { compareTwoStrings } from 'string-similarity';
import { getConfigService } from '../services/config.service';
import { RMABLogger } from '../utils/logger';
/** Share of the combined match score contributed by title similarity. */
const TITLE_SCORE_WEIGHT = 0.7;

/** Share of the combined match score contributed by author similarity. */
const AUTHOR_SCORE_WEIGHT = 0.3;

/** Neutral author score used when the request has no author to compare against. */
const MISSING_AUTHOR_SCORE = 0.5;

/** Minimum combined score (0..1) at which the best library item is accepted as a match. */
const MATCH_ACCEPT_THRESHOLD = 0.7;

/**
 * Scores one library search result against the requested title/author.
 *
 * Both comparisons are case-insensitive; a missing title or author on the
 * library item is compared as an empty string.
 *
 * @param item - Library search result to score.
 * @param title - Requested audiobook title.
 * @param author - Requested author; when falsy the author score is fixed at
 *   {@link MISSING_AUTHOR_SCORE} instead of being compared.
 * @returns The item together with its title, author and weighted overall score.
 */
function scoreLibraryItem<T extends { title?: string | null; author?: string | null }>(
  item: T,
  title: string,
  author: string | null | undefined,
): { item: T; score: number; titleScore: number; authorScore: number } {
  const titleScore = compareTwoStrings(title.toLowerCase(), (item.title || '').toLowerCase());
  const authorScore = author
    ? compareTwoStrings(author.toLowerCase(), (item.author || '').toLowerCase())
    : MISSING_AUTHOR_SCORE;

  return {
    item,
    // Weighted average: title is more important than author.
    score: titleScore * TITLE_SCORE_WEIGHT + authorScore * AUTHOR_SCORE_WEIGHT,
    titleScore,
    authorScore,
  };
}

/**
 * Marks a request as completed, optionally recording an error message.
 *
 * Every exit path of {@link processMatchPlex} completes the request, so the
 * update lives here instead of being repeated at each call site. A single
 * timestamp is used for both `updatedAt` and `completedAt`.
 *
 * @param requestId - ID of the request row to update.
 * @param errorMessage - When provided, stored in the request's `errorMessage` field.
 */
async function markRequestCompleted(
  requestId: MatchPlexPayload['requestId'],
  errorMessage?: string,
): Promise<void> {
  const now = new Date();
  await prisma.request.update({
    where: { id: requestId },
    data: {
      status: 'completed',
      ...(errorMessage === undefined ? {} : { errorMessage }),
      updatedAt: now,
      completedAt: now,
    },
  });
}

/**
 * Process match library job (DEPRECATED - use scan_library instead).
 *
 * Searches the configured library for the requested title, fuzzy-scores every
 * result against the requested title/author, and stores the best item's
 * external ID on the audiobook when the score reaches
 * {@link MATCH_ACCEPT_THRESHOLD}. The request is marked `completed` on every
 * path - no results, low score, and errors included - so a matching problem
 * never fails the request.
 *
 * @param payload - Job payload carrying `requestId`, `audiobookId`, `title`,
 *   `author` and `jobId`.
 * @returns A result object with `success`, `message`, `requestId` and
 *   `matched`, plus match details, a note, or an `error` string depending on
 *   the path taken. The declared return type is left as `any` so existing
 *   callers are unaffected.
 * @throws Only if the final "mark completed" database update in the error
 *   path itself fails.
 */
export async function processMatchPlex(payload: MatchPlexPayload): Promise<any> {
  const { requestId, audiobookId, title, author, jobId } = payload;
  const logger = RMABLogger.forJob(jobId, 'MatchLibrary');

  logger.warn('DEPRECATED: match_plex job is deprecated. Use scan_plex instead.');
  logger.info(`Matching "${title}" by ${author} in library`);

  try {
    // Get library service and configuration
    const configService = getConfigService();
    const libraryService = await getLibraryService();
    const backendMode = await configService.getBackendMode();
    logger.info(`Backend mode: ${backendMode}`);

    // Get configured library ID for the active backend
    const libraryId = backendMode === 'audiobookshelf'
      ? await configService.get('audiobookshelf.library_id')
      : (await configService.getPlexConfig()).libraryId;

    if (!libraryId) {
      throw new Error(`${backendMode} library not configured`);
    }

    // Search library using abstraction layer
    const searchResults = await libraryService.searchItems(libraryId, title);
    logger.info(`Found ${searchResults.length} results in library`);

    if (searchResults.length === 0) {
      logger.warn(`No matches found in library for "${title}"`);

      // Mark as completed anyway - the file is there, library just needs time to scan
      await markRequestCompleted(requestId);

      return {
        success: true,
        message: 'No library match found yet, but request completed',
        requestId,
        matched: false,
        note: 'Library may need time to scan the new files',
      };
    }

    // Fuzzy-score every result and keep the highest one. The strict `>` keeps
    // the earliest result on ties, matching a stable descending sort.
    const scoredItems = searchResults.map((item) => scoreLibraryItem(item, title, author));
    const bestMatch = scoredItems.reduce((best, candidate) =>
      candidate.score > best.score ? candidate : best,
    );
    const scorePercent = Math.round(bestMatch.score * 100);

    logger.info(`Best match: "${bestMatch.item.title}" by ${bestMatch.item.author || 'Unknown'}`, {
      score: scorePercent,
      titleScore: Math.round(bestMatch.titleScore * 100),
      authorScore: Math.round(bestMatch.authorScore * 100),
    });

    if (bestMatch.score < MATCH_ACCEPT_THRESHOLD) {
      logger.warn(`Match score too low (${scorePercent}%), but marking as completed anyway`);

      // Mark as completed even if match is poor
      await markRequestCompleted(requestId);

      return {
        success: true,
        message: 'Request completed, but library match uncertain',
        requestId,
        matched: false,
        matchScore: bestMatch.score,
        note: `Low match score: ${scorePercent}%`,
      };
    }

    logger.info(`Match accepted!`);

    // Link the audiobook to the library item; the target column depends on the backend.
    const externalId = bestMatch.item.externalId;
    const libraryLink = backendMode === 'audiobookshelf'
      ? { absItemId: externalId }
      : { plexGuid: externalId };
    const now = new Date();

    await prisma.audiobook.update({
      where: { id: audiobookId },
      data: {
        completedAt: now,
        updatedAt: now,
        ...libraryLink,
      },
    });

    // Ensure request is marked as completed
    await markRequestCompleted(requestId);

    return {
      success: true,
      message: `Successfully matched audiobook in library (${backendMode})`,
      backendMode,
      requestId,
      matched: true,
      matchScore: bestMatch.score,
      libraryItem: {
        title: bestMatch.item.title,
        author: bestMatch.item.author,
        id: bestMatch.item.id,
        externalId: bestMatch.item.externalId,
      },
    };
  } catch (error) {
    const reason = error instanceof Error ? error.message : 'Unknown error';
    logger.error(`Error: ${reason}`);

    // Don't fail the request - the files are organized correctly.
    // Just record the error on the request and mark it as completed.
    await markRequestCompleted(requestId, `Library matching failed: ${reason}`);

    return {
      success: false,
      message: 'Request completed despite library matching error',
      requestId,
      matched: false,
      error: reason,
    };
  }
}
@@ -85,7 +85,7 @@ export async function processMonitorDownload(payload: MonitorDownloadPayload): P
// Convert NZBInfo to progress format
progress = {
percent: nzbInfo.progress,
percent: nzbInfo.progress * 100, // Convert 0.0-1.0 to 0-100 (matches qBittorrent format)
bytesDownloaded: nzbInfo.size * nzbInfo.progress,
bytesTotal: nzbInfo.size,
speed: nzbInfo.downloadSpeed,
+98 -1
View File
@@ -9,6 +9,7 @@ import { getFileOrganizer } from '../utils/file-organizer';
import { RMABLogger } from '../utils/logger';
import { getLibraryService } from '../services/library';
import { getConfigService } from '../services/config.service';
import { generateFilesHash } from '../utils/files-hash';
/**
* Process organize files job
@@ -107,11 +108,18 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi
logger.info(`Successfully moved ${result.filesMovedCount} files to ${result.targetPath}`);
// Update audiobook record with file path and status
// Generate hash from organized audio files for library matching
const filesHash = generateFilesHash(result.audioFiles);
if (filesHash) {
logger.info(`Generated files hash: ${filesHash.substring(0, 16)}... (${result.audioFiles.length} audio files)`);
}
// Update audiobook record with file path, hash, and status
await prisma.audiobook.update({
where: { id: audiobookId },
data: {
filePath: result.targetPath,
filesHash: filesHash || null,
status: 'completed',
completedAt: new Date(),
updatedAt: new Date(),
@@ -189,6 +197,95 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi
);
}
// Cleanup Usenet downloads if configured
try {
logger.info('Checking if cleanup is needed for this download');
// Get download history to find NZB ID and indexer
const downloadHistory = await prisma.downloadHistory.findFirst({
where: { requestId },
orderBy: { createdAt: 'desc' },
});
logger.info(`Download history found: ${downloadHistory ? 'yes' : 'no'}`, {
hasNzbId: !!downloadHistory?.nzbId,
hasIndexerId: !!downloadHistory?.indexerId,
nzbId: downloadHistory?.nzbId || 'none',
indexerId: downloadHistory?.indexerId || 'none',
});
if (downloadHistory?.nzbId && downloadHistory?.indexerId) {
// Get indexer configuration
const indexersConfig = await configService.get('prowlarr_indexers');
logger.info(`Indexers config found: ${indexersConfig ? 'yes' : 'no'}`);
if (indexersConfig) {
const indexers: Array<{ id: number; protocol: string; removeAfterProcessing?: boolean }> = JSON.parse(indexersConfig);
const indexer = indexers.find(idx => idx.id === downloadHistory.indexerId);
logger.info(`Indexer found in config: ${indexer ? 'yes' : 'no'}`, {
indexerId: downloadHistory.indexerId,
protocol: indexer?.protocol || 'none',
removeAfterProcessing: indexer?.removeAfterProcessing ?? 'undefined',
});
// Check if this is a Usenet indexer with cleanup enabled
if (indexer && indexer.protocol?.toLowerCase() !== 'torrent' && indexer.removeAfterProcessing) {
logger.info(`Cleaning up NZB ${downloadHistory.nzbId} (cleanup enabled for indexer ${indexer.id})`);
// First, manually delete files from filesystem
if (downloadPath) {
logger.info(`Removing download files from filesystem: ${downloadPath}`);
const fs = await import('fs/promises');
try {
// Check if it's a file or directory
const stats = await fs.stat(downloadPath);
if (stats.isDirectory()) {
// Remove directory and all contents
await fs.rm(downloadPath, { recursive: true, force: true });
logger.info(`Removed directory: ${downloadPath}`);
} else {
// Remove single file
await fs.unlink(downloadPath);
logger.info(`Removed file: ${downloadPath}`);
}
} catch (fsError) {
// File/directory might already be deleted or not exist
if ((fsError as NodeJS.ErrnoException).code === 'ENOENT') {
logger.info(`Download path already deleted: ${downloadPath}`);
} else {
throw fsError;
}
}
} else {
logger.warn(`No download path available, skipping filesystem deletion`);
}
// Then archive from SABnzbd history (hides from UI but preserves for troubleshooting)
// Note: We only archive from history, not queue. If the NZB is still in the queue
// when we're organizing files, something went wrong with the download monitoring.
const { getSABnzbdService } = await import('../integrations/sabnzbd.service');
const sabnzbd = await getSABnzbdService();
await sabnzbd.archiveCompletedNZB(downloadHistory.nzbId);
logger.info(`Successfully archived NZB ${downloadHistory.nzbId} and removed files`);
}
}
}
} catch (error) {
// Log error but don't fail the job - cleanup is optional
logger.warn(
`Failed to cleanup NZB download: ${error instanceof Error ? error.message : 'Unknown error'}`,
{
error: error instanceof Error ? error.stack : undefined,
}
);
}
return {
success: true,
message: 'Files organized successfully',
@@ -178,6 +178,77 @@ export async function processPlexRecentlyAddedCheck(payload: PlexRecentlyAddedPa
}
}
// For Audiobookshelf: Trigger metadata match for items without ASIN
// This ensures ASIN gets populated so items can be matched against requests
if (backendMode === 'audiobookshelf') {
const { triggerABSItemMatch, getABSItem } = await import('../services/audiobookshelf/api');
const { generateFilesHash } = await import('../utils/files-hash');
const itemsWithoutAsin = recentItems.filter(item => !item.asin && item.externalId);
if (itemsWithoutAsin.length > 0) {
logger.info(`Found ${itemsWithoutAsin.length} recent items without ASIN, attempting file hash matching...`);
let fileMatchCount = 0;
let fuzzyMatchCount = 0;
for (const item of itemsWithoutAsin) {
try {
// 1. Fetch full item details to get file list
const absItem = await getABSItem(item.externalId);
// 2. Extract audio filenames and generate hash
const audioFilenames = absItem.media?.audioFiles?.map((f: any) => f.metadata?.filename).filter(Boolean) || [];
const itemHash = generateFilesHash(audioFilenames);
// 3. Query database for matching downloaded request
let matchedAsin: string | undefined = undefined;
if (itemHash) {
const matchedAudiobook = await prisma.audiobook.findFirst({
where: {
filesHash: itemHash,
status: 'completed',
},
select: {
audibleAsin: true,
title: true,
},
});
if (matchedAudiobook?.audibleAsin) {
matchedAsin = matchedAudiobook.audibleAsin;
logger.info(
`File hash match found for "${item.title}" → ASIN: ${matchedAsin} (from "${matchedAudiobook.title}")`
);
fileMatchCount++;
}
}
// 4. Trigger metadata match (with ASIN if matched, undefined if not)
await triggerABSItemMatch(item.externalId, matchedAsin);
if (matchedAsin) {
logger.info(`Triggered metadata match with ASIN ${matchedAsin} for: "${item.title}"`);
} else {
logger.info(`No file match found, triggering fuzzy metadata match for: "${item.title}"`);
fuzzyMatchCount++;
}
} catch (error) {
logger.error(
`Failed to process metadata match for "${item.title}": ${error instanceof Error ? error.message : 'Unknown error'}`
);
fuzzyMatchCount++;
}
}
logger.info(
`Metadata match complete: ${fileMatchCount} file hash matches, ${fuzzyMatchCount} fuzzy matches (ASIN population is async)`
);
}
}
// Check for all non-terminal requests to match
const matchableRequests = await prisma.request.findMany({
where: {
@@ -259,15 +330,8 @@ export async function processPlexRecentlyAddedCheck(payload: PlexRecentlyAddedPa
matchedDownloads++;
// Trigger metadata match for Audiobookshelf items (only for our downloaded requests)
if (backendMode === 'audiobookshelf') {
const itemId = match.plexGuid; // plexGuid contains the Audiobookshelf item ID
const asin = audiobook.audibleAsin || undefined;
const matchInfo = asin ? ` with ASIN ${asin}` : '';
logger.info(`Triggering metadata match for matched item: ${itemId}${matchInfo}`);
const { triggerABSItemMatch } = await import('../services/audiobookshelf/api');
await triggerABSItemMatch(itemId, asin);
}
// Note: Audiobookshelf metadata matching is handled in the file hash phase above
// Items without ASIN get file-hash-matched ASIN, items with ASIN already have correct metadata
}
} catch (error) {
logger.error(`Failed to match request ${request.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
+76 -9
View File
@@ -180,6 +180,80 @@ export async function processScanPlex(payload: ScanPlexPayload): Promise<any> {
logger.info(`Scan complete: ${libraryItems.length} items scanned, ${newCount} new, ${updatedCount} updated, ${skippedCount} skipped`);
// 4b. For Audiobookshelf: Trigger metadata match for items without ASIN
// This ensures ASIN gets populated so items can be matched against requests
if (backendMode === 'audiobookshelf') {
logger.info(`Checking for Audiobookshelf items without ASIN...`);
const { triggerABSItemMatch, getABSItem } = await import('../services/audiobookshelf/api');
const { generateFilesHash } = await import('../utils/files-hash');
const itemsWithoutAsin = libraryItems.filter(item => !item.asin && item.externalId);
if (itemsWithoutAsin.length > 0) {
logger.info(`Found ${itemsWithoutAsin.length} items without ASIN, attempting file hash matching...`);
let fileMatchCount = 0;
let fuzzyMatchCount = 0;
for (const item of itemsWithoutAsin) {
try {
// 1. Fetch full item details to get file list
const absItem = await getABSItem(item.externalId);
// 2. Extract audio filenames and generate hash
const audioFilenames = absItem.media?.audioFiles?.map((f: any) => f.metadata?.filename).filter(Boolean) || [];
const itemHash = generateFilesHash(audioFilenames);
// 3. Query database for matching downloaded request
let matchedAsin: string | undefined = undefined;
if (itemHash) {
const matchedAudiobook = await prisma.audiobook.findFirst({
where: {
filesHash: itemHash,
status: 'completed',
},
select: {
audibleAsin: true,
title: true,
},
});
if (matchedAudiobook?.audibleAsin) {
matchedAsin = matchedAudiobook.audibleAsin;
logger.info(
`File hash match found for "${item.title}" → ASIN: ${matchedAsin} (from "${matchedAudiobook.title}")`
);
fileMatchCount++;
}
}
// 4. Trigger metadata match (with ASIN if matched, undefined if not)
await triggerABSItemMatch(item.externalId, matchedAsin);
if (matchedAsin) {
logger.info(`Triggered metadata match with ASIN ${matchedAsin} for: "${item.title}"`);
} else {
logger.info(`No file match found, triggering fuzzy metadata match for: "${item.title}"`);
fuzzyMatchCount++;
}
} catch (error) {
logger.error(
`Failed to process metadata match for "${item.title}": ${error instanceof Error ? error.message : 'Unknown error'}`
);
fuzzyMatchCount++;
}
}
logger.info(
`Metadata match complete: ${fileMatchCount} file hash matches, ${fuzzyMatchCount} fuzzy matches (ASIN population is async)`
);
} else {
logger.info(`All items have ASIN, no metadata match needed`);
}
}
// 5. Remove stale records from plex_library (items no longer in the actual library)
// This ensures the database is a fresh snapshot of the library state
logger.info(`Checking for stale library records...`);
@@ -445,15 +519,8 @@ export async function processScanPlex(payload: ScanPlexPayload): Promise<any> {
matchedCount++;
// Trigger metadata match for Audiobookshelf items (only for our downloaded requests)
if (backendMode === 'audiobookshelf') {
const itemId = match.plexGuid; // plexGuid contains the Audiobookshelf item ID
const asin = audiobook.audibleAsin || undefined;
const matchInfo = asin ? ` with ASIN ${asin}` : '';
logger.info(`Triggering metadata match for matched item: ${itemId}${matchInfo}`);
const { triggerABSItemMatch } = await import('../services/audiobookshelf/api');
await triggerABSItemMatch(itemId, asin);
}
// Note: Audiobookshelf metadata matching is handled in the file hash phase above
// Items without ASIN get file-hash-matched ASIN, items with ASIN already have correct metadata
}
} catch (error) {
logger.error(`Failed to match request ${request.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
@@ -103,13 +103,13 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
if (searchResults.length === 0) {
// No results found - queue for re-search instead of failing
logger.warn(`No torrents found for request ${requestId}, marking as awaiting_search`);
logger.warn(`No torrents/nzbs found for request ${requestId}, marking as awaiting_search`);
await prisma.request.update({
where: { id: requestId },
data: {
status: 'awaiting_search',
errorMessage: 'No torrents found. Will retry automatically.',
errorMessage: 'No torrents/nzbs found. Will retry automatically.',
lastSearchAt: new Date(),
updatedAt: new Date(),
},
@@ -117,7 +117,7 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
return {
success: false,
message: 'No torrents found, queued for re-search',
message: 'No torrents/nzbs found, queued for re-search',
requestId,
};
}
@@ -149,11 +149,16 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
// Rank results with indexer priorities and flag configs
// Note: rankTorrents now filters out results < 20 MB internally
// requireAuthor: true (default) - strict filtering for automatic selection
const rankedResults = ranker.rankTorrents(searchResults, {
title: audiobook.title,
author: audiobook.author,
durationMinutes,
}, indexerPriorities, flagConfigs);
}, {
indexerPriorities,
flagConfigs,
requireAuthor: true // Automatic mode - prevent wrong authors
});
// Log filter results
const postFilterCount = rankedResults.length;