Add interactive ebook search & selection

Introduce interactive ebook support: adds two API endpoints to search (interactive-search-ebook) and create/select ebook requests (select-ebook), plus server-side handlers to route Anna's Archive (direct) and indexer (torrent/NZB) downloads. Frontend: extend RequestActionsDropdown and InteractiveTorrentSearchModal to support an "ebook" search mode and selection flow, and add hooks (useInteractiveSearchEbook / useSelectEbook). Settings: add ebook_auto_grab_enabled with UI toggle and enforce disabling when no ebook sources are enabled; settings GET/PUT updated to persist the flag (default = true to preserve behavior). Documentation updated (scheduler, ebook-sidecar, settings pages) and ranking algorithm docs/tests extended to cover ebook-related normalization and matching cases. Includes logging and ranking integration for indexer results and normalization for Anna's Archive handling.
This commit is contained in:
kikootwo
2026-02-02 19:59:58 -05:00
parent c913be5ca2
commit 1afab5d47f
19 changed files with 1339 additions and 115 deletions
+85
View File
@@ -397,3 +397,88 @@ export function useRequestWithTorrent() {
return { requestWithTorrent, isLoading, error };
}
/**
 * Hook for running an interactive ebook search for an existing request.
 *
 * Returns the `searchEbooks` action together with `isLoading` / `error`
 * state describing the most recent call.
 */
export function useInteractiveSearchEbook() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  /**
   * POSTs to `/api/requests/{requestId}/interactive-search-ebook`.
   *
   * @param requestId - ID of the request to search ebooks for.
   * @param customTitle - Optional title override; a body is only sent when it is non-empty.
   * @returns The `results` array from the response body, or `[]` when it is absent.
   * @throws Error when there is no access token, when the server answers with a
   *   non-OK status, or when the fetch / body parsing itself fails.
   */
  const searchEbooks = async (requestId: string, customTitle?: string) => {
    if (!accessToken) {
      throw new Error('Not authenticated');
    }

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth(`/api/requests/${requestId}/interactive-search-ebook`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: customTitle ? JSON.stringify({ customTitle }) : undefined,
      });

      if (!response.ok) {
        // Error responses are not guaranteed to be JSON (e.g. an HTML page from a
        // proxy). Parse defensively so the caller gets the fallback message
        // instead of an unrelated JSON SyntaxError.
        let detail: { error?: string; message?: string } | null = null;
        try {
          detail = await response.json();
        } catch {
          detail = null;
        }
        throw new Error(detail?.error || detail?.message || 'Failed to search for ebooks');
      }

      const data = await response.json();
      return data.results || [];
    } catch (err) {
      // Mirror the failure into hook state, then rethrow so callers can react too.
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { searchEbooks, isLoading, error };
}
/**
 * Hook for submitting the ebook a user picked in the interactive search.
 *
 * Returns the `selectEbook` action together with `isLoading` / `error`
 * state describing the most recent call.
 */
export function useSelectEbook() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  /**
   * POSTs the chosen ebook to `/api/requests/{requestId}/select-ebook` and, on
   * success, revalidates every cached key containing `/api/requests`.
   *
   * @param requestId - ID of the request the selection belongs to.
   * @param ebook - The selected search result; serialized as-is under the `ebook` key.
   * @returns The parsed JSON response body.
   * @throws Error when there is no access token, when the server answers with a
   *   non-OK status, or when the fetch / body parsing itself fails.
   */
  const selectEbook = async (requestId: string, ebook: any) => {
    if (!accessToken) {
      throw new Error('Not authenticated');
    }

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth(`/api/requests/${requestId}/select-ebook`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ ebook }),
      });

      if (!response.ok) {
        // Error responses are not guaranteed to be JSON (e.g. an HTML page from a
        // proxy). Parse defensively so the caller gets the fallback message
        // instead of an unrelated JSON SyntaxError.
        let detail: { error?: string; message?: string } | null = null;
        try {
          detail = await response.json();
        } catch {
          detail = null;
        }
        throw new Error(detail?.error || detail?.message || 'Failed to download ebook');
      }

      const data = await response.json();

      // Revalidate requests
      mutate((key) => typeof key === 'string' && key.includes('/api/requests'));

      return data;
    } catch (err) {
      // Mirror the failure into hook state, then rethrow so callers can react too.
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { selectEbook, isLoading, error };
}
@@ -2,7 +2,7 @@
* Component: Monitor RSS Feeds Processor
* Documentation: documentation/backend/services/scheduler.md
*
* Monitors RSS feeds for new audiobook releases and matches against missing requests
* Monitors RSS feeds for new releases and matches against missing requests (audiobooks and ebooks)
*/
import { prisma } from '../db';
@@ -57,11 +57,10 @@ export async function processMonitorRssFeeds(payload: MonitorRssFeedsPayload): P
return { success: true, message: 'No RSS results', matched: 0 };
}
// Get all active audiobook requests awaiting search (missing audiobooks)
// Note: RSS feeds are for torrents, so only audiobook requests are matched
// Get all active requests awaiting search (audiobooks and ebooks)
// Both types can be matched against RSS torrent feeds
const missingRequests = await prisma.request.findMany({
where: {
type: 'audiobook', // Only audiobook requests (RSS feeds are for torrents)
status: 'awaiting_search',
deletedAt: null,
},
@@ -75,7 +74,7 @@ export async function processMonitorRssFeeds(payload: MonitorRssFeedsPayload): P
return { success: true, message: 'No missing requests', matched: 0 };
}
// Match RSS results against missing audiobooks
// Match RSS results against missing requests
let matched = 0;
const jobQueue = getJobQueueService();
@@ -96,16 +95,27 @@ export async function processMonitorRssFeeds(payload: MonitorRssFeedsPayload): P
if (hasAuthor && titleMatchCount >= 2) {
logger.info(`Match found! "${audiobook.title}" by ${audiobook.author} matches torrent: ${torrent.title}`);
// Trigger search job to process this request
// Trigger appropriate search job based on request type
try {
await jobQueue.addSearchJob(request.id, {
id: audiobook.id,
title: audiobook.title,
author: audiobook.author,
asin: audiobook.audibleAsin || undefined,
});
matched++;
logger.info(`Triggered search job for request ${request.id}`);
if (request.type === 'ebook') {
await jobQueue.addSearchEbookJob(request.id, {
id: audiobook.id,
title: audiobook.title,
author: audiobook.author,
asin: audiobook.audibleAsin || undefined,
});
matched++;
logger.info(`Triggered ebook search job for request ${request.id}`);
} else {
await jobQueue.addSearchJob(request.id, {
id: audiobook.id,
title: audiobook.title,
author: audiobook.author,
asin: audiobook.audibleAsin || undefined,
});
matched++;
logger.info(`Triggered audiobook search job for request ${request.id}`);
}
} catch (error) {
logger.error(`Failed to trigger search for request ${request.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
@@ -785,8 +785,16 @@ async function createEbookRequestIfEnabled(
logger: RMABLogger
): Promise<void> {
try {
// Check which ebook sources are enabled
const configService = getConfigService();
// Check if auto-grab is enabled (default: true for backward compatibility)
const autoGrabEnabled = await configService.get('ebook_auto_grab_enabled');
if (autoGrabEnabled === 'false') {
logger.info('Ebook auto-grab disabled, skipping automatic ebook request creation');
return;
}
// Check which ebook sources are enabled
const annasArchiveEnabled = await configService.get('ebook_annas_archive_enabled');
const indexerSearchEnabled = await configService.get('ebook_indexer_search_enabled');
@@ -21,11 +21,9 @@ export async function processRetryMissingTorrents(payload: RetryMissingTorrentsP
logger.info('Starting retry job for requests awaiting search...');
try {
// Find all active audiobook requests in awaiting_search status
// Note: Ebook requests have separate search mechanism (search_ebook job)
// Find all active requests (audiobook or ebook) in awaiting_search status
const requests = await prisma.request.findMany({
where: {
type: 'audiobook', // Only audiobook requests (ebooks use different search)
status: 'awaiting_search',
deletedAt: null,
},
@@ -45,20 +43,33 @@ export async function processRetryMissingTorrents(payload: RetryMissingTorrentsP
};
}
// Trigger search job for each request
// Trigger appropriate search job for each request based on type
const jobQueue = getJobQueueService();
let triggered = 0;
for (const request of requests) {
try {
await jobQueue.addSearchJob(request.id, {
id: request.audiobook.id,
title: request.audiobook.title,
author: request.audiobook.author,
asin: request.audiobook.audibleAsin || undefined,
});
triggered++;
logger.info(`Triggered search for request ${request.id}: ${request.audiobook.title}`);
if (request.type === 'ebook') {
// Ebook requests use ebook search (Anna's Archive, etc.)
await jobQueue.addSearchEbookJob(request.id, {
id: request.audiobook.id,
title: request.audiobook.title,
author: request.audiobook.author,
asin: request.audiobook.audibleAsin || undefined,
});
triggered++;
logger.info(`Triggered ebook search for request ${request.id}: ${request.audiobook.title}`);
} else {
// Audiobook requests use indexer search (Prowlarr)
await jobQueue.addSearchJob(request.id, {
id: request.audiobook.id,
title: request.audiobook.title,
author: request.audiobook.author,
asin: request.audiobook.audibleAsin || undefined,
});
triggered++;
logger.info(`Triggered audiobook search for request ${request.id}: ${request.audiobook.title}`);
}
} catch (error) {
logger.error(`Failed to trigger search for request ${request.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
}
+147 -44
View File
@@ -95,6 +95,7 @@ export interface RankedEbookTorrent extends TorrentResult {
finalScore: number; // score + bonusPoints
rank: number;
breakdown: EbookScoreBreakdown;
ebookFormat?: string; // Detected ebook format (epub, pdf, mobi, etc.)
}
export class RankingAlgorithm {
@@ -330,6 +331,26 @@ export class RankingAlgorithm {
}
/**
 * Normalize text for matching by handling CamelCase and punctuation separators.
 *
 * "VirginaEvans TheCorrespondent" → "virgina evans the correspondent"
 * "Twelve.Months-Jim.Butcher" → "twelve months jim butcher"
 * "Author_Name_Book" → "author name book"
 *
 * Letter and digit detection is Unicode-aware, so accented and non-Latin text
 * is kept ("Les Misérables" → "les misérables") instead of being turned into
 * spaces. Output for plain ASCII input is unchanged.
 *
 * @param text - Raw title or author text.
 * @returns Lowercased text with separators replaced by single spaces and no
 *   leading/trailing whitespace.
 */
private normalizeForMatching(text: string): string {
  return text
    // Split CamelCase FIRST (before lowercasing): "TheCorrespondent" → "The Correspondent"
    .replace(/(\p{Ll})(\p{Lu})/gu, '$1 $2')
    .toLowerCase()
    // Replace every separator with a space. Underscores are covered too, because
    // "_" is neither a letter (\p{L}), a combining mark (\p{M}) nor a number (\p{N}).
    // Apostrophes are preserved so contractions stay intact.
    .replace(/[^\p{L}\p{M}\p{N}\s']/gu, ' ')
    // Collapse multiple spaces
    .replace(/\s+/g, ' ')
    .trim();
}
/**
* Score title/author match quality (60 points max)
* Title similarity: 0-45 points (heavily weighted!)
@@ -340,10 +361,22 @@ export class RankingAlgorithm {
audiobook: AudiobookRequest,
requireAuthor: boolean = true
): number {
// Normalize whitespace (multiple spaces → single space) for consistent matching
const torrentTitle = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();
const requestTitle = audiobook.title.toLowerCase().replace(/\s+/g, ' ').trim();
const requestAuthor = audiobook.author.toLowerCase().replace(/\s+/g, ' ').trim();
// Normalize for matching (handles CamelCase, punctuation separators)
const torrentTitle = this.normalizeForMatching(torrent.title);
const requestTitle = this.normalizeForMatching(audiobook.title);
// Parse authors from RAW string first (preserving commas for splitting)
// Then normalize individual authors for matching
const requestAuthorRaw = audiobook.author.toLowerCase().replace(/\s+/g, ' ').trim();
const parsedAuthors = requestAuthorRaw
.split(/,|&| and | - /)
.map(a => a.trim())
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
// Normalize parsed authors for matching (handles CamelCase in author names)
const normalizedAuthors = parsedAuthors.map(a => this.normalizeForMatching(a));
// Combined normalized author string for fuzzy matching
const requestAuthorNormalized = normalizedAuthors.join(' ');
// ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
// Extract significant words (filter out common stop words)
@@ -351,26 +384,37 @@ export class RankingAlgorithm {
const extractWords = (text: string, stopList: string[]): string[] => {
return text
// Split CamelCase FIRST: "TheCorrespondent" → "The Correspondent"
.replace(/([a-z])([A-Z])/g, '$1 $2')
.toLowerCase()
.replace(/[^\w\s]/g, ' ') // Remove punctuation
// Replace underscores with spaces (must be explicit since \w includes _)
.replace(/_/g, ' ')
// Remove other punctuation (but keep apostrophes for contractions)
.replace(/[^\w\s']/g, ' ')
.split(/\s+/)
.filter(word => word.length > 0 && !stopList.includes(word));
};
// Separate required words (outside parentheses/brackets) from optional words (inside)
// This handles common patterns like "Title (Subtitle)" where subtitle may be omitted
// Note: Run on ORIGINAL title to preserve brackets, then normalize the result
const separateRequiredOptional = (title: string): { required: string; optional: string } => {
// Work with original title format for bracket detection
const originalTitle = audiobook.title.toLowerCase();
// Extract content in parentheses/brackets as optional
const optionalPattern = /[(\[{]([^)\]}]+)[)\]}]/g;
const optionalMatches: string[] = [];
let match;
while ((match = optionalPattern.exec(title)) !== null) {
while ((match = optionalPattern.exec(originalTitle)) !== null) {
optionalMatches.push(match[1]);
}
// Remove parenthetical/bracketed content to get required portion
const required = title.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
const requiredRaw = originalTitle.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
// Normalize the required portion (handles CamelCase, punctuation)
const required = this.normalizeForMatching(requiredRaw);
const optional = optionalMatches.join(' ');
return { required, optional };
@@ -400,7 +444,7 @@ export class RankingAlgorithm {
// ========== STAGE 1.5: AUTHOR PRESENCE CHECK (OPTIONAL) ==========
// Only enforced in automatic mode (requireAuthor: true)
// Interactive search (requireAuthor: false) shows all results
if (requireAuthor && !this.checkAuthorPresence(torrentTitle, requestAuthor)) {
if (requireAuthor && !this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors)) {
// No high-confidence author match → reject to prevent wrong-author matches
return 0;
}
@@ -408,6 +452,10 @@ export class RankingAlgorithm {
// ========== STAGE 2: TITLE MATCHING (0-35 points) ==========
let titleScore = 0;
// Keep original torrent title (lowercased only) for metadata marker detection
// Markers like [ ] ( ) : are removed by normalization but needed for suffix validation
const torrentTitleOriginal = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();
// Try matching with full title first, then fall back to required title (without parentheses)
const titlesToTry = [requestTitle];
if (requiredTitle !== requestTitle) {
@@ -422,20 +470,37 @@ export class RankingAlgorithm {
const beforeTitle = torrentTitle.substring(0, titleIndex);
const afterTitle = torrentTitle.substring(titleIndex + titleToMatch.length);
// For metadata marker detection, try to find where the title starts in the ORIGINAL string
// Search for key words from the title to locate position in original
const titleWords = titleToMatch.split(/\s+/).filter(w => w.length > 2);
let afterTitleOriginal = '';
if (titleWords.length > 0) {
// Find the last significant title word in the original string
const lastTitleWord = titleWords[titleWords.length - 1];
const lastWordIdxOriginal = torrentTitleOriginal.lastIndexOf(lastTitleWord);
if (lastWordIdxOriginal !== -1) {
afterTitleOriginal = torrentTitleOriginal.substring(lastWordIdxOriginal + lastTitleWord.length);
}
}
// Extract significant words BEFORE the matched title
const beforeWords = extractWords(beforeTitle, stopWords);
// Title is complete if:
// 1. Acceptable prefix (no words, OR structured metadata like "Author - Series - ")
// 2. Followed by clear metadata markers (not "'s Secret" or " Is Watching")
// Check ORIGINAL title for metadata markers ([ ] ( ) etc. not normalized away)
const metadataMarkers = [' by ', ' - ', ' [', ' (', ' {', ' :', ','];
// Check if afterTitle starts with author name (handles space-separated format like "Title Author Year")
const afterStartsWithAuthor = requestAuthor.length > 2 &&
afterTitle.trim().startsWith(requestAuthor);
// Check if afterTitle starts with any author name (handles space-separated format like "Title Author Year")
const afterStartsWithAuthor = normalizedAuthors.some(author =>
author.length > 2 && afterTitle.trim().startsWith(author)
);
// Check metadata markers in both normalized and original suffixes
const hasMetadataSuffix = afterTitle === '' ||
metadataMarkers.some(marker => afterTitle.startsWith(marker)) ||
metadataMarkers.some(marker => afterTitleOriginal.startsWith(marker)) ||
afterStartsWithAuthor;
// Check prefix validity:
@@ -446,16 +511,32 @@ export class RankingAlgorithm {
// Check if title is immediately preceded by a metadata separator
// This handles "Author - Series - 01 - Title" patterns
// Check both normalized and original strings for separators
const precedingText = beforeTitle.trimEnd();
// Also check original string for separators that got normalized away (like colons)
let beforeTitleOriginal = '';
if (titleWords.length > 0) {
const firstTitleWord = titleWords[0];
const firstWordIdxOriginal = torrentTitleOriginal.indexOf(firstTitleWord);
if (firstWordIdxOriginal !== -1) {
beforeTitleOriginal = torrentTitleOriginal.substring(0, firstWordIdxOriginal).trimEnd();
}
}
const titlePrecededBySeparator =
precedingText.endsWith('-') ||
precedingText.endsWith(':') ||
precedingText.endsWith('—');
precedingText.endsWith('—') ||
beforeTitleOriginal.endsWith('-') ||
beforeTitleOriginal.endsWith(':') ||
beforeTitleOriginal.endsWith('—');
// Check if author name appears in the prefix
// Check if any author name appears in the prefix
// This handles "Author Name - Title" patterns
const authorInPrefix = requestAuthor.length > 2 &&
beforeTitle.includes(requestAuthor);
const authorInPrefix = normalizedAuthors.some(author =>
author.length > 2 && beforeTitle.includes(author)
);
const hasAcceptablePrefix =
hasNoWordsPrefix ||
@@ -481,24 +562,18 @@ export class RankingAlgorithm {
}
// ========== STAGE 3: AUTHOR MATCHING (0-15 points) ==========
// Parse requested authors (split on separators, filter out roles)
const requestAuthors = requestAuthor
.split(/,|&| and | - /)
.map(a => a.trim())
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
// Check how many authors appear in torrent title (exact substring match)
const authorMatches = requestAuthors.filter(author =>
const authorMatches = normalizedAuthors.filter(author =>
torrentTitle.includes(author)
);
let authorScore = 0;
if (authorMatches.length > 0) {
// Exact substring match → proportional credit
authorScore = (authorMatches.length / requestAuthors.length) * 15;
authorScore = (authorMatches.length / normalizedAuthors.length) * 15;
} else {
// No exact match → use fuzzy similarity for partial credit
authorScore = compareTwoStrings(requestAuthor, torrentTitle) * 15;
authorScore = compareTwoStrings(requestAuthorNormalized, torrentTitle) * 15;
}
return Math.min(60, titleScore + authorScore);
@@ -506,22 +581,16 @@ export class RankingAlgorithm {
/**
* Check if author is present in torrent title with high confidence
* Handles variations: middle initials, spacing, punctuation, name order
* Uses pre-parsed and normalized authors array
*
* @param torrentTitle - Normalized torrent title (lowercase)
* @param requestAuthor - Normalized author name (lowercase)
* @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
* @param normalizedAuthors - Array of normalized author names (roles already filtered)
* @returns true if at least ONE author is present with high confidence
*/
private checkAuthorPresence(torrentTitle: string, requestAuthor: string): boolean {
// Parse multiple authors (same logic as Stage 3 author matching)
const authors = requestAuthor
.split(/,|&| and | - /)
.map(a => a.trim())
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
private checkAuthorPresenceWithParsed(torrentTitle: string, normalizedAuthors: string[]): boolean {
// At least ONE author must match with high confidence
return authors.some(author => {
// Check 1: Exact substring match
return normalizedAuthors.some(author => {
// Check 1: Exact substring match (works well now that both are normalized)
if (torrentTitle.includes(author)) {
return true;
}
@@ -537,6 +606,7 @@ export class RankingAlgorithm {
// Check 3: Core name components (first + last name present within 30 chars)
// Handles: "Sanderson, Brandon" vs "Brandon Sanderson"
// Handles: "Brandon R. Sanderson" vs "Brandon Sanderson"
// Now also handles: "VirginaEvans" → "virgina evans" (after normalization)
const words = author.split(/\s+/).filter(w => w.length > 1);
if (words.length >= 2) {
const firstName = words[0];
@@ -558,6 +628,27 @@ export class RankingAlgorithm {
});
}
/**
 * Check if author is present in torrent title with high confidence
 * Handles variations: middle initials, spacing, punctuation, name order, CamelCase
 *
 * The raw author string is split on `,`, `&`, ` and ` and ` - `; entries of two
 * characters or fewer and the role labels "translator" / "narrator" are
 * dropped. Separator and role detection are case-insensitive so raw,
 * mixed-case input ("Jane Doe And John Roe, Translator") is parsed the same
 * way as lowercased input. Original casing is kept until normalization so
 * CamelCase names can still be split.
 *
 * @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
 * @param requestAuthor - Raw author string (will be parsed and normalized internally)
 * @returns true if at least ONE author is present with high confidence
 */
private checkAuthorPresence(torrentTitle: string, requestAuthor: string): boolean {
  const ignoredRoles = ['translator', 'narrator'];

  const normalizedAuthors = requestAuthor
    // Case-insensitive so " And " in raw input is treated as a separator too
    .split(/,|&| and | - /i)
    .map(part => part.trim())
    // Compare roles in lowercase, but keep the entry's casing for normalization
    .filter(part => part.length > 2 && !ignoredRoles.includes(part.toLowerCase()))
    // Normalize each author for matching
    .map(part => this.normalizeForMatching(part));

  return this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors);
}
/**
* Detect format from torrent title
*/
@@ -687,6 +778,9 @@ export class RankingAlgorithm {
});
const ranked = filteredTorrents.map((torrent) => {
// Detect ebook format from title
const detectedFormat = this.detectEbookFormat(torrent);
// Calculate base scores (0-100)
// Reuse scoreMatch and scoreSeeders from audiobook ranking
const formatScore = this.scoreEbookFormat(torrent, ebook.preferredFormat);
@@ -765,6 +859,7 @@ export class RankingAlgorithm {
notes: [],
}, ebook.preferredFormat),
},
ebookFormat: detectedFormat !== 'unknown' ? detectedFormat : undefined,
};
});
@@ -824,19 +919,27 @@ export class RankingAlgorithm {
/**
* Detect ebook format from torrent title
* Handles formats in various positions: .epub, (epub), [epub], " epub"
*/
private detectEbookFormat(torrent: TorrentResult): string {
const title = torrent.title.toLowerCase();
// Check for common ebook format extensions/keywords
if (title.includes('.epub') || title.includes(' epub')) return 'epub';
if (title.includes('.pdf') || title.includes(' pdf')) return 'pdf';
if (title.includes('.mobi') || title.includes(' mobi')) return 'mobi';
if (title.includes('.azw3') || title.includes(' azw3')) return 'azw3';
if (title.includes('.azw') || title.includes(' azw')) return 'azw';
if (title.includes('.fb2') || title.includes(' fb2')) return 'fb2';
if (title.includes('.cbz') || title.includes(' cbz')) return 'cbz';
if (title.includes('.cbr') || title.includes(' cbr')) return 'cbr';
// Patterns: .format, (format), [format], " format", "_format"
const formats = ['epub', 'pdf', 'mobi', 'azw3', 'azw', 'fb2', 'cbz', 'cbr'];
for (const format of formats) {
if (
title.includes(`.${format}`) || // file.epub
title.includes(`(${format})`) || // (epub)
title.includes(`[${format}]`) || // [epub]
title.includes(` ${format}`) || // " epub" (space before)
title.includes(`_${format}`) || // _epub (underscore)
title.endsWith(format) // ends with format
) {
return format;
}
}
// Default to unknown
return 'unknown';