mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 04:40:09 +00:00
Add interactive ebook search & selection
Introduce interactive ebook support: adds two API endpoints to search (interactive-search-ebook) and create/select ebook requests (select-ebook), plus server-side handlers to route Anna's Archive (direct) and indexer (torrent/NZB) downloads. Frontend: extend RequestActionsDropdown and InteractiveTorrentSearchModal to support an "ebook" search mode and selection flow, and add hooks (useInteractiveSearchEbook / useSelectEbook). Settings: add ebook_auto_grab_enabled with UI toggle and enforce disabling when no ebook sources are enabled; settings GET/PUT updated to persist the flag (default = true to preserve behavior). Documentation updated (scheduler, ebook-sidecar, settings pages) and ranking algorithm docs/tests extended to cover ebook-related normalization and matching cases. Includes logging and ranking integration for indexer results and normalization for Anna's Archive handling.
This commit is contained in:
@@ -397,3 +397,88 @@ export function useRequestWithTorrent() {
|
||||
|
||||
return { requestWithTorrent, isLoading, error };
|
||||
}
|
||||
|
||||
/**
 * React hook for running an interactive ebook search against an existing request.
 *
 * Returns `searchEbooks` together with `isLoading` / `error` state that
 * reflects the most recent call.
 */
export function useInteractiveSearchEbook() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  /**
   * POST to `/api/requests/{requestId}/interactive-search-ebook`.
   *
   * @param requestId - ID of the request to search ebooks for.
   * @param customTitle - Optional title override; sent as `{ customTitle }` in
   *   the JSON body. When falsy, no body is sent.
   * @returns The `results` value from the response payload, or `[]` when the
   *   payload has none.
   * @throws Error when there is no access token, when the server answers with
   *   a non-OK status, or when an OK response cannot be parsed as JSON. The
   *   error is also stored in `error` (except the authentication error, which
   *   is thrown before any state is touched).
   */
  const searchEbooks = async (requestId: string, customTitle?: string) => {
    if (!accessToken) {
      throw new Error('Not authenticated');
    }

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth(`/api/requests/${requestId}/interactive-search-ebook`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: customTitle ? JSON.stringify({ customTitle }) : undefined,
      });

      // Error responses are not guaranteed to carry JSON (e.g. a proxy's HTML
      // error page). Tolerate an unparseable body on failure so the caller
      // sees the intended failure message rather than a JSON syntax error.
      let data;
      try {
        data = await response.json();
      } catch (parseError) {
        if (response.ok) {
          throw parseError;
        }
      }

      if (!response.ok) {
        throw new Error(data?.error || data?.message || 'Failed to search for ebooks');
      }

      return data?.results || [];
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { searchEbooks, isLoading, error };
}
|
||||
|
||||
/**
 * React hook for selecting a specific ebook search result for a request.
 *
 * Returns `selectEbook` together with `isLoading` / `error` state that
 * reflects the most recent call.
 */
export function useSelectEbook() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  /**
   * POST the chosen result to `/api/requests/{requestId}/select-ebook` and,
   * on success, revalidate every cached key containing `/api/requests`.
   *
   * @param requestId - ID of the request the ebook is selected for.
   * @param ebook - The selected search result; serialized as-is into the
   *   `{ ebook }` JSON body.
   * @returns The parsed response payload.
   * @throws Error when there is no access token, when the server answers with
   *   a non-OK status, or when an OK response cannot be parsed as JSON. The
   *   error is also stored in `error` (except the authentication error, which
   *   is thrown before any state is touched).
   */
  const selectEbook = async (requestId: string, ebook: unknown) => {
    if (!accessToken) {
      throw new Error('Not authenticated');
    }

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth(`/api/requests/${requestId}/select-ebook`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ ebook }),
      });

      // Error responses are not guaranteed to carry JSON (e.g. a proxy's HTML
      // error page). Tolerate an unparseable body on failure so the caller
      // sees the intended failure message rather than a JSON syntax error.
      let data;
      try {
        data = await response.json();
      } catch (parseError) {
        if (response.ok) {
          throw parseError;
        }
      }

      if (!response.ok) {
        throw new Error(data?.error || data?.message || 'Failed to download ebook');
      }

      // Revalidate requests
      mutate((key) => typeof key === 'string' && key.includes('/api/requests'));

      return data;
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { selectEbook, isLoading, error };
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
* Component: Monitor RSS Feeds Processor
|
||||
* Documentation: documentation/backend/services/scheduler.md
|
||||
*
|
||||
* Monitors RSS feeds for new audiobook releases and matches against missing requests
|
||||
* Monitors RSS feeds for new releases and matches against missing requests (audiobooks and ebooks)
|
||||
*/
|
||||
|
||||
import { prisma } from '../db';
|
||||
@@ -57,11 +57,10 @@ export async function processMonitorRssFeeds(payload: MonitorRssFeedsPayload): P
|
||||
return { success: true, message: 'No RSS results', matched: 0 };
|
||||
}
|
||||
|
||||
// Get all active audiobook requests awaiting search (missing audiobooks)
|
||||
// Note: RSS feeds are for torrents, so only audiobook requests are matched
|
||||
// Get all active requests awaiting search (audiobooks and ebooks)
|
||||
// Both types can be matched against RSS torrent feeds
|
||||
const missingRequests = await prisma.request.findMany({
|
||||
where: {
|
||||
type: 'audiobook', // Only audiobook requests (RSS feeds are for torrents)
|
||||
status: 'awaiting_search',
|
||||
deletedAt: null,
|
||||
},
|
||||
@@ -75,7 +74,7 @@ export async function processMonitorRssFeeds(payload: MonitorRssFeedsPayload): P
|
||||
return { success: true, message: 'No missing requests', matched: 0 };
|
||||
}
|
||||
|
||||
// Match RSS results against missing audiobooks
|
||||
// Match RSS results against missing requests
|
||||
let matched = 0;
|
||||
const jobQueue = getJobQueueService();
|
||||
|
||||
@@ -96,16 +95,27 @@ export async function processMonitorRssFeeds(payload: MonitorRssFeedsPayload): P
|
||||
if (hasAuthor && titleMatchCount >= 2) {
|
||||
logger.info(`Match found! "${audiobook.title}" by ${audiobook.author} matches torrent: ${torrent.title}`);
|
||||
|
||||
// Trigger search job to process this request
|
||||
// Trigger appropriate search job based on request type
|
||||
try {
|
||||
await jobQueue.addSearchJob(request.id, {
|
||||
id: audiobook.id,
|
||||
title: audiobook.title,
|
||||
author: audiobook.author,
|
||||
asin: audiobook.audibleAsin || undefined,
|
||||
});
|
||||
matched++;
|
||||
logger.info(`Triggered search job for request ${request.id}`);
|
||||
if (request.type === 'ebook') {
|
||||
await jobQueue.addSearchEbookJob(request.id, {
|
||||
id: audiobook.id,
|
||||
title: audiobook.title,
|
||||
author: audiobook.author,
|
||||
asin: audiobook.audibleAsin || undefined,
|
||||
});
|
||||
matched++;
|
||||
logger.info(`Triggered ebook search job for request ${request.id}`);
|
||||
} else {
|
||||
await jobQueue.addSearchJob(request.id, {
|
||||
id: audiobook.id,
|
||||
title: audiobook.title,
|
||||
author: audiobook.author,
|
||||
asin: audiobook.audibleAsin || undefined,
|
||||
});
|
||||
matched++;
|
||||
logger.info(`Triggered audiobook search job for request ${request.id}`);
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Failed to trigger search for request ${request.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
||||
}
|
||||
|
||||
@@ -785,8 +785,16 @@ async function createEbookRequestIfEnabled(
|
||||
logger: RMABLogger
|
||||
): Promise<void> {
|
||||
try {
|
||||
// Check which ebook sources are enabled
|
||||
const configService = getConfigService();
|
||||
|
||||
// Check if auto-grab is enabled (default: true for backward compatibility)
|
||||
const autoGrabEnabled = await configService.get('ebook_auto_grab_enabled');
|
||||
if (autoGrabEnabled === 'false') {
|
||||
logger.info('Ebook auto-grab disabled, skipping automatic ebook request creation');
|
||||
return;
|
||||
}
|
||||
|
||||
// Check which ebook sources are enabled
|
||||
const annasArchiveEnabled = await configService.get('ebook_annas_archive_enabled');
|
||||
const indexerSearchEnabled = await configService.get('ebook_indexer_search_enabled');
|
||||
|
||||
|
||||
@@ -21,11 +21,9 @@ export async function processRetryMissingTorrents(payload: RetryMissingTorrentsP
|
||||
logger.info('Starting retry job for requests awaiting search...');
|
||||
|
||||
try {
|
||||
// Find all active audiobook requests in awaiting_search status
|
||||
// Note: Ebook requests have separate search mechanism (search_ebook job)
|
||||
// Find all active requests (audiobook or ebook) in awaiting_search status
|
||||
const requests = await prisma.request.findMany({
|
||||
where: {
|
||||
type: 'audiobook', // Only audiobook requests (ebooks use different search)
|
||||
status: 'awaiting_search',
|
||||
deletedAt: null,
|
||||
},
|
||||
@@ -45,20 +43,33 @@ export async function processRetryMissingTorrents(payload: RetryMissingTorrentsP
|
||||
};
|
||||
}
|
||||
|
||||
// Trigger search job for each request
|
||||
// Trigger appropriate search job for each request based on type
|
||||
const jobQueue = getJobQueueService();
|
||||
let triggered = 0;
|
||||
|
||||
for (const request of requests) {
|
||||
try {
|
||||
await jobQueue.addSearchJob(request.id, {
|
||||
id: request.audiobook.id,
|
||||
title: request.audiobook.title,
|
||||
author: request.audiobook.author,
|
||||
asin: request.audiobook.audibleAsin || undefined,
|
||||
});
|
||||
triggered++;
|
||||
logger.info(`Triggered search for request ${request.id}: ${request.audiobook.title}`);
|
||||
if (request.type === 'ebook') {
|
||||
// Ebook requests use ebook search (Anna's Archive, etc.)
|
||||
await jobQueue.addSearchEbookJob(request.id, {
|
||||
id: request.audiobook.id,
|
||||
title: request.audiobook.title,
|
||||
author: request.audiobook.author,
|
||||
asin: request.audiobook.audibleAsin || undefined,
|
||||
});
|
||||
triggered++;
|
||||
logger.info(`Triggered ebook search for request ${request.id}: ${request.audiobook.title}`);
|
||||
} else {
|
||||
// Audiobook requests use indexer search (Prowlarr)
|
||||
await jobQueue.addSearchJob(request.id, {
|
||||
id: request.audiobook.id,
|
||||
title: request.audiobook.title,
|
||||
author: request.audiobook.author,
|
||||
asin: request.audiobook.audibleAsin || undefined,
|
||||
});
|
||||
triggered++;
|
||||
logger.info(`Triggered audiobook search for request ${request.id}: ${request.audiobook.title}`);
|
||||
}
|
||||
} catch (error) {
|
||||
logger.error(`Failed to trigger search for request ${request.id}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
||||
}
|
||||
|
||||
@@ -95,6 +95,7 @@ export interface RankedEbookTorrent extends TorrentResult {
|
||||
finalScore: number; // score + bonusPoints
|
||||
rank: number;
|
||||
breakdown: EbookScoreBreakdown;
|
||||
ebookFormat?: string; // Detected ebook format (epub, pdf, mobi, etc.)
|
||||
}
|
||||
|
||||
export class RankingAlgorithm {
|
||||
@@ -330,6 +331,26 @@ export class RankingAlgorithm {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Canonicalize a title or author string so differently formatted spellings
 * compare equal: CamelCase boundaries, underscores and punctuation all become
 * single spaces, and the result is lowercased and trimmed.
 *
 *   "TheCorrespondent"          → "the correspondent"
 *   "Twelve.Months-Jim.Butcher" → "twelve months jim butcher"
 *   "Author_Name_Book"          → "author name book"
 *
 * Apostrophes are kept so contractions stay intact.
 */
private normalizeForMatching(text: string): string {
  // The lower→upper boundary is only detectable before lowercasing.
  const camelSplit = text.replace(/([a-z])([A-Z])/g, '$1 $2');
  const lowered = camelSplit.toLowerCase();

  // `_` is a word character for \w, so the punctuation pass below would keep it.
  const underscoresSpaced = lowered.replace(/_/g, ' ');

  // Everything that is not a word character, whitespace or apostrophe is a separator.
  const punctuationSpaced = underscoresSpaced.replace(/[^\w\s']/g, ' ');

  // Squash the runs of whitespace the substitutions above may have produced.
  return punctuationSpaced.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
/**
|
||||
* Score title/author match quality (60 points max)
|
||||
* Title similarity: 0-45 points (heavily weighted!)
|
||||
@@ -340,10 +361,22 @@ export class RankingAlgorithm {
|
||||
audiobook: AudiobookRequest,
|
||||
requireAuthor: boolean = true
|
||||
): number {
|
||||
// Normalize whitespace (multiple spaces → single space) for consistent matching
|
||||
const torrentTitle = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();
|
||||
const requestTitle = audiobook.title.toLowerCase().replace(/\s+/g, ' ').trim();
|
||||
const requestAuthor = audiobook.author.toLowerCase().replace(/\s+/g, ' ').trim();
|
||||
// Normalize for matching (handles CamelCase, punctuation separators)
|
||||
const torrentTitle = this.normalizeForMatching(torrent.title);
|
||||
const requestTitle = this.normalizeForMatching(audiobook.title);
|
||||
|
||||
// Parse authors from RAW string first (preserving commas for splitting)
|
||||
// Then normalize individual authors for matching
|
||||
const requestAuthorRaw = audiobook.author.toLowerCase().replace(/\s+/g, ' ').trim();
|
||||
const parsedAuthors = requestAuthorRaw
|
||||
.split(/,|&| and | - /)
|
||||
.map(a => a.trim())
|
||||
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
|
||||
|
||||
// Normalize parsed authors for matching (handles CamelCase in author names)
|
||||
const normalizedAuthors = parsedAuthors.map(a => this.normalizeForMatching(a));
|
||||
// Combined normalized author string for fuzzy matching
|
||||
const requestAuthorNormalized = normalizedAuthors.join(' ');
|
||||
|
||||
// ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
|
||||
// Extract significant words (filter out common stop words)
|
||||
@@ -351,26 +384,37 @@ export class RankingAlgorithm {
|
||||
|
||||
const extractWords = (text: string, stopList: string[]): string[] => {
|
||||
return text
|
||||
// Split CamelCase FIRST: "TheCorrespondent" → "The Correspondent"
|
||||
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
||||
.toLowerCase()
|
||||
.replace(/[^\w\s]/g, ' ') // Remove punctuation
|
||||
// Replace underscores with spaces (must be explicit since \w includes _)
|
||||
.replace(/_/g, ' ')
|
||||
// Remove other punctuation (but keep apostrophes for contractions)
|
||||
.replace(/[^\w\s']/g, ' ')
|
||||
.split(/\s+/)
|
||||
.filter(word => word.length > 0 && !stopList.includes(word));
|
||||
};
|
||||
|
||||
// Separate required words (outside parentheses/brackets) from optional words (inside)
|
||||
// This handles common patterns like "Title (Subtitle)" where subtitle may be omitted
|
||||
// Note: Run on ORIGINAL title to preserve brackets, then normalize the result
|
||||
const separateRequiredOptional = (title: string): { required: string; optional: string } => {
|
||||
// Work with original title format for bracket detection
|
||||
const originalTitle = audiobook.title.toLowerCase();
|
||||
|
||||
// Extract content in parentheses/brackets as optional
|
||||
const optionalPattern = /[(\[{]([^)\]}]+)[)\]}]/g;
|
||||
const optionalMatches: string[] = [];
|
||||
let match;
|
||||
|
||||
while ((match = optionalPattern.exec(title)) !== null) {
|
||||
while ((match = optionalPattern.exec(originalTitle)) !== null) {
|
||||
optionalMatches.push(match[1]);
|
||||
}
|
||||
|
||||
// Remove parenthetical/bracketed content to get required portion
|
||||
const required = title.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
|
||||
const requiredRaw = originalTitle.replace(/[(\[{][^)\]}]+[)\]}]/g, ' ').trim();
|
||||
// Normalize the required portion (handles CamelCase, punctuation)
|
||||
const required = this.normalizeForMatching(requiredRaw);
|
||||
const optional = optionalMatches.join(' ');
|
||||
|
||||
return { required, optional };
|
||||
@@ -400,7 +444,7 @@ export class RankingAlgorithm {
|
||||
// ========== STAGE 1.5: AUTHOR PRESENCE CHECK (OPTIONAL) ==========
|
||||
// Only enforced in automatic mode (requireAuthor: true)
|
||||
// Interactive search (requireAuthor: false) shows all results
|
||||
if (requireAuthor && !this.checkAuthorPresence(torrentTitle, requestAuthor)) {
|
||||
if (requireAuthor && !this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors)) {
|
||||
// No high-confidence author match → reject to prevent wrong-author matches
|
||||
return 0;
|
||||
}
|
||||
@@ -408,6 +452,10 @@ export class RankingAlgorithm {
|
||||
// ========== STAGE 2: TITLE MATCHING (0-35 points) ==========
|
||||
let titleScore = 0;
|
||||
|
||||
// Keep original torrent title (lowercased only) for metadata marker detection
|
||||
// Markers like [ ] ( ) : are removed by normalization but needed for suffix validation
|
||||
const torrentTitleOriginal = torrent.title.toLowerCase().replace(/\s+/g, ' ').trim();
|
||||
|
||||
// Try matching with full title first, then fall back to required title (without parentheses)
|
||||
const titlesToTry = [requestTitle];
|
||||
if (requiredTitle !== requestTitle) {
|
||||
@@ -422,20 +470,37 @@ export class RankingAlgorithm {
|
||||
const beforeTitle = torrentTitle.substring(0, titleIndex);
|
||||
const afterTitle = torrentTitle.substring(titleIndex + titleToMatch.length);
|
||||
|
||||
// For metadata marker detection, try to find where the title starts in the ORIGINAL string
|
||||
// Search for key words from the title to locate position in original
|
||||
const titleWords = titleToMatch.split(/\s+/).filter(w => w.length > 2);
|
||||
let afterTitleOriginal = '';
|
||||
if (titleWords.length > 0) {
|
||||
// Find the last significant title word in the original string
|
||||
const lastTitleWord = titleWords[titleWords.length - 1];
|
||||
const lastWordIdxOriginal = torrentTitleOriginal.lastIndexOf(lastTitleWord);
|
||||
if (lastWordIdxOriginal !== -1) {
|
||||
afterTitleOriginal = torrentTitleOriginal.substring(lastWordIdxOriginal + lastTitleWord.length);
|
||||
}
|
||||
}
|
||||
|
||||
// Extract significant words BEFORE the matched title
|
||||
const beforeWords = extractWords(beforeTitle, stopWords);
|
||||
|
||||
// Title is complete if:
|
||||
// 1. Acceptable prefix (no words, OR structured metadata like "Author - Series - ")
|
||||
// 2. Followed by clear metadata markers (not "'s Secret" or " Is Watching")
|
||||
// Check ORIGINAL title for metadata markers ([ ] ( ) etc. not normalized away)
|
||||
const metadataMarkers = [' by ', ' - ', ' [', ' (', ' {', ' :', ','];
|
||||
|
||||
// Check if afterTitle starts with author name (handles space-separated format like "Title Author Year")
|
||||
const afterStartsWithAuthor = requestAuthor.length > 2 &&
|
||||
afterTitle.trim().startsWith(requestAuthor);
|
||||
// Check if afterTitle starts with any author name (handles space-separated format like "Title Author Year")
|
||||
const afterStartsWithAuthor = normalizedAuthors.some(author =>
|
||||
author.length > 2 && afterTitle.trim().startsWith(author)
|
||||
);
|
||||
|
||||
// Check metadata markers in both normalized and original suffixes
|
||||
const hasMetadataSuffix = afterTitle === '' ||
|
||||
metadataMarkers.some(marker => afterTitle.startsWith(marker)) ||
|
||||
metadataMarkers.some(marker => afterTitleOriginal.startsWith(marker)) ||
|
||||
afterStartsWithAuthor;
|
||||
|
||||
// Check prefix validity:
|
||||
@@ -446,16 +511,32 @@ export class RankingAlgorithm {
|
||||
|
||||
// Check if title is immediately preceded by a metadata separator
|
||||
// This handles "Author - Series - 01 - Title" patterns
|
||||
// Check both normalized and original strings for separators
|
||||
const precedingText = beforeTitle.trimEnd();
|
||||
|
||||
// Also check original string for separators that got normalized away (like colons)
|
||||
let beforeTitleOriginal = '';
|
||||
if (titleWords.length > 0) {
|
||||
const firstTitleWord = titleWords[0];
|
||||
const firstWordIdxOriginal = torrentTitleOriginal.indexOf(firstTitleWord);
|
||||
if (firstWordIdxOriginal !== -1) {
|
||||
beforeTitleOriginal = torrentTitleOriginal.substring(0, firstWordIdxOriginal).trimEnd();
|
||||
}
|
||||
}
|
||||
|
||||
const titlePrecededBySeparator =
|
||||
precedingText.endsWith('-') ||
|
||||
precedingText.endsWith(':') ||
|
||||
precedingText.endsWith('—');
|
||||
precedingText.endsWith('—') ||
|
||||
beforeTitleOriginal.endsWith('-') ||
|
||||
beforeTitleOriginal.endsWith(':') ||
|
||||
beforeTitleOriginal.endsWith('—');
|
||||
|
||||
// Check if author name appears in the prefix
|
||||
// Check if any author name appears in the prefix
|
||||
// This handles "Author Name - Title" patterns
|
||||
const authorInPrefix = requestAuthor.length > 2 &&
|
||||
beforeTitle.includes(requestAuthor);
|
||||
const authorInPrefix = normalizedAuthors.some(author =>
|
||||
author.length > 2 && beforeTitle.includes(author)
|
||||
);
|
||||
|
||||
const hasAcceptablePrefix =
|
||||
hasNoWordsPrefix ||
|
||||
@@ -481,24 +562,18 @@ export class RankingAlgorithm {
|
||||
}
|
||||
|
||||
// ========== STAGE 3: AUTHOR MATCHING (0-15 points) ==========
|
||||
// Parse requested authors (split on separators, filter out roles)
|
||||
const requestAuthors = requestAuthor
|
||||
.split(/,|&| and | - /)
|
||||
.map(a => a.trim())
|
||||
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
|
||||
|
||||
// Check how many authors appear in torrent title (exact substring match)
|
||||
const authorMatches = requestAuthors.filter(author =>
|
||||
const authorMatches = normalizedAuthors.filter(author =>
|
||||
torrentTitle.includes(author)
|
||||
);
|
||||
|
||||
let authorScore = 0;
|
||||
if (authorMatches.length > 0) {
|
||||
// Exact substring match → proportional credit
|
||||
authorScore = (authorMatches.length / requestAuthors.length) * 15;
|
||||
authorScore = (authorMatches.length / normalizedAuthors.length) * 15;
|
||||
} else {
|
||||
// No exact match → use fuzzy similarity for partial credit
|
||||
authorScore = compareTwoStrings(requestAuthor, torrentTitle) * 15;
|
||||
authorScore = compareTwoStrings(requestAuthorNormalized, torrentTitle) * 15;
|
||||
}
|
||||
|
||||
return Math.min(60, titleScore + authorScore);
|
||||
@@ -506,22 +581,16 @@ export class RankingAlgorithm {
|
||||
|
||||
/**
|
||||
* Check if author is present in torrent title with high confidence
|
||||
* Handles variations: middle initials, spacing, punctuation, name order
|
||||
* Uses pre-parsed and normalized authors array
|
||||
*
|
||||
* @param torrentTitle - Normalized torrent title (lowercase)
|
||||
* @param requestAuthor - Normalized author name (lowercase)
|
||||
* @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
|
||||
* @param normalizedAuthors - Array of normalized author names (roles already filtered)
|
||||
* @returns true if at least ONE author is present with high confidence
|
||||
*/
|
||||
private checkAuthorPresence(torrentTitle: string, requestAuthor: string): boolean {
|
||||
// Parse multiple authors (same logic as Stage 3 author matching)
|
||||
const authors = requestAuthor
|
||||
.split(/,|&| and | - /)
|
||||
.map(a => a.trim())
|
||||
.filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));
|
||||
|
||||
private checkAuthorPresenceWithParsed(torrentTitle: string, normalizedAuthors: string[]): boolean {
|
||||
// At least ONE author must match with high confidence
|
||||
return authors.some(author => {
|
||||
// Check 1: Exact substring match
|
||||
return normalizedAuthors.some(author => {
|
||||
// Check 1: Exact substring match (works well now that both are normalized)
|
||||
if (torrentTitle.includes(author)) {
|
||||
return true;
|
||||
}
|
||||
@@ -537,6 +606,7 @@ export class RankingAlgorithm {
|
||||
// Check 3: Core name components (first + last name present within 30 chars)
|
||||
// Handles: "Sanderson, Brandon" vs "Brandon Sanderson"
|
||||
// Handles: "Brandon R. Sanderson" vs "Brandon Sanderson"
|
||||
// Now also handles: "VirginaEvans" → "virgina evans" (after normalization)
|
||||
const words = author.split(/\s+/).filter(w => w.length > 1);
|
||||
if (words.length >= 2) {
|
||||
const firstName = words[0];
|
||||
@@ -558,6 +628,27 @@ export class RankingAlgorithm {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Check if an author is present in the torrent title with high confidence,
 * starting from an unparsed author string.
 *
 * The author string is split on `,`, `&`, ` and ` and ` - `. Fragments of two
 * characters or fewer and bare role labels ("translator", "narrator") are
 * dropped, each remaining name is passed through normalizeForMatching, and the
 * resulting list is handed to checkAuthorPresenceWithParsed.
 *
 * The ` and ` separator and the role-label filter are case-insensitive, so a
 * mixed-case input such as "Jane Doe AND John Roe, Translator" parses the same
 * way as its lowercased form. Casing is otherwise left untouched until
 * normalization, which needs it to split CamelCase names.
 *
 * @param torrentTitle - Normalized torrent title (already processed by normalizeForMatching)
 * @param requestAuthor - Raw author string (parsed and normalized internally)
 * @returns true if at least ONE author is present with high confidence
 */
private checkAuthorPresence(torrentTitle: string, requestAuthor: string): boolean {
  const roleLabels = ['translator', 'narrator'];

  const normalizedAuthors = requestAuthor
    .split(/,|&| and | - /i)
    .map(part => part.trim())
    // Compare role labels in lowercase: the input is documented as raw, so
    // "Translator" must be filtered just like "translator".
    .filter(part => part.length > 2 && !roleLabels.includes(part.toLowerCase()))
    .map(part => this.normalizeForMatching(part));

  return this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors);
}
|
||||
|
||||
/**
|
||||
* Detect format from torrent title
|
||||
*/
|
||||
@@ -687,6 +778,9 @@ export class RankingAlgorithm {
|
||||
});
|
||||
|
||||
const ranked = filteredTorrents.map((torrent) => {
|
||||
// Detect ebook format from title
|
||||
const detectedFormat = this.detectEbookFormat(torrent);
|
||||
|
||||
// Calculate base scores (0-100)
|
||||
// Reuse scoreMatch and scoreSeeders from audiobook ranking
|
||||
const formatScore = this.scoreEbookFormat(torrent, ebook.preferredFormat);
|
||||
@@ -765,6 +859,7 @@ export class RankingAlgorithm {
|
||||
notes: [],
|
||||
}, ebook.preferredFormat),
|
||||
},
|
||||
ebookFormat: detectedFormat !== 'unknown' ? detectedFormat : undefined,
|
||||
};
|
||||
});
|
||||
|
||||
@@ -824,19 +919,27 @@ export class RankingAlgorithm {
|
||||
|
||||
/**
|
||||
* Detect ebook format from torrent title
|
||||
* Handles formats in various positions: .epub, (epub), [epub], " epub"
|
||||
*/
|
||||
private detectEbookFormat(torrent: TorrentResult): string {
|
||||
const title = torrent.title.toLowerCase();
|
||||
|
||||
// Check for common ebook format extensions/keywords
|
||||
if (title.includes('.epub') || title.includes(' epub')) return 'epub';
|
||||
if (title.includes('.pdf') || title.includes(' pdf')) return 'pdf';
|
||||
if (title.includes('.mobi') || title.includes(' mobi')) return 'mobi';
|
||||
if (title.includes('.azw3') || title.includes(' azw3')) return 'azw3';
|
||||
if (title.includes('.azw') || title.includes(' azw')) return 'azw';
|
||||
if (title.includes('.fb2') || title.includes(' fb2')) return 'fb2';
|
||||
if (title.includes('.cbz') || title.includes(' cbz')) return 'cbz';
|
||||
if (title.includes('.cbr') || title.includes(' cbr')) return 'cbr';
|
||||
// Patterns: .format, (format), [format], " format", "_format"
|
||||
const formats = ['epub', 'pdf', 'mobi', 'azw3', 'azw', 'fb2', 'cbz', 'cbr'];
|
||||
|
||||
for (const format of formats) {
|
||||
if (
|
||||
title.includes(`.${format}`) || // file.epub
|
||||
title.includes(`(${format})`) || // (epub)
|
||||
title.includes(`[${format}]`) || // [epub]
|
||||
title.includes(` ${format}`) || // " epub" (space before)
|
||||
title.includes(`_${format}`) || // _epub (underscore)
|
||||
title.endsWith(format) // ends with format
|
||||
) {
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
// Default to unknown
|
||||
return 'unknown';
|
||||
|
||||
Reference in New Issue
Block a user