diff --git a/documentation/TABLEOFCONTENTS.md b/documentation/TABLEOFCONTENTS.md index 5c99244..893287c 100644 --- a/documentation/TABLEOFCONTENTS.md +++ b/documentation/TABLEOFCONTENTS.md @@ -41,10 +41,11 @@ ## E-book Support (First-Class) - **First-class ebook requests, separate tracking** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) - **Multi-source ebook downloads (Anna's Archive + Indexer Search)** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) +- **Ebook indexer search (Prowlarr with ebook categories)** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md#flow-indexer-search) - **ASIN-based matching, format selection** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) -- **Ebook ranking algorithm (inverted size scoring)** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) +- **Ebook ranking algorithm (unified with audiobooks)** → [phase3/ranking-algorithm.md](phase3/ranking-algorithm.md#ebook-torrent-ranking) - **Direct HTTP downloads from Anna's Archive** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) -- **Ebook delete behavior (files only)** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) +- **Ebook delete behavior (files only, torrents seed)** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md#delete-behavior) - **Ebook settings (3-section UI)** → [settings-pages.md](settings-pages.md#e-book-sidecar) - **Indexer categories (audiobook/ebook tabs)** → [settings-pages.md](settings-pages.md#indexer-categories-tabbed) @@ -116,7 +117,9 @@ **"How does e-book support work?"** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md) **"How do I enable e-book downloads?"** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md), [settings-pages.md](settings-pages.md#e-book-sidecar) **"How do I configure ebook sources (Anna's Archive vs Indexer)?"** → [settings-pages.md](settings-pages.md#e-book-sidecar) +**"How does ebook 
indexer search work?"** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md#flow-indexer-search) **"How do I configure ebook categories per indexer?"** → [settings-pages.md](settings-pages.md#indexer-categories-tabbed) +**"How does ebook ranking work?"** → [phase3/ranking-algorithm.md](phase3/ranking-algorithm.md#ebook-torrent-ranking) **"What happens when I delete an ebook request?"** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md#delete-behavior) **"Why do ebook requests have an orange badge?"** → [integrations/ebook-sidecar.md](integrations/ebook-sidecar.md#ui-representation) **"How do scheduled jobs work?"** → [backend/services/scheduler.md](backend/services/scheduler.md) diff --git a/documentation/integrations/ebook-sidecar.md b/documentation/integrations/ebook-sidecar.md index 7098442..3c97b51 100644 --- a/documentation/integrations/ebook-sidecar.md +++ b/documentation/integrations/ebook-sidecar.md @@ -1,9 +1,9 @@ # E-book Support -**Status:** ✅ Implemented | First-class ebook requests with multi-source support (Anna's Archive + future Indexer Search) +**Status:** ✅ Implemented | First-class ebook requests with multi-source support (Anna's Archive + Indexer Search) ## Overview -Ebooks are first-class citizens in RMAB, with their own request type, tracking, and UI representation. When an audiobook request completes, an ebook request is automatically created (if a source is enabled). Supports multiple sources: Anna's Archive (direct HTTP) and Indexer Search (via Prowlarr, coming soon). +Ebooks are first-class citizens in RMAB, with their own request type, tracking, and UI representation. When an audiobook request completes, an ebook request is automatically created (if a source is enabled). Supports multiple sources: Anna's Archive (direct HTTP) and Indexer Search (via Prowlarr with ebook categories). 
## Key Details @@ -14,15 +14,35 @@ Ebooks are first-class citizens in RMAB, with their own request type, tracking, - **UI Badge:** Orange (#f16f19) ebook badge to distinguish from audiobooks - **Separate Tracking:** Own progress, status, and error handling +### Source Priority +1. **Anna's Archive** (if enabled) - Direct HTTP downloads + - Searched first via ASIN, then title + author + - Uses FlareSolverr if configured (Cloudflare bypass) +2. **Indexer Search** (if enabled, and no Anna's Archive result) + - Searches Prowlarr with ebook categories (default: 7020) + - Ranks using unified ranking algorithm with ebook-specific scoring + - Downloads via qBittorrent (torrents) or SABnzbd (Usenet) +3. **Both disabled** → Ebook downloads disabled entirely + ### Flow (Anna's Archive) 1. Audiobook organization completes -2. Ebook request created automatically (if Anna's Archive enabled) +2. Ebook request created automatically (if source enabled) 3. `search_ebook` job searches Anna's Archive 4. `start_direct_download` downloads via HTTP 5. `organize_files` copies to audiobook folder 6. Request marked as `downloaded` (terminal) 7. "Available" notification sent +### Flow (Indexer Search) +1. Audiobook organization completes +2. Ebook request created automatically (if source enabled) +3. `search_ebook` job searches indexers (if Anna's Archive failed/disabled) +4. `download_torrent` job adds to qBittorrent/SABnzbd (reuses audiobook processor) +5. `monitor_download` tracks progress +6. `organize_files` copies to audiobook folder +7. Request marked as `downloaded` (terminal) +8. 
Torrent left to seed (respects seeding limits) + ### Configuration **Admin Settings → E-book Sidecar tab** (3 sections) @@ -37,7 +57,7 @@ Ebooks are first-class citizens in RMAB, with their own request type, tracking, #### Section 2: Indexer Search | Key | Default | Description | |-----|---------|-------------| -| `ebook_indexer_search_enabled` | `false` | Enable Indexer Search (not yet implemented) | +| `ebook_indexer_search_enabled` | `false` | Enable Indexer Search via Prowlarr | *Note: Ebook categories are configured per-indexer in Settings → Indexers → Edit Indexer → EBook tab* @@ -46,11 +66,6 @@ Ebooks are first-class citizens in RMAB, with their own request type, tracking, |-----|---------|---------|-------------| | `ebook_sidecar_preferred_format` | `epub` | `epub, pdf, mobi, azw3, any` | Preferred format | -### Source Priority -- If **Anna's Archive** is enabled → Use Anna's Archive (current behavior) -- If **only Indexer Search** is enabled → Log "not yet implemented", skip gracefully -- If **both disabled** → Ebook downloads disabled entirely - ## Database Schema **Request model additions:** @@ -66,25 +81,36 @@ childRequests Request[] @relation("EbookParent") ## Job Processors ### search_ebook -- Searches Anna's Archive by ASIN first, then title + author -- Creates download history record with `downloadClient: 'direct'` -- Triggers `start_direct_download` job +- Searches Anna's Archive first (if enabled), then indexers (if enabled) +- Anna's Archive: Creates download history with `downloadClient: 'direct'`, triggers `start_direct_download` +- Indexer: Triggers `download_torrent` job (reuses audiobook processor) ### start_direct_download - Downloads file via HTTP with progress tracking - Tries multiple slow download links on failure - Triggers `organize_files` on success -### monitor_direct_download -- Future use for async download monitoring -- Currently, most tracking happens in start_direct_download +### download_torrent (shared with audiobooks) +- 
Routes to qBittorrent (torrents) or SABnzbd (Usenet) +- Creates download history with indexer metadata +- Triggers `monitor_download` job -## Ranking Algorithm +## Ranking Algorithm (Indexer Results) -Ebook ranking (for future multi-source support): -- **Format Score:** 40 pts (exact match) to 10 pts (different format) -- **Size Score:** 30 pts (inverse - smaller files preferred) -- **Source Score:** 30 pts (Anna's Archive gets full score) +Ebook torrent ranking uses unified algorithm with ebook-specific scoring: + +| Component | Points | Description | +|-----------|--------|-------------| +| **Title/Author Match** | 60 pts | Reuses audiobook matching logic (word coverage, author presence) | +| **Format Match** | 10 pts | 10 pts if matches preferred format, 0 otherwise | +| **Size Quality** | 15 pts | Inverted: < 5MB = 15pts, 5-15MB = 10pts, 15-20MB = 5pts | +| **Seeder Count** | 15 pts | Logarithmic scaling (same as audiobooks) | + +**Filtering:** +- Files > 20 MB are filtered out (too large for ebooks) +- Dual threshold: base score >= 50 AND final score >= 50 + +**Bonus System:** Same as audiobooks (indexer priority, flag bonuses) ## Delete Behavior @@ -94,6 +120,7 @@ Ebook ranking (for future multi-source support): - Does NOT delete from backend library (Plex/ABS) - Does NOT clear audiobook availability linkage - Soft-deletes the ebook request record +- Torrents left to seed (respects seeding limits) ## UI Representation @@ -124,7 +151,7 @@ Configure URL in Admin Settings → E-book Sidecar: `http://localhost:8191` - Subsequent: ~2-5 seconds per page - Total: ~15-30 seconds per ebook -## Scraping Strategy +## Scraping Strategy (Anna's Archive) ### Method 1: ASIN Search (exact match) ``` @@ -161,17 +188,19 @@ Search: https://annas-archive.li/search?q=Title+Author&ext=epub&lang=en ## Technical Files **Processors:** -- `src/lib/processors/search-ebook.processor.ts` -- `src/lib/processors/direct-download.processor.ts` +- `src/lib/processors/search-ebook.processor.ts` 
- Multi-source search +- `src/lib/processors/direct-download.processor.ts` - Anna's Archive downloads +- `src/lib/processors/download-torrent.processor.ts` - Indexer downloads (shared) - `src/lib/processors/organize-files.processor.ts` (ebook branch) **Services:** -- `src/lib/services/ebook-scraper.ts` +- `src/lib/services/ebook-scraper.ts` - Anna's Archive scraping - `src/lib/services/job-queue.service.ts` (ebook job types) **Utils:** - `src/lib/utils/file-organizer.ts` (`organizeEbook` method) -- `src/lib/utils/ranking-algorithm.ts` (`rankEbooks` function) +- `src/lib/utils/ranking-algorithm.ts` (`rankEbookTorrents` function) +- `src/lib/utils/indexer-grouping.ts` (supports `'ebook'` type) **UI:** - `src/components/requests/RequestCard.tsx` (ebook badge) @@ -183,17 +212,10 @@ Search: https://annas-archive.li/search?q=Title+Author&ext=epub&lang=en | Format | Extension | Recommended | |--------|-----------|-------------| -| EPUB | `.epub` | ✅ Yes | -| PDF | `.pdf` | ⚠️ Sometimes | -| MOBI | `.mobi` | ⚠️ Legacy | -| AZW3 | `.azw3` | ⚠️ Sometimes | - -## Limitations - -1. Indexer Search not yet implemented (settings ready, search stubbed) -2. Title search may return wrong book for common titles -3. Download speed depends on file server load -4. English books only (title search filter) +| EPUB | `.epub` | Yes | +| PDF | `.pdf` | Sometimes | +| MOBI | `.mobi` | Legacy | +| AZW3 | `.azw3` | Sometimes | ## Indexer Categories @@ -203,8 +225,16 @@ Indexer configuration supports separate category arrays for audiobooks and ebook Categories are configured per-indexer via the tabbed interface in the Edit Indexer modal. +## Limitations + +1. Title search may return wrong book for common titles +2. Download speed depends on file server load (Anna's Archive) +3. English books only (title search filter for Anna's Archive) +4. 
Format detection from torrent titles may be imprecise + ## Related - [File Organization](../phase3/file-organization.md) - Ebook organization - [Settings Pages](../settings-pages.md) - Configuration UI - [Ranking Algorithm](../phase3/ranking-algorithm.md) - Ebook ranking - [Request Deletion](../admin-features/request-deletion.md) - Delete behavior +- [Prowlarr Integration](../phase3/prowlarr.md) - Indexer search diff --git a/documentation/phase3/ranking-algorithm.md b/documentation/phase3/ranking-algorithm.md index 35f3830..97d62b6 100644 --- a/documentation/phase3/ranking-algorithm.md +++ b/documentation/phase3/ranking-algorithm.md @@ -286,6 +286,80 @@ const ranked = rankTorrents(torrents, audiobook, { return ranked; // User can see torrents without author info ``` +## Ebook Torrent Ranking + +The ranking algorithm also supports ebook torrents from indexers with ebook-specific scoring. + +### Unified Code Architecture + +Ebook ranking **reuses** the following from audiobook ranking: +- `scoreMatch()` - Title/author matching (60 pts) +- `scoreSeeders()` - Seeder count scoring (15 pts) +- Bonus modifier system (indexer priority, flag bonuses) +- Dual threshold filtering (base >= 50, final >= 50) + +### Ebook-Specific Scoring + +**Format Match (10 pts max)** +- 10 pts if torrent format matches preferred format +- 0 pts otherwise (no partial credit) +- Format detected from torrent title keywords: `.epub`, `.pdf`, `.mobi`, `.azw3`, etc. 
+ +**Size Quality (15 pts max, INVERTED)** +- < 5 MB: 15 pts (optimal for ebooks) +- 5-15 MB: 10 pts (may have images) +- 15-20 MB: 5 pts (large but acceptable) +- > 20 MB: **Filtered out** (too large for ebooks) + +### Ebook vs Audiobook Comparison + +| Component | Audiobook | Ebook | +|-----------|-----------|-------| +| Title/Author | 60 pts (reused) | 60 pts (reused) | +| Format | 10 pts (M4B > M4A > MP3) | 10 pts (match = 10, else 0) | +| Size | 15 pts (larger = better) | 15 pts (smaller = better) | +| Seeders | 15 pts (reused) | 15 pts (reused) | +| Size Filter | < 20 MB filtered | > 20 MB filtered | + +### Ebook Interface + +```typescript +interface EbookTorrentRequest { + title: string; + author: string; + preferredFormat: string; // 'epub', 'pdf', 'mobi', etc. +} + +interface RankEbookTorrentsOptions { + indexerPriorities?: Map<number, number>; + flagConfigs?: IndexerFlagConfig[]; + requireAuthor?: boolean; // Default: true +} + +function rankEbookTorrents( + torrents: TorrentResult[], + ebook: EbookTorrentRequest, + options?: RankEbookTorrentsOptions +): RankedEbookTorrent[]; +``` + +### Ebook Usage Example + +```typescript +// Ebook search from indexers +const ranked = rankEbookTorrents(prowlarrResults, { + title: 'Project Hail Mary', + author: 'Andy Weir', + preferredFormat: 'epub', +}, { + indexerPriorities, + flagConfigs, + requireAuthor: true, +}); + +const bestEbook = ranked[0]; // Safe to auto-download +``` + +## Tech Stack - string-similarity (fuzzy matching) diff --git a/documentation/settings-pages.md b/documentation/settings-pages.md index 9da5277..6e215e0 100644 --- a/documentation/settings-pages.md +++ b/documentation/settings-pages.md @@ -85,7 +85,7 @@ src/app/admin/settings/ - FlareSolverr URL (optional, for Cloudflare bypass) 2. **Indexer Search Section** - - Enable toggle for indexer-based ebook search (not yet implemented) + - Enable toggle for indexer-based ebook search via Prowlarr - Hint directing users to Indexers tab for category configuration 3. 
**General Settings Section** (visible when any source enabled) @@ -95,14 +95,14 @@ src/app/admin/settings/ | Key | Default | Description | |-----|---------|-------------| | `ebook_annas_archive_enabled` | `false` | Enable Anna's Archive | -| `ebook_indexer_search_enabled` | `false` | Enable Indexer Search (stubbed) | +| `ebook_indexer_search_enabled` | `false` | Enable Indexer Search via Prowlarr | | `ebook_sidecar_preferred_format` | `epub` | Preferred format | | `ebook_sidecar_base_url` | `https://annas-archive.li` | Anna's Archive mirror | | `ebook_sidecar_flaresolverr_url` | `` | FlareSolverr URL | **Behavior:** -- If Anna's Archive enabled → Downloads work (current implementation) -- If only Indexer Search enabled → Gracefully logs "not yet implemented" +- If Anna's Archive enabled → Searches Anna's Archive first +- If Indexer Search enabled → Falls back to indexer search if Anna's Archive fails/disabled - If both disabled → Ebook downloads completely off ## Indexer Categories (Tabbed) diff --git a/src/app/admin/settings/tabs/EbookTab/EbookTab.tsx b/src/app/admin/settings/tabs/EbookTab/EbookTab.tsx index 7493ca2..3c8aefe 100644 --- a/src/app/admin/settings/tabs/EbookTab/EbookTab.tsx +++ b/src/app/admin/settings/tabs/EbookTab/EbookTab.tsx @@ -195,16 +195,6 @@ export function EbookTab({ ebook, onChange, onSuccess, onError, markAsSaved }: E

)} - - {/* Coming soon notice */} - {ebook.indexerSearchEnabled && ( -
-

- Coming Soon: Indexer search for e-books is not yet implemented. - Enabling this setting prepares your configuration for when the feature is released. -

-
- )} diff --git a/src/app/api/admin/requests/route.ts b/src/app/api/admin/requests/route.ts index 8c50d00..9ecb571 100644 --- a/src/app/api/admin/requests/route.ts +++ b/src/app/api/admin/requests/route.ts @@ -130,6 +130,7 @@ export async function GET(request: NextRequest) { title: request.audiobook.title, author: request.audiobook.author, status: request.status, + type: request.type || 'audiobook', // Include request type for UI display userId: request.user.id, user: request.user.plexUsername, createdAt: request.createdAt, diff --git a/src/app/api/requests/[id]/fetch-ebook/route.ts b/src/app/api/requests/[id]/fetch-ebook/route.ts index e6f99b7..d3b731b 100644 --- a/src/app/api/requests/[id]/fetch-ebook/route.ts +++ b/src/app/api/requests/[id]/fetch-ebook/route.ts @@ -42,14 +42,6 @@ export async function POST( ); } - // If only indexer search is enabled (not yet implemented), return error - if (!isAnnasArchiveEnabled && isIndexerSearchEnabled) { - return NextResponse.json( - { error: 'E-book indexer search is not yet implemented. Enable Anna\'s Archive to fetch e-books.' 
}, - { status: 400 } - ); - } - // Get the parent request with audiobook data const parentRequest = await prisma.request.findUnique({ where: { id: parentRequestId }, diff --git a/src/lib/processors/organize-files.processor.ts b/src/lib/processors/organize-files.processor.ts index 55a314c..046ba52 100644 --- a/src/lib/processors/organize-files.processor.ts +++ b/src/lib/processors/organize-files.processor.ts @@ -67,36 +67,53 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi logger.info(`Organizing: ${audiobook.title} by ${audiobook.author}`); - // Fetch year from multiple sources (priority order) + // Fetch missing metadata from AudibleCache if needed + // Year and narrator can both be part of path templates let year = audiobook.year || undefined; - logger.info(`Initial year from audiobook record: ${year || 'null'}`); + let narrator = audiobook.narrator || undefined; - if (!year && audiobook.audibleAsin) { - logger.info(`No year in audiobook record, attempting to fetch from AudibleCache for ASIN: ${audiobook.audibleAsin}`); + logger.info(`Initial metadata from audiobook record: year=${year || 'null'}, narrator=${narrator || 'null'}`); + + // Try to enrich missing metadata from AudibleCache + if (audiobook.audibleAsin && (!year || !narrator)) { + logger.info(`Missing metadata, attempting to fetch from AudibleCache for ASIN: ${audiobook.audibleAsin}`); - // Try AudibleCache (for popular/new releases) const audibleCache = await prisma.audibleCache.findUnique({ where: { asin: audiobook.audibleAsin }, - select: { releaseDate: true }, + select: { releaseDate: true, narrator: true }, }); - if (audibleCache?.releaseDate) { - logger.info(`Found AudibleCache entry with releaseDate: ${audibleCache.releaseDate}`); - year = new Date(audibleCache.releaseDate).getFullYear(); - logger.info(`Extracted year ${year} from AudibleCache releaseDate`); + if (audibleCache) { + const updates: { year?: number; narrator?: string } = {}; - // Update audiobook 
record with year for future use - await prisma.audiobook.update({ - where: { id: audiobookId }, - data: { year }, - }); - logger.info(`Updated audiobook record with year ${year}`); + // Extract year from releaseDate if missing + if (!year && audibleCache.releaseDate) { + year = new Date(audibleCache.releaseDate).getFullYear(); + updates.year = year; + logger.info(`Extracted year ${year} from AudibleCache releaseDate`); + } + + // Get narrator if missing + if (!narrator && audibleCache.narrator) { + narrator = audibleCache.narrator; + updates.narrator = narrator; + logger.info(`Got narrator "${narrator}" from AudibleCache`); + } + + // Update audiobook record with enriched data for future use + if (Object.keys(updates).length > 0) { + await prisma.audiobook.update({ + where: { id: audiobookId }, + data: updates, + }); + logger.info(`Updated audiobook record with enriched metadata`); + } } else { - logger.info(`No year found in AudibleCache for ASIN ${audiobook.audibleAsin}`); + logger.info(`No AudibleCache entry found for ASIN ${audiobook.audibleAsin}`); } } - logger.info(`Final year value for path organization: ${year || 'null (year will be omitted from path)'}`) + logger.info(`Final metadata for path organization: year=${year || 'null'}, narrator=${narrator || 'null'}`) // Get file organizer (reads media_dir from database config) const organizer = await getFileOrganizer(); @@ -113,7 +130,7 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi { title: audiobook.title, author: audiobook.author, - narrator: audiobook.narrator || undefined, + narrator, coverArtUrl: audiobook.coverArtUrl || undefined, asin: audiobook.audibleAsin || undefined, year, @@ -329,8 +346,10 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi const errorMessage = error instanceof Error ? 
error.message : 'File organization failed'; // Check if this is a retryable error (transient filesystem issues or no files found) + // These errors may resolve on retry (e.g., files still being extracted, permissions being set) const isRetryableError = errorMessage.includes('No audiobook files found') || + errorMessage.includes('No ebook files found') || // Ebook equivalent of above errorMessage.includes('ENOENT') || // File/directory not found errorMessage.includes('no such file or directory') || errorMessage.includes('EACCES') || // Permission denied (might be temporary) @@ -501,6 +520,64 @@ async function processEbookOrganization( logger.info(`Organizing ebook: ${book.title} by ${book.author}`); + // Fetch missing metadata from AudibleCache (same pattern as audiobooks) + // Year, narrator, series, seriesPart can all be part of path templates + let year = book.year || undefined; + let narrator = book.narrator || undefined; + let series = book.series || undefined; + let seriesPart = book.seriesPart || undefined; + + logger.info(`Initial metadata from book record: year=${year || 'null'}, narrator=${narrator || 'null'}, series=${series || 'null'}`); + + // Try to enrich missing metadata from AudibleCache + if (book.audibleAsin && (!year || !narrator)) { + logger.info(`Missing metadata, attempting to fetch from AudibleCache for ASIN: ${book.audibleAsin}`); + + const audibleCache = await prisma.audibleCache.findUnique({ + where: { asin: book.audibleAsin }, + select: { releaseDate: true, narrator: true, }, + }); + + if (audibleCache) { + const updates: { year?: number; narrator?: string } = {}; + + // Extract year from releaseDate if missing + if (!year && audibleCache.releaseDate) { + year = new Date(audibleCache.releaseDate).getFullYear(); + updates.year = year; + logger.info(`Extracted year ${year} from AudibleCache releaseDate`); + } + + // Get narrator if missing + if (!narrator && audibleCache.narrator) { + narrator = audibleCache.narrator; + updates.narrator = 
narrator; + logger.info(`Got narrator "${narrator}" from AudibleCache`); + } + + // Update book record with enriched data for future use + if (Object.keys(updates).length > 0) { + await prisma.audiobook.update({ + where: { id: audiobookId }, + data: updates, + }); + logger.info(`Updated book record with enriched metadata`); + } + } else { + logger.info(`No AudibleCache entry found for ASIN ${book.audibleAsin}`); + } + } + + logger.info(`Final metadata for path organization: year=${year || 'null'}, narrator=${narrator || 'null'}, series=${series || 'null'}, seriesPart=${seriesPart || 'null'}`); + + // Check if this is an indexer download (needs to keep source for seeding) + const downloadHistory = await prisma.downloadHistory.findFirst({ + where: { requestId }, + orderBy: { createdAt: 'desc' }, + }); + const isIndexerDownload = downloadHistory?.downloadClient !== 'direct'; + logger.info(`Download source: ${downloadHistory?.downloadClient || 'unknown'} (indexer download: ${isIndexerDownload})`); + // Get file organizer and template const organizer = await getFileOrganizer(); const templateConfig = await prisma.configuration.findUnique({ @@ -509,16 +586,21 @@ async function processEbookOrganization( const template = templateConfig?.value || '{author}/{title} {asin}'; // Organize ebook files (organizer will detect ebook type and skip audio-specific processing) + // Pass all metadata that could be used in path templates (same as audiobooks) const result = await organizer.organizeEbook( downloadPath, { title: book.title, author: book.author, + narrator, asin: book.audibleAsin || undefined, - year: book.year || undefined, + year, + series, + seriesPart, }, template, - jobId ? { jobId, context: 'FileOrganizer.Ebook' } : undefined + jobId ? 
{ jobId, context: 'FileOrganizer.Ebook' } : undefined, + isIndexerDownload ); if (!result.success) { @@ -595,6 +677,88 @@ async function processEbookOrganization( logger.debug(`Ebook library scan disabled (scanEnabled=${scanEnabled})`); } + // Cleanup Usenet downloads if configured (same logic as audiobooks) + try { + logger.info('Checking if cleanup is needed for ebook download'); + + // downloadHistory was already fetched earlier in this function + logger.info(`Download history found: ${downloadHistory ? 'yes' : 'no'}`, { + hasNzbId: !!downloadHistory?.nzbId, + hasIndexerId: !!downloadHistory?.indexerId, + nzbId: downloadHistory?.nzbId || 'none', + indexerId: downloadHistory?.indexerId || 'none', + }); + + if (downloadHistory?.nzbId && downloadHistory?.indexerId) { + // Get indexer configuration + const indexersConfig = await configService.get('prowlarr_indexers'); + logger.info(`Indexers config found: ${indexersConfig ? 'yes' : 'no'}`); + + if (indexersConfig) { + const indexers: Array<{ id: number; protocol: string; removeAfterProcessing?: boolean }> = JSON.parse(indexersConfig); + const indexer = indexers.find(idx => idx.id === downloadHistory.indexerId); + + logger.info(`Indexer found in config: ${indexer ? 'yes' : 'no'}`, { + indexerId: downloadHistory.indexerId, + protocol: indexer?.protocol || 'none', + removeAfterProcessing: indexer?.removeAfterProcessing ?? 
'undefined', + }); + + // Check if this is a Usenet indexer with cleanup enabled + if (indexer && indexer.protocol?.toLowerCase() !== 'torrent' && indexer.removeAfterProcessing) { + logger.info(`Cleaning up NZB ${downloadHistory.nzbId} (cleanup enabled for indexer ${indexer.id})`); + + // First, manually delete files from filesystem + if (downloadPath) { + logger.info(`Removing download files from filesystem: ${downloadPath}`); + + const fs = await import('fs/promises'); + + try { + // Check if it's a file or directory + const stats = await fs.stat(downloadPath); + + if (stats.isDirectory()) { + // Remove directory and all contents + await fs.rm(downloadPath, { recursive: true, force: true }); + logger.info(`Removed directory: ${downloadPath}`); + } else { + // Remove single file + await fs.unlink(downloadPath); + logger.info(`Removed file: ${downloadPath}`); + } + } catch (fsError) { + // File/directory might already be deleted or not exist + if ((fsError as NodeJS.ErrnoException).code === 'ENOENT') { + logger.info(`Download path already deleted: ${downloadPath}`); + } else { + throw fsError; + } + } + } else { + logger.warn(`No download path available, skipping filesystem deletion`); + } + + // Then archive from SABnzbd history (hides from UI but preserves for troubleshooting) + const { getSABnzbdService } = await import('../integrations/sabnzbd.service'); + const sabnzbd = await getSABnzbdService(); + + await sabnzbd.archiveCompletedNZB(downloadHistory.nzbId); + + logger.info(`Successfully archived NZB ${downloadHistory.nzbId} and removed files`); + } + } + } + } catch (error) { + // Log error but don't fail the job - cleanup is optional + logger.warn( + `Failed to cleanup NZB download: ${error instanceof Error ? error.message : 'Unknown error'}`, + { + error: error instanceof Error ? 
error.stack : undefined, + } + ); + } + return { success: true, message: 'Ebook organized successfully', @@ -638,13 +802,7 @@ async function createEbookRequestIfEnabled( return; } - // If only indexer search is enabled (not yet implemented), log and skip - if (!isAnnasArchiveEnabled && isIndexerSearchEnabled) { - logger.info('Ebook indexer search is enabled but not yet implemented, skipping ebook request creation'); - return; - } - - // Anna's Archive is enabled - proceed with ebook request creation + // At least one source is enabled - proceed with ebook request creation // Check if an ebook request already exists for this parent const existingEbookRequest = await prisma.request.findFirst({ diff --git a/src/lib/processors/search-ebook.processor.ts b/src/lib/processors/search-ebook.processor.ts index 103ea81..40f51d9 100644 --- a/src/lib/processors/search-ebook.processor.ts +++ b/src/lib/processors/search-ebook.processor.ts @@ -2,16 +2,20 @@ * Component: Search Ebook Job Processor * Documentation: documentation/integrations/ebook-sidecar.md * - * Searches Anna's Archive for ebook downloads. - * Part of the first-class ebook request flow. + * Searches for ebook downloads using multiple sources: + * 1. Anna's Archive (if enabled) - direct HTTP downloads + * 2. 
Indexer Search (if enabled) - via Prowlarr with ebook categories */ import { SearchEbookPayload, EbookSearchResult, getJobQueueService } from '../services/job-queue.service'; import { prisma } from '../db'; import { getConfigService } from '../services/config.service'; import { RMABLogger } from '../utils/logger'; +import { getProwlarrService } from '../integrations/prowlarr.service'; +import { rankEbookTorrents, RankedEbookTorrent } from '../utils/ranking-algorithm'; +import { groupIndexersByCategories, getGroupDescription } from '../utils/indexer-grouping'; -// Import ebook scraper functions (we'll refactor these to be reusable) +// Import ebook scraper functions for Anna's Archive import { searchByAsin, searchByTitle, @@ -20,7 +24,7 @@ import { /** * Process search ebook job - * Searches Anna's Archive for ebook matching the audiobook + * Searches Anna's Archive first (if enabled), then falls back to indexer search (if enabled) */ export async function processSearchEbook(payload: SearchEbookPayload): Promise { const { requestId, audiobook, preferredFormat: payloadFormat, jobId } = payload; @@ -43,49 +47,58 @@ export async function processSearchEbook(payload: SearchEbookPayload): Promise 0 + ? `No ebook found on ${enabledSources.join(' or ')}. Will retry automatically.` + : 'No ebook sources enabled. Enable Anna\'s Archive or Indexer Search in settings.'; + logger.warn(`No ebook found for request ${requestId}, marking as awaiting_search`); await prisma.request.update({ where: { id: requestId }, data: { status: 'awaiting_search', - errorMessage: 'No ebook found on Anna\'s Archive. 
Will retry automatically.', + errorMessage: message, lastSearchAt: new Date(), updatedAt: new Date(), }, @@ -98,107 +111,18 @@ export async function processSearchEbook(payload: SearchEbookPayload): Promise { + const configService = getConfigService(); + const baseUrl = await configService.get('ebook_sidecar_base_url') || 'https://annas-archive.li'; + const flaresolverrUrl = await configService.get('ebook_sidecar_flaresolverr_url') || undefined; + + if (flaresolverrUrl) { + logger.info(`Using FlareSolverr at ${flaresolverrUrl}`); + } + + let md5: string | null = null; + let searchMethod: 'asin' | 'title' = 'title'; + + // Try ASIN search first (exact match - best) + if (audiobook.asin) { + logger.info(`Searching Anna's Archive by ASIN: ${audiobook.asin} (format: ${preferredFormat})...`); + md5 = await searchByAsin(audiobook.asin, preferredFormat, baseUrl, logger, flaresolverrUrl); + + if (md5) { + logger.info(`Found via ASIN: ${md5}`); + searchMethod = 'asin'; + } else { + logger.info(`No ASIN results, trying title + author...`); + } + } + + // Fallback to title + author search + if (!md5) { + logger.info(`Searching Anna's Archive by title + author: "${audiobook.title}" by ${audiobook.author}...`); + md5 = await searchByTitle(audiobook.title, audiobook.author, preferredFormat, baseUrl, logger, flaresolverrUrl); + + if (md5) { + logger.info(`Found via title search: ${md5}`); + searchMethod = 'title'; + } + } + + if (!md5) { + return null; + } + + // Get slow download links + const slowLinks = await getSlowDownloadLinks(md5, baseUrl, logger, flaresolverrUrl); + + if (slowLinks.length === 0) { + logger.warn(`Found MD5 ${md5} but no download links available`); + return null; + } + + logger.info(`Found ${slowLinks.length} download link(s) for MD5 ${md5}`); + + return { + md5, + title: audiobook.title, + author: audiobook.author, + format: preferredFormat, + downloadUrls: slowLinks, + source: 'annas_archive', + score: searchMethod === 'asin' ? 
100 : 80, + }; +} + +/** + * Search indexers for ebook torrents/NZBs + */ +async function searchIndexers( + requestId: string, + audiobook: { title: string; author: string }, + preferredFormat: string, + logger: RMABLogger +): Promise { + const configService = getConfigService(); + + // Get enabled indexers from configuration + const indexersConfigStr = await configService.get('prowlarr_indexers'); + + if (!indexersConfigStr) { + logger.warn('No indexers configured'); + return null; + } + + const indexersConfig = JSON.parse(indexersConfigStr); + + if (indexersConfig.length === 0) { + logger.warn('No indexers enabled'); + return null; + } + + // Build indexer priorities map (indexerId -> priority 1-25, default 10) + const indexerPriorities = new Map( + indexersConfig.map((indexer: any) => [indexer.id, indexer.priority ?? 10]) + ); + + // Get flag configurations + const flagConfigStr = await configService.get('indexer_flag_config'); + const flagConfigs = flagConfigStr ? JSON.parse(flagConfigStr) : []; + + // Group indexers by their EBOOK category configuration + const groups = groupIndexersByCategories(indexersConfig, 'ebook'); + + logger.info(`Searching ${indexersConfig.length} enabled indexers in ${groups.length} group${groups.length > 1 ? 
's' : ''}`); + + // Log each group for transparency + groups.forEach((group, index) => { + logger.info(`Group ${index + 1}: ${getGroupDescription(group)}`); + }); + + // Get Prowlarr service + const prowlarr = await getProwlarrService(); + + // Build search query (title only - cast wide net, let ranking filter) + const searchQuery = audiobook.title; + + logger.info(`Searching for: "${searchQuery}"`); + + // Search Prowlarr for each group and combine results + const allResults = []; + + for (let i = 0; i < groups.length; i++) { + const group = groups[i]; + logger.info(`Searching group ${i + 1}/${groups.length}: ${getGroupDescription(group)}`); + + try { + const groupResults = await prowlarr.search(searchQuery, { + categories: group.categories, + indexerIds: group.indexerIds, + minSeeders: 0, // Ebooks may have fewer seeders + maxResults: 100, + }); + + logger.info(`Group ${i + 1} returned ${groupResults.length} results`); + allResults.push(...groupResults); + } catch (error) { + logger.error(`Group ${i + 1} search failed: ${error instanceof Error ? error.message : 'Unknown error'}`); + // Continue with other groups even if one fails + } + } + + logger.info(`Found ${allResults.length} total results from ${groups.length} group${groups.length > 1 ? 
's' : ''}`); + + if (allResults.length === 0) { + return null; + } + + // Log filter info (ebooks > 20MB will be filtered) + const preFilterCount = allResults.length; + const aboveThreshold = allResults.filter(r => (r.size / (1024 * 1024)) > 20); + if (aboveThreshold.length > 0) { + logger.info(`Will filter ${aboveThreshold.length} results > 20 MB (too large for ebooks)`); + } + + // Rank results with ebook-specific scoring + // This filters out > 20MB and uses inverted size scoring + const rankedResults = rankEbookTorrents(allResults, { + title: audiobook.title, + author: audiobook.author, + preferredFormat, + }, { + indexerPriorities, + flagConfigs, + requireAuthor: true, // Automatic mode - prevent wrong authors + }); + + // Log filter results + const postFilterCount = rankedResults.length; + if (postFilterCount < preFilterCount) { + logger.info(`Filtered out ${preFilterCount - postFilterCount} results > 20 MB`); + } + + // Dual threshold filtering (same as audiobooks) + const filteredResults = rankedResults.filter(result => + result.score >= 50 && result.finalScore >= 50 + ); + + const disqualifiedByNegativeBonus = rankedResults.filter(result => + result.score >= 50 && result.finalScore < 50 + ).length; + + logger.info(`Ranked ${rankedResults.length} results, ${filteredResults.length} above threshold (50/100 base + final)`); + if (disqualifiedByNegativeBonus > 0) { + logger.info(`${disqualifiedByNegativeBonus} ebooks disqualified by negative flag bonuses`); + } + + if (filteredResults.length === 0) { + logger.warn(`No quality matches found (all below 50/100)`); + return null; + } + + // Select best result + const bestResult = filteredResults[0]; + + // Log top 3 results with detailed breakdown + const top3 = filteredResults.slice(0, 3); + logger.info(`==================== EBOOK RANKING DEBUG ====================`); + logger.info(`Requested Title: "${audiobook.title}"`); + logger.info(`Requested Author: "${audiobook.author}"`); + logger.info(`Preferred Format: 
${preferredFormat}`); + logger.info(`Top ${top3.length} results (out of ${filteredResults.length} above threshold):`); + logger.info(`--------------------------------------------------------------`); + for (let i = 0; i < top3.length; i++) { + const result = top3[i]; + const sizeMB = (result.size / (1024 * 1024)).toFixed(1); + + logger.info(`${i + 1}. "${result.title}"`); + logger.info(` Indexer: ${result.indexer}${result.indexerId ? ` (ID: ${result.indexerId})` : ''}`); + logger.info(``); + logger.info(` Base Score: ${result.score.toFixed(1)}/100`); + logger.info(` - Title/Author Match: ${result.breakdown.matchScore.toFixed(1)}/60`); + logger.info(` - Format Match: ${result.breakdown.formatScore.toFixed(1)}/10`); + logger.info(` - Size Quality: ${result.breakdown.sizeScore.toFixed(1)}/15 (${sizeMB} MB)`); + logger.info(` - Seeder Count: ${result.breakdown.seederScore.toFixed(1)}/15 (${result.seeders !== undefined ? result.seeders + ' seeders' : 'N/A for Usenet'})`); + logger.info(``); + logger.info(` Bonus Points: +${result.bonusPoints.toFixed(1)}`); + if (result.bonusModifiers.length > 0) { + for (const mod of result.bonusModifiers) { + logger.info(` - ${mod.reason}: +${mod.points.toFixed(1)}`); + } + } + logger.info(``); + logger.info(` Final Score: ${result.finalScore.toFixed(1)}`); + if (result.breakdown.notes.length > 0) { + logger.info(` Notes: ${result.breakdown.notes.join(', ')}`); + } + if (i < top3.length - 1) { + logger.info(`--------------------------------------------------------------`); + } + } + logger.info(`==============================================================`); + logger.info(`Selected best result: ${bestResult.title} (final score: ${bestResult.finalScore.toFixed(1)})`); + + return bestResult; +} + +/** + * Handle Anna's Archive download (direct HTTP) + */ +async function handleAnnasArchiveDownload( + requestId: string, + audiobook: { title: string; author: string }, + result: EbookSearchResult, + preferredFormat: string, + logger: 
RMABLogger +): Promise { + logger.info(`==================== EBOOK SEARCH RESULT ====================`); + logger.info(`Source: Anna's Archive`); + logger.info(`Title: "${audiobook.title}"`); + logger.info(`Author: "${audiobook.author}"`); + logger.info(`Format: ${preferredFormat}`); + logger.info(`MD5: ${result.md5}`); + logger.info(`Download Links: ${result.downloadUrls.length}`); + logger.info(`Score: ${result.score}/100`); + logger.info(`==============================================================`); + + // Create download history record + const downloadHistory = await prisma.downloadHistory.create({ + data: { + requestId, + indexerName: "Anna's Archive", + torrentName: `${audiobook.title} - ${audiobook.author}.${preferredFormat}`, + torrentSizeBytes: null, // Unknown until download starts + qualityScore: result.score, + selected: true, + downloadClient: 'direct', // Direct HTTP download + downloadStatus: 'queued', + }, + }); + + // Trigger direct download job + const jobQueue = getJobQueueService(); + await jobQueue.addStartDirectDownloadJob( + requestId, + downloadHistory.id, + result.downloadUrls[0], // Start with first link + `${audiobook.title} - ${audiobook.author}.${preferredFormat}`, + undefined // Size unknown + ); + + // Store all download URLs for retry purposes + await prisma.downloadHistory.update({ + where: { id: downloadHistory.id }, + data: { + torrentUrl: JSON.stringify(result.downloadUrls), + }, + }); + + return { + success: true, + message: `Found ebook via Anna's Archive, starting download`, + requestId, + source: 'annas_archive', + searchResult: { + md5: result.md5, + format: result.format, + score: result.score, + downloadLinksCount: result.downloadUrls.length, + }, + }; +} + +/** + * Handle indexer download (torrent/NZB via download-torrent processor) + */ +async function handleIndexerDownload( + requestId: string, + audiobook: { title: string; author: string }, + result: RankedEbookTorrent, + preferredFormat: string, + logger: 
RMABLogger +): Promise { + logger.info(`==================== EBOOK SEARCH RESULT ====================`); + logger.info(`Source: Indexer (${result.indexer})`); + logger.info(`Title: "${audiobook.title}"`); + logger.info(`Author: "${audiobook.author}"`); + logger.info(`Torrent: "${result.title}"`); + logger.info(`Size: ${(result.size / (1024 * 1024)).toFixed(1)} MB`); + logger.info(`Seeders: ${result.seeders !== undefined ? result.seeders : 'N/A'}`); + logger.info(`Final Score: ${result.finalScore.toFixed(1)}/100`); + logger.info(`==============================================================`); + + // Trigger download job using the SAME processor as audiobooks + // The download-torrent processor is already generic and handles both torrent and NZB + const jobQueue = getJobQueueService(); + + // Fetch the request to get the parent audiobook ID for the download job + const request = await prisma.request.findUnique({ + where: { id: requestId }, + include: { parentRequest: true }, + }); + + if (!request) { + throw new Error(`Request ${requestId} not found`); + } + + // Use the parent audiobook's ID for the download job, or fall back to request ID + const audiobookId = request.parentRequest?.id || request.id; + + await jobQueue.addDownloadJob(requestId, { + id: audiobookId, + title: audiobook.title, + author: audiobook.author, + }, result); + + return { + success: true, + message: `Found ebook via indexer search, starting download`, + requestId, + source: 'prowlarr', + resultsCount: 1, + selectedTorrent: { + title: result.title, + score: result.score, + finalScore: result.finalScore, + seeders: result.seeders || 0, + size: result.size, + }, + }; +} diff --git a/src/lib/utils/file-organizer.ts b/src/lib/utils/file-organizer.ts index d32c102..c1649ac 100644 --- a/src/lib/utils/file-organizer.ts +++ b/src/lib/utils/file-organizer.ts @@ -645,12 +645,14 @@ export class FileOrganizer { /** * Organize ebook file into proper directory structure * Simplified compared to audiobooks 
- no metadata tagging, cover art, or chapter merging + * Supports both direct file paths (Anna's Archive) and directories (indexer downloads) */ async organizeEbook( downloadPath: string, - metadata: { title: string; author: string; asin?: string; year?: number }, + metadata: { title: string; author: string; narrator?: string; asin?: string; year?: number; series?: string; seriesPart?: string }, template: string, - loggerConfig?: LoggerConfig + loggerConfig?: LoggerConfig, + isIndexerDownload: boolean = false ): Promise { const logger = loggerConfig ? RMABLogger.forJob(loggerConfig.jobId, loggerConfig.context) : null; @@ -663,19 +665,21 @@ export class FileOrganizer { try { await logger?.info(`Organizing ebook: ${downloadPath}`); - // Get file info - const stats = await fs.stat(downloadPath); - if (!stats.isFile()) { - throw new Error('Ebook download path must be a file'); + const ebookFormats = ['epub', 'pdf', 'mobi', 'azw', 'azw3', 'fb2', 'cbz', 'cbr']; + + // Find ebook file (handle both file and directory cases) + const { ebookFile, baseSourcePath, isFile } = await this.findEbookFile(downloadPath, ebookFormats); + + if (!ebookFile) { + throw new Error(`No ebook files found in download (looking for: ${ebookFormats.join(', ')})`); } + // Build full path to source file + const sourceFilePath = isFile ? 
downloadPath : path.join(baseSourcePath, ebookFile); + await logger?.info(`Found ebook file: ${ebookFile}`); + // Detect format from extension - const ext = path.extname(downloadPath).toLowerCase().slice(1); - const ebookFormats = ['epub', 'pdf', 'mobi', 'azw', 'azw3', 'fb2', 'cbz', 'cbr']; - if (!ebookFormats.includes(ext)) { - throw new Error(`Unsupported ebook format: ${ext}`); - } - + const ext = path.extname(ebookFile).toLowerCase().slice(1); result.format = ext; await logger?.info(`Detected ebook format: ${ext}`); @@ -685,9 +689,11 @@ export class FileOrganizer { template, metadata.author, metadata.title, - undefined, // narrator + metadata.narrator, metadata.asin, - metadata.year + metadata.year, + metadata.series, + metadata.seriesPart ); await logger?.info(`Target directory: ${targetDir}`); @@ -696,7 +702,7 @@ export class FileOrganizer { await fs.mkdir(targetDir, { recursive: true }); // Build target filename (sanitize source filename) - const sourceFilename = path.basename(downloadPath); + const sourceFilename = path.basename(ebookFile); const targetFilename = this.sanitizePath(sourceFilename); const targetPath = path.join(targetDir, targetFilename); @@ -711,18 +717,22 @@ export class FileOrganizer { // File doesn't exist, continue with copy } - // Copy ebook file (don't delete original in case of direct download retry) - await fs.copyFile(downloadPath, targetPath); + // Copy ebook file (do NOT delete original - may need for seeding or retry) + await fs.copyFile(sourceFilePath, targetPath); await fs.chmod(targetPath, 0o644); await logger?.info(`Copied ebook: ${targetFilename}`); - // Clean up source file (for direct HTTP downloads, we don't need to keep the original) - try { - await fs.unlink(downloadPath); - await logger?.info(`Cleaned up source file: ${sourceFilename}`); - } catch { - // Ignore cleanup errors + // Clean up source file ONLY for direct HTTP downloads (not indexer downloads which need to seed) + if (!isIndexerDownload && isFile) { + try { 
+ await fs.unlink(sourceFilePath); + await logger?.info(`Cleaned up source file: ${sourceFilename}`); + } catch { + // Ignore cleanup errors + } + } else if (isIndexerDownload) { + await logger?.info(`Keeping source file for seeding: ${sourceFilename}`); } result.success = true; @@ -737,6 +747,60 @@ export class FileOrganizer { return result; } } + + /** + * Find ebook file in download path (handles both single file and directory) + */ + private async findEbookFile( + downloadPath: string, + ebookFormats: string[] + ): Promise<{ ebookFile: string | null; baseSourcePath: string; isFile: boolean }> { + let ebookFile: string | null = null; + let isFile = false; + + try { + const stats = await fs.stat(downloadPath); + + if (stats.isFile()) { + // Handle single file case + isFile = true; + const ext = path.extname(downloadPath).toLowerCase().slice(1); + + if (ebookFormats.includes(ext)) { + ebookFile = path.basename(downloadPath); + } + } else { + // Handle directory case - find ebook files inside + const files = await this.walkDirectory(downloadPath); + + // Filter to ebook files and sort by preference (epub > pdf > others) + const ebookFiles = files.filter(file => { + const ext = path.extname(file).toLowerCase().slice(1); + return ebookFormats.includes(ext); + }); + + if (ebookFiles.length > 0) { + // Sort by format preference + ebookFiles.sort((a, b) => { + const extA = path.extname(a).toLowerCase().slice(1); + const extB = path.extname(b).toLowerCase().slice(1); + const priorityOrder = ['epub', 'pdf', 'mobi', 'azw3', 'azw', 'fb2', 'cbz', 'cbr']; + return priorityOrder.indexOf(extA) - priorityOrder.indexOf(extB); + }); + + ebookFile = ebookFiles[0]; + } + } + } catch { + // Path doesn't exist or inaccessible + } + + return { + ebookFile, + baseSourcePath: downloadPath, + isFile, + }; + } } /** diff --git a/src/lib/utils/ranking-algorithm.ts b/src/lib/utils/ranking-algorithm.ts index 190c844..53f6b65 100644 --- a/src/lib/utils/ranking-algorithm.ts +++ 
b/src/lib/utils/ranking-algorithm.ts @@ -42,6 +42,18 @@ export interface RankTorrentsOptions { requireAuthor?: boolean; // Enforce author presence check (default: true) } +export interface EbookTorrentRequest { + title: string; + author: string; + preferredFormat: string; // User's preferred format (epub, pdf, etc.) +} + +export interface RankEbookTorrentsOptions { + indexerPriorities?: Map; // indexerId -> priority (1-25) + flagConfigs?: IndexerFlagConfig[]; // Flag bonus configurations + requireAuthor?: boolean; // Enforce author presence check (default: true) +} + export interface BonusModifier { type: 'indexer_priority' | 'indexer_flag' | 'custom'; value: number; // Multiplier (e.g., 0.4 for 40%) @@ -67,6 +79,24 @@ export interface RankedTorrent extends TorrentResult { breakdown: ScoreBreakdown; } +export interface EbookScoreBreakdown { + formatScore: number; // 0-10 points (match preferred = 10, else 0) + sizeScore: number; // 0-15 points (inverted - smaller is better) + seederScore: number; // 0-15 points (same as audiobooks) + matchScore: number; // 0-60 points (same as audiobooks) + totalScore: number; + notes: string[]; +} + +export interface RankedEbookTorrent extends TorrentResult { + score: number; // Base score (0-100) + bonusModifiers: BonusModifier[]; + bonusPoints: number; // Sum of all bonus points + finalScore: number; // score + bonusPoints + rank: number; + breakdown: EbookScoreBreakdown; +} + export class RankingAlgorithm { /** * Rank all torrents and return sorted by finalScore (best first) @@ -622,6 +652,257 @@ export class RankingAlgorithm { return notes; } + + // ========================================================================= + // EBOOK TORRENT RANKING (for indexer results) + // Reuses scoreMatch() and scoreSeeders() from audiobook ranking + // Uses ebook-specific format and size scoring + // ========================================================================= + + /** + * Rank ebook torrents from indexers + * Reuses 
title/author matching and seeder scoring from audiobook ranking + * Uses ebook-specific format scoring (10 pts for match, 0 otherwise) + * Uses inverted size scoring (smaller = better, > 20MB filtered) + * + * @param torrents - Array of torrent results from Prowlarr + * @param ebook - Ebook request details (title, author, preferredFormat) + * @param options - Optional configuration for ranking behavior + */ + rankEbookTorrents( + torrents: TorrentResult[], + ebook: EbookTorrentRequest, + options: RankEbookTorrentsOptions = {} + ): RankedEbookTorrent[] { + const { + indexerPriorities, + flagConfigs, + requireAuthor = true // Safe default: require author in automatic mode + } = options; + + // Filter out files > 20 MB (too large for ebooks) + const filteredTorrents = torrents.filter((torrent) => { + const sizeMB = torrent.size / (1024 * 1024); + return sizeMB <= 20; + }); + + const ranked = filteredTorrents.map((torrent) => { + // Calculate base scores (0-100) + // Reuse scoreMatch and scoreSeeders from audiobook ranking + const formatScore = this.scoreEbookFormat(torrent, ebook.preferredFormat); + const sizeScore = this.scoreEbookSize(torrent); + const seederScore = this.scoreSeeders(torrent.seeders); + const matchScore = this.scoreMatch(torrent, { + title: ebook.title, + author: ebook.author, + }, requireAuthor); + + const baseScore = formatScore + sizeScore + seederScore + matchScore; + + // Calculate bonus modifiers (same as audiobooks) + const bonusModifiers: BonusModifier[] = []; + + // Indexer priority bonus (default: 10/25 = 40%) + if (torrent.indexerId !== undefined) { + const priority = indexerPriorities?.get(torrent.indexerId) ?? 
10; + const modifier = priority / 25; // Convert 1-25 to 0.04-1.0 (4%-100%) + const points = baseScore * modifier; + + bonusModifiers.push({ + type: 'indexer_priority', + value: modifier, + points: points, + reason: `Indexer priority ${priority}/25 (${Math.round(modifier * 100)}%)`, + }); + } + + // Flag bonuses/penalties (same as audiobooks) + if (torrent.flags && torrent.flags.length > 0 && flagConfigs && flagConfigs.length > 0) { + torrent.flags.forEach(torrentFlag => { + const matchingConfig = flagConfigs.find(cfg => + cfg.name.trim().toLowerCase() === torrentFlag.trim().toLowerCase() + ); + + if (matchingConfig) { + const modifier = matchingConfig.modifier / 100; + const points = baseScore * modifier; + + bonusModifiers.push({ + type: 'indexer_flag', + value: modifier, + points: points, + reason: `Flag "${torrentFlag}" (${matchingConfig.modifier > 0 ? '+' : ''}${matchingConfig.modifier}%)`, + }); + } + }); + } + + // Sum all bonus points + const bonusPoints = bonusModifiers.reduce((sum, mod) => sum + mod.points, 0); + + // Calculate final score + const finalScore = baseScore + bonusPoints; + + return { + ...torrent, + score: baseScore, + bonusModifiers, + bonusPoints, + finalScore, + rank: 0, // Will be assigned after sorting + breakdown: { + formatScore, + sizeScore, + seederScore, + matchScore, + totalScore: baseScore, + notes: this.generateEbookNotes(torrent, { + formatScore, + sizeScore, + seederScore, + matchScore, + totalScore: baseScore, + notes: [], + }, ebook.preferredFormat), + }, + }; + }); + + // Sort by finalScore descending (best first), then by publishDate descending (newest first) + ranked.sort((a, b) => { + if (b.finalScore !== a.finalScore) { + return b.finalScore - a.finalScore; + } + return b.publishDate.getTime() - a.publishDate.getTime(); + }); + + // Assign ranks + ranked.forEach((r, index) => { + r.rank = index + 1; + }); + + return ranked; + } + + /** + * Score ebook format (10 points max) + * Full points for matching preferred format, 
0 otherwise + */ + private scoreEbookFormat(torrent: TorrentResult, preferredFormat: string): number { + const detectedFormat = this.detectEbookFormat(torrent); + const preferred = preferredFormat.toLowerCase(); + + // Exact match = full points, otherwise 0 + if (detectedFormat === preferred) { + return 10; + } + + return 0; + } + + /** + * Score ebook file size (15 points max, inverted - smaller is better) + * < 5 MB = 15 pts (full) + * 5-15 MB = 10 pts + * 15-20 MB = 5 pts + * > 20 MB = filtered out (not scored) + */ + private scoreEbookSize(torrent: TorrentResult): number { + const sizeMB = torrent.size / (1024 * 1024); + + if (sizeMB < 5) { + return 15; // Optimal size for ebooks + } else if (sizeMB <= 15) { + return 10; // Acceptable, may have images + } else if (sizeMB <= 20) { + return 5; // Large but within limit + } + + // > 20 MB should have been filtered, but return 0 as safety + return 0; + } + + /** + * Detect ebook format from torrent title + */ + private detectEbookFormat(torrent: TorrentResult): string { + const title = torrent.title.toLowerCase(); + + // Check for common ebook format extensions/keywords + if (title.includes('.epub') || title.includes(' epub')) return 'epub'; + if (title.includes('.pdf') || title.includes(' pdf')) return 'pdf'; + if (title.includes('.mobi') || title.includes(' mobi')) return 'mobi'; + if (title.includes('.azw3') || title.includes(' azw3')) return 'azw3'; + if (title.includes('.azw') || title.includes(' azw')) return 'azw'; + if (title.includes('.fb2') || title.includes(' fb2')) return 'fb2'; + if (title.includes('.cbz') || title.includes(' cbz')) return 'cbz'; + if (title.includes('.cbr') || title.includes(' cbr')) return 'cbr'; + + // Default to unknown + return 'unknown'; + } + + /** + * Generate human-readable notes for ebook scoring + */ + private generateEbookNotes( + torrent: TorrentResult, + breakdown: EbookScoreBreakdown, + preferredFormat: string + ): string[] { + const notes: string[] = []; + + // Format 
notes + const detectedFormat = this.detectEbookFormat(torrent); + if (breakdown.formatScore === 10) { + notes.push(`✓ Preferred format (${detectedFormat.toUpperCase()})`); + } else if (detectedFormat !== 'unknown') { + notes.push(`Different format (${detectedFormat.toUpperCase()}, wanted ${preferredFormat.toUpperCase()})`); + } else { + notes.push('⚠️ Unknown format'); + } + + // Size notes + const sizeMB = torrent.size / (1024 * 1024); + if (sizeMB < 5) { + notes.push('✓ Optimal file size'); + } else if (sizeMB <= 15) { + notes.push('Good file size (may have images)'); + } else if (sizeMB <= 20) { + notes.push('⚠️ Large file size'); + } + + // Seeder notes (same logic as audiobooks) + if (torrent.seeders !== undefined && torrent.seeders !== null && !isNaN(torrent.seeders)) { + if (torrent.seeders === 0) { + notes.push('⚠️ No seeders available'); + } else if (torrent.seeders < 5) { + notes.push(`Low seeders (${torrent.seeders})`); + } else if (torrent.seeders >= 50) { + notes.push(`Excellent availability (${torrent.seeders} seeders)`); + } + } + + // Match notes (same thresholds as audiobooks) + if (breakdown.matchScore < 24) { + notes.push('⚠️ Poor title/author match'); + } else if (breakdown.matchScore < 42) { + notes.push('⚠️ Weak title/author match'); + } else if (breakdown.matchScore >= 54) { + notes.push('✓ Excellent title/author match'); + } + + // Overall quality assessment + if (breakdown.totalScore >= 75) { + notes.push('✓ Excellent choice'); + } else if (breakdown.totalScore >= 55) { + notes.push('✓ Good choice'); + } else if (breakdown.totalScore < 35) { + notes.push('⚠️ Consider reviewing this choice'); + } + + return notes; + } } // ========================================================================= @@ -844,3 +1125,26 @@ export function rankTorrents( qualityScore: Math.round(r.score), })); } + +/** + * Helper function to rank ebook torrents using the singleton instance + * + * @param torrents - Array of torrent results from Prowlarr + * @param 
ebook - Ebook request details (title, author, preferredFormat) + * @param options - Optional ranking configuration + * @returns Ranked ebook torrents with quality scores + */ +export function rankEbookTorrents( + torrents: TorrentResult[], + ebook: EbookTorrentRequest, + options?: RankEbookTorrentsOptions +): (RankedEbookTorrent & { qualityScore: number })[] { + const algorithm = getRankingAlgorithm(); + const ranked = algorithm.rankEbookTorrents(torrents, ebook, options || {}); + + // Add qualityScore field for UI compatibility (rounded score) + return ranked.map((r) => ({ + ...r, + qualityScore: Math.round(r.score), + })); +}