diff --git a/prisma/migrations/20260303000000_add_works_table/migration.sql b/prisma/migrations/20260303000000_add_works_table/migration.sql new file mode 100644 index 0000000..83aa861 --- /dev/null +++ b/prisma/migrations/20260303000000_add_works_table/migration.sql @@ -0,0 +1,42 @@ +-- CreateTable +CREATE TABLE "works" ( + "id" TEXT NOT NULL, + "title" TEXT NOT NULL, + "author" TEXT NOT NULL, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updated_at" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "works_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "work_asins" ( + "id" TEXT NOT NULL, + "work_id" TEXT NOT NULL, + "asin" TEXT NOT NULL, + "narrator" TEXT, + "duration_minutes" INTEGER, + "is_canonical" BOOLEAN NOT NULL DEFAULT false, + "source" TEXT NOT NULL, + "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + + CONSTRAINT "work_asins_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "works_title_idx" ON "works"("title"); + +-- CreateIndex +CREATE INDEX "works_author_idx" ON "works"("author"); + +-- CreateIndex +CREATE UNIQUE INDEX "work_asins_asin_key" ON "work_asins"("asin"); + +-- CreateIndex +CREATE INDEX "work_asins_work_id_idx" ON "work_asins"("work_id"); + +-- CreateIndex +CREATE INDEX "work_asins_asin_idx" ON "work_asins"("asin"); + +-- AddForeignKey +ALTER TABLE "work_asins" ADD CONSTRAINT "work_asins_work_id_fkey" FOREIGN KEY ("work_id") REFERENCES "works"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/prisma/schema.prisma b/prisma/schema.prisma index 6a39100..bcb900a 100644 --- a/prisma/schema.prisma +++ b/prisma/schema.prisma @@ -531,3 +531,43 @@ model GoodreadsBookMapping { @@index([audibleAsin]) @@map("goodreads_book_mappings") } + +// ============================================================================ +// WORKS TABLE +// Cross-ASIN audiobook identity mapping — links multiple Audible ASINs +// to a single logical work for library matching across editions. 
+// Documentation: documentation/integrations/audible.md +// ============================================================================ + +model Work { + id String @id @default(uuid()) + title String + author String + createdAt DateTime @default(now()) @map("created_at") + updatedAt DateTime @updatedAt @map("updated_at") + + // Relations + asins WorkAsin[] + + @@index([title]) + @@index([author]) + @@map("works") +} + +model WorkAsin { + id String @id @default(uuid()) + workId String @map("work_id") + asin String @unique + narrator String? + durationMinutes Int? @map("duration_minutes") + isCanonical Boolean @default(false) @map("is_canonical") + source String // 'dedup_auto' | 'admin_manual' + createdAt DateTime @default(now()) @map("created_at") + + // Relations + work Work @relation(fields: [workId], references: [id], onDelete: Cascade) + + @@index([workId]) + @@index([asin]) + @@map("work_asins") +} diff --git a/src/app/api/audiobooks/search/route.ts b/src/app/api/audiobooks/search/route.ts index 4093fcb..0641aca 100644 --- a/src/app/api/audiobooks/search/route.ts +++ b/src/app/api/audiobooks/search/route.ts @@ -6,6 +6,8 @@ import { NextRequest, NextResponse } from 'next/server'; import { getAudibleService } from '@/lib/integrations/audible.service'; import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher'; +import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks'; +import { persistDedupGroups } from '@/lib/services/works.service'; import { getCurrentUser } from '@/lib/middleware/auth'; import { RMABLogger } from '@/lib/utils/logger'; @@ -38,14 +40,22 @@ export async function GET(request: NextRequest) { const currentUser = getCurrentUser(request); const userId = currentUser?.sub || undefined; + // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries + const { books: dedupedResults, groups } = deduplicateAndCollectGroups(results.results); + + // Fire-and-forget: persist dedup groups to works 
table for cross-ASIN matching + if (groups.length > 0) { + persistDedupGroups(groups).catch(() => {}); + } + // Enrich search results with availability and request status information - const enrichedResults = await enrichAudiobooksWithMatches(results.results, userId); + const enrichedResults = await enrichAudiobooksWithMatches(dedupedResults, userId); return NextResponse.json({ success: true, query: results.query, results: enrichedResults, - totalResults: results.totalResults, + totalResults: enrichedResults.length, page: results.page, hasMore: results.hasMore, }); diff --git a/src/app/api/authors/[asin]/books/route.ts b/src/app/api/authors/[asin]/books/route.ts index 0535d73..414345a 100644 --- a/src/app/api/authors/[asin]/books/route.ts +++ b/src/app/api/authors/[asin]/books/route.ts @@ -6,6 +6,8 @@ import { NextRequest, NextResponse } from 'next/server'; import { getAudibleService } from '@/lib/integrations/audible.service'; import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher'; +import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks'; +import { persistDedupGroups } from '@/lib/services/works.service'; import { getCurrentUser } from '@/lib/middleware/auth'; import { RMABLogger } from '@/lib/utils/logger'; @@ -53,9 +55,17 @@ export async function GET( const audibleService = getAudibleService(); const result = await audibleService.searchByAuthorAsin(authorName.trim(), asin, page); + // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries + const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(result.books); + + // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching + if (groups.length > 0) { + persistDedupGroups(groups).catch(() => {}); + } + // Enrich with library availability and request status const userId = currentUser.sub || undefined; - const enrichedBooks = await enrichAudiobooksWithMatches(result.books, userId); + const enrichedBooks = await 
enrichAudiobooksWithMatches(dedupedBooks, userId); logger.info(`Author books complete: "${authorName}" → ${enrichedBooks.length} books (page ${page})`); @@ -64,7 +74,7 @@ export async function GET( books: enrichedBooks, authorName: authorName.trim(), authorAsin: asin, - totalBooks: result.totalResults || enrichedBooks.length, + totalBooks: enrichedBooks.length, hasMore: result.hasMore, page: result.page, }); diff --git a/src/app/api/series/[asin]/route.ts b/src/app/api/series/[asin]/route.ts index 43271fb..3fe13ab 100644 --- a/src/app/api/series/[asin]/route.ts +++ b/src/app/api/series/[asin]/route.ts @@ -8,6 +8,8 @@ import { getCurrentUser } from '@/lib/middleware/auth'; import { RMABLogger } from '@/lib/utils/logger'; import { scrapeSeriesPage } from '@/lib/integrations/audible-series'; import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher'; +import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks'; +import { persistDedupGroups } from '@/lib/services/works.service'; const logger = RMABLogger.create('API.Series.Detail'); @@ -49,9 +51,17 @@ export async function GET( ); } + // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries + const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(detail.books); + + // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching + if (groups.length > 0) { + persistDedupGroups(groups).catch(() => {}); + } + // Enrich books with library availability and request status const userId = currentUser.sub || undefined; - const enrichedBooks = await enrichAudiobooksWithMatches(detail.books, userId); + const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId); logger.info(`Series detail complete: "${detail.title}" (${enrichedBooks.length} books, page ${page})`); diff --git a/src/lib/integrations/audible-series.ts b/src/lib/integrations/audible-series.ts index f5df693..7cf976b 100644 --- 
a/src/lib/integrations/audible-series.ts +++ b/src/lib/integrations/audible-series.ts @@ -14,8 +14,10 @@ import { getLanguageForRegion, buildContainsSelector, stripPrefixes, + type LanguageConfig, } from '../constants/language-config'; import { RMABLogger } from '../utils/logger'; +import { parseRuntime } from '../utils/parse-runtime'; import { randomDelay } from '../utils/scrape-resilience'; const logger = RMABLogger.create('Audible.Series'); @@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise< undefined; // Parse all books from the series page - const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes); + const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig); // Use actual book count if we got more from scraping const bookCount = Math.max(summary.bookCount, books.length); @@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun function parseSeriesBooks( $: cheerio.CheerioAPI, authorPrefixes: string[], - narratorPrefixes: string[] + narratorPrefixes: string[], + langConfig: LanguageConfig ): AudibleAudiobook[] { const books: AudibleAudiobook[] = []; const seenAsins = new Set(); @@ -453,6 +456,11 @@ function parseSeriesBooks( const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null; const rating = ratingMatch ? 
parseFloat(ratingMatch[1].replace(',', '.')) : undefined; + // Duration + const runtimeText = $el.find('.runtimeLabel').text().trim() || + $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim(); + const durationMinutes = parseRuntime(runtimeText, langConfig); + books.push({ asin: bookAsin, title, @@ -461,6 +469,7 @@ function parseSeriesBooks( narrator: stripPrefixes(narratorText, narratorPrefixes), coverArtUrl, rating, + durationMinutes, }); }); diff --git a/src/lib/integrations/audible.service.ts b/src/lib/integrations/audible.service.ts index 229c421..de32076 100644 --- a/src/lib/integrations/audible.service.ts +++ b/src/lib/integrations/audible.service.ts @@ -23,6 +23,7 @@ import { AdaptivePacer, FetchResultMeta, } from '../utils/scrape-resilience'; +import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime'; // Module-level logger const logger = RMABLogger.create('Audible'); @@ -1134,33 +1135,11 @@ export class AudibleService { } /** - * Parse runtime text to minutes using language-specific patterns + * Parse runtime text to minutes using language-specific patterns. + * Delegates to shared utility in src/lib/utils/parse-runtime.ts. */ private parseRuntime(runtimeText: string): number | undefined { - if (!runtimeText) return undefined; - - const langConfig = this.getLangConfig(); - let totalMinutes = 0; - - // Try each hour pattern until one matches - for (const pattern of langConfig.scraping.runtimeHourPatterns) { - const match = runtimeText.match(pattern); - if (match) { - totalMinutes += parseInt(match[1]) * 60; - break; - } - } - - // Try each minute pattern until one matches - for (const pattern of langConfig.scraping.runtimeMinutePatterns) { - const match = runtimeText.match(pattern); - if (match) { - totalMinutes += parseInt(match[1]); - break; - } - } - - return totalMinutes > 0 ? 
totalMinutes : undefined; + return parseRuntimeUtil(runtimeText, this.getLangConfig()); } /** diff --git a/src/lib/services/request-creator.service.ts b/src/lib/services/request-creator.service.ts index 864c233..c89eda7 100644 --- a/src/lib/services/request-creator.service.ts +++ b/src/lib/services/request-creator.service.ts @@ -12,6 +12,7 @@ import { getJobQueueService } from '@/lib/services/job-queue.service'; import { findPlexMatch } from '@/lib/utils/audiobook-matcher'; import { getAudibleService } from '@/lib/integrations/audible.service'; import { RMABLogger } from '@/lib/utils/logger'; +import { seedAsin } from '@/lib/services/works.service'; const logger = RMABLogger.create('RequestCreator'); @@ -147,6 +148,15 @@ export async function createRequestForUser( } } + // Seed works table for cross-ASIN matching (Layer 2: request-time seeding) + seedAsin( + audiobook.asin, + audiobookRecord.title, + audiobookRecord.author, + audiobookRecord.narrator || undefined, + undefined // duration not available at request time + ).catch(() => {}); + // Check if user already has an active request for this audiobook const existingRequest = await prisma.request.findFirst({ where: { diff --git a/src/lib/services/works.service.ts b/src/lib/services/works.service.ts new file mode 100644 index 0000000..45d989d --- /dev/null +++ b/src/lib/services/works.service.ts @@ -0,0 +1,248 @@ +/** + * Component: Works Service + * Documentation: documentation/integrations/audible.md + * + * Manages the works table — persistent cross-ASIN audiobook identity mapping. + * Layer 1: Auto-populated from dedup logic when users browse search/author/series pages. + * Layer 2: Seeded at request time to ensure requested ASINs are tracked. 
+ */ + +import { prisma } from '@/lib/db'; +import { RMABLogger } from '@/lib/utils/logger'; +import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks'; + +const logger = RMABLogger.create('WorksService'); + +// --------------------------------------------------------------------------- +// Layer 1: Persist dedup groups (fire-and-forget from API routes) +// --------------------------------------------------------------------------- + +/** + * Persist dedup groups to the works table. For each group of 2+ ASINs that + * were identified as the same audiobook, create or update a Work record + * linking all ASINs together. + * + * Safe to call fire-and-forget — never throws. + */ +export async function persistDedupGroups(groups: DedupGroup[]): Promise { + try { + for (const group of groups) { + await persistSingleGroup(group); + } + } catch (error) { + logger.error('Failed to persist dedup groups', { + error: error instanceof Error ? error.message : String(error), + groupCount: groups.length, + }); + } +} + +/** + * Persist a single dedup group. Handles merging when ASINs span multiple + * existing works. 
+ */ +async function persistSingleGroup(group: DedupGroup): Promise { + const { canonicalAsin, allAsins, title, author, narrator, durationMinutes } = group; + + // Find which of these ASINs already exist in work_asins + const existingEntries = await prisma.workAsin.findMany({ + where: { asin: { in: allAsins } }, + select: { asin: true, workId: true }, + }); + + // Collect unique work IDs that already contain any of our ASINs + const existingWorkIds = [...new Set(existingEntries.map(e => e.workId))]; + const existingAsinSet = new Set(existingEntries.map(e => e.asin)); + + if (existingWorkIds.length === 0) { + // No existing works — create a new one with all ASINs + const work = await prisma.work.create({ + data: { title, author }, + }); + + await Promise.all( + allAsins.map(asin => + prisma.workAsin.create({ + data: { + workId: work.id, + asin, + narrator: asin === canonicalAsin ? narrator : undefined, + durationMinutes: asin === canonicalAsin ? durationMinutes : undefined, + isCanonical: asin === canonicalAsin, + source: 'dedup_auto', + }, + }) + ) + ); + + logger.debug('Created new work', { workId: work.id, asinCount: allAsins.length }); + } else { + // Use the first existing work as the target + const targetWorkId = existingWorkIds[0]; + + // If multiple existing works, merge them into the target + if (existingWorkIds.length > 1) { + const mergeWorkIds = existingWorkIds.slice(1); + + // Move all ASINs from other works to the target + await prisma.workAsin.updateMany({ + where: { workId: { in: mergeWorkIds } }, + data: { workId: targetWorkId }, + }); + + // Delete the now-empty works + await prisma.work.deleteMany({ + where: { id: { in: mergeWorkIds } }, + }); + + logger.debug('Merged works', { + targetWorkId, + mergedWorkIds: mergeWorkIds, + }); + } + + // Add any new ASINs that don't already exist + const newAsins = allAsins.filter(a => !existingAsinSet.has(a)); + if (newAsins.length > 0) { + await Promise.all( + newAsins.map(asin => + prisma.workAsin.create({ + 
data: { + workId: targetWorkId, + asin, + narrator: asin === canonicalAsin ? narrator : undefined, + durationMinutes: asin === canonicalAsin ? durationMinutes : undefined, + isCanonical: asin === canonicalAsin, + source: 'dedup_auto', + }, + }) + ) + ); + + logger.debug('Added ASINs to existing work', { + workId: targetWorkId, + newAsinCount: newAsins.length, + }); + } + + // Update canonical status: ensure the canonical ASIN is marked + await prisma.workAsin.updateMany({ + where: { workId: targetWorkId, asin: canonicalAsin }, + data: { isCanonical: true }, + }); + } +} + +// --------------------------------------------------------------------------- +// Layer 2: Seed ASIN at request time +// --------------------------------------------------------------------------- + +/** + * Ensure an ASIN is tracked in the works table. Creates a single-ASIN work + * if the ASIN isn't already present. Called at request creation time. + * + * Safe to call fire-and-forget — never throws. + */ +export async function seedAsin( + asin: string, + title: string, + author: string, + narrator?: string, + durationMinutes?: number +): Promise { + try { + // Check if ASIN already tracked + const existing = await prisma.workAsin.findUnique({ + where: { asin }, + }); + if (existing) return; + + // Create a new single-ASIN work + const work = await prisma.work.create({ + data: { title, author }, + }); + + await prisma.workAsin.create({ + data: { + workId: work.id, + asin, + narrator, + durationMinutes, + isCanonical: true, + source: 'dedup_auto', + }, + }); + + logger.debug('Seeded ASIN', { workId: work.id, asin }); + } catch (error) { + logger.error('Failed to seed ASIN', { + error: error instanceof Error ? 
error.message : String(error), + asin, + }); + } +} + +// --------------------------------------------------------------------------- +// Sibling ASIN lookup (for library matching expansion) +// --------------------------------------------------------------------------- + +/** + * Given a list of ASINs, return a map of each input ASIN to its sibling ASINs + * (other ASINs in the same work, NOT including the input ASIN itself). + * + * ASINs not found in the works table are simply omitted from the result. + */ +export async function getSiblingAsins( + asins: string[] +): Promise> { + const result = new Map(); + if (asins.length === 0) return result; + + // Step 1: Find which input ASINs are in work_asins and their work IDs + const inputEntries = await prisma.workAsin.findMany({ + where: { asin: { in: asins } }, + select: { asin: true, workId: true }, + }); + + if (inputEntries.length === 0) return result; + + // Build map of workId -> input ASINs in that work + const workIdToInputAsins = new Map(); + for (const entry of inputEntries) { + const list = workIdToInputAsins.get(entry.workId); + if (list) { + list.push(entry.asin); + } else { + workIdToInputAsins.set(entry.workId, [entry.asin]); + } + } + + // Step 2: Get ALL ASINs in those works + const workIds = [...workIdToInputAsins.keys()]; + const allWorkAsins = await prisma.workAsin.findMany({ + where: { workId: { in: workIds } }, + select: { asin: true, workId: true }, + }); + + // Build map of workId -> all ASINs + const workIdToAllAsins = new Map(); + for (const entry of allWorkAsins) { + const list = workIdToAllAsins.get(entry.workId); + if (list) { + list.push(entry.asin); + } else { + workIdToAllAsins.set(entry.workId, [entry.asin]); + } + } + + // Step 3: For each input ASIN, compute siblings (all ASINs in same work minus self) + for (const entry of inputEntries) { + const allInWork = workIdToAllAsins.get(entry.workId) || []; + const siblings = allInWork.filter(a => a !== entry.asin); + if (siblings.length > 
0) { + result.set(entry.asin, siblings); + } + } + + return result; +} diff --git a/src/lib/utils/audiobook-matcher.ts b/src/lib/utils/audiobook-matcher.ts index 1181552..ee49ff7 100644 --- a/src/lib/utils/audiobook-matcher.ts +++ b/src/lib/utils/audiobook-matcher.ts @@ -8,6 +8,7 @@ import { prisma } from '@/lib/db'; import { LibraryItem } from '@/lib/services/library'; +import { getSiblingAsins } from '@/lib/services/works.service'; import { RMABLogger } from './logger'; // Module-level logger @@ -178,6 +179,61 @@ export async function enrichAudiobooksWithMatches( } } + // Works-table sibling expansion: check if unmatched ASINs have siblings in the library + try { + const unmatchedAsins = results.filter(r => !r.isAvailable).map(r => r.asin); + if (unmatchedAsins.length > 0) { + const siblingMap = await getSiblingAsins(unmatchedAsins); + if (siblingMap.size > 0) { + // Collect all sibling ASINs for a single batch library query + const allSiblingAsins = new Set(); + for (const siblings of siblingMap.values()) { + for (const s of siblings) allSiblingAsins.add(s); + } + + if (allSiblingAsins.size > 0) { + const siblingLibraryMatches = await prisma.plexLibrary.findMany({ + where: { asin: { in: [...allSiblingAsins] } }, + select: { asin: true, plexGuid: true }, + }); + const libraryAsinSet = new Set( + siblingLibraryMatches.filter(m => m.asin).map(m => m.asin!.toLowerCase()) + ); + + // Update results where a sibling ASIN is found in the library + for (const result of results) { + if (result.isAvailable) continue; + const siblings = siblingMap.get(result.asin); + if (!siblings) continue; + const matchedSiblingAsin = siblings.find(s => libraryAsinSet.has(s.toLowerCase())); + if (matchedSiblingAsin) { + const libMatch = siblingLibraryMatches.find( + m => m.asin?.toLowerCase() === matchedSiblingAsin.toLowerCase() + ); + (result as any).isAvailable = true; + (result as any).plexGuid = libMatch?.plexGuid || null; + } + } + + const siblingMatchCount = results.filter(r => { + 
if (!r.isAvailable) return false; + return siblingMap.has(r.asin); + }).length; + logger.debug('Sibling expansion', { + unmatchedCount: unmatchedAsins.length, + siblingGroupsFound: siblingMap.size, + siblingMatches: siblingMatchCount, + }); + } + } + } + } catch (error) { + // Works table expansion is best-effort — direct matches still work + logger.error('Sibling ASIN expansion failed', { + error: error instanceof Error ? error.message : String(error), + }); + } + // Always enrich with request status (check ANY user's requests) const asins = audiobooks.map(book => book.asin); @@ -307,6 +363,19 @@ export async function getAvailableAsins(): Promise> { for (const item of completedRequests) { if (item.audibleAsin) asins.add(item.audibleAsin); } + + // Expand with works-table sibling ASINs + try { + if (asins.size > 0) { + const siblingMap = await getSiblingAsins([...asins]); + for (const siblings of siblingMap.values()) { + for (const s of siblings) asins.add(s); + } + } + } catch { + // Works table expansion is best-effort + } + return asins; } diff --git a/src/lib/utils/deduplicate-audiobooks.ts b/src/lib/utils/deduplicate-audiobooks.ts new file mode 100644 index 0000000..1bc426f --- /dev/null +++ b/src/lib/utils/deduplicate-audiobooks.ts @@ -0,0 +1,201 @@ +/** + * Component: Audiobook Deduplication Utility + * Documentation: documentation/integrations/audible.md + * + * Deduplicates audiobook listings that represent the same recording + * under different ASINs (publisher re-listings, rights transfers, etc.). + * + * Dedup key: normalized title + normalized narrator + * Duration tolerance: max(longerDuration * 0.01, 5) minutes + * Missing duration treated as compatible (graceful degradation). 
+ */ + +import type { AudibleAudiobook } from '../integrations/audible.service'; + +// --------------------------------------------------------------------------- +// Title / narrator normalization +// --------------------------------------------------------------------------- + +/** Patterns in parentheses or brackets to strip (edition markers, format labels) */ +const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi; + +/** Trailing subtitle after a colon, or after a hyphen / en dash / em dash that has whitespace on both sides */ +const SUBTITLE_RE = /\s*[:]\s+.+$/; +const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/; + +/** Trailing descriptors like "A Novel", "A Memoir" */ +const TRAILING_DESCRIPTOR_RE = /\s*[-:,]?\s+a\s+(novel|memoir|thriller|mystery|romance|story|tale|novella)\s*$/i; + +/** + * Normalize a title for dedup comparison. + * Strips subtitles, edition markers, and trailing descriptors. + */ +export function normalizeTitle(title: string): string { + let t = title.toLowerCase(); + // Remove parenthesized/bracketed edition markers + t = t.replace(EDITION_PAREN_RE, ''); + // Remove trailing descriptors before subtitle stripping + t = t.replace(TRAILING_DESCRIPTOR_RE, ''); + // Remove subtitle after colon + t = t.replace(SUBTITLE_RE, ''); + // Remove subtitle after a whitespace-delimited hyphen/en dash/em dash (hyphenated words have no surrounding spaces, so they are untouched) + t = t.replace(LONG_DASH_SUBTITLE_RE, ''); + // Collapse whitespace and trim + return t.replace(/\s+/g, ' ').trim(); +} + +/** Normalize narrator for comparison. */ +function normalizeNarrator(narrator?: string): string { + return (narrator || '').toLowerCase().trim(); +} + +// --------------------------------------------------------------------------- +// Duration compatibility +// --------------------------------------------------------------------------- + +/** + * Check if two durations are compatible (represent the same recording). 
+ * Tolerance: max(longerDuration * 0.01, 5) minutes. + * Missing duration on either side is treated as compatible. + */ +export function areDurationsCompatible(a?: number, b?: number): boolean { + if (a == null || b == null) return true; + const longer = Math.max(a, b); + const tolerance = Math.max(longer * 0.01, 5); + return Math.abs(a - b) <= tolerance; +} + +// --------------------------------------------------------------------------- +// Metadata scoring (for picking best representative) +// --------------------------------------------------------------------------- + +function metadataScore(book: AudibleAudiobook): number { + let score = 0; + if (book.coverArtUrl) score++; + if (book.rating != null) score++; + if (book.durationMinutes != null) score++; + if (book.description) score++; + if (book.narrator) score++; + if (book.releaseDate) score++; + if (book.genres && book.genres.length > 0) score++; + return score; +} + +// --------------------------------------------------------------------------- +// Dedup group types (for works-table persistence) +// --------------------------------------------------------------------------- + +/** Metadata about a group of ASINs that were collapsed during dedup. */ +export interface DedupGroup { + canonicalAsin: string; // ASIN of the "winner" (best metadata score) + allAsins: string[]; // All ASINs in this group (including canonical) + title: string; // Title from the canonical entry + author: string; // Author from the canonical entry + narrator?: string; // Narrator from the canonical entry + durationMinutes?: number; // Duration from the canonical entry +} + +/** Result of deduplication with group collection. 
*/ +export interface DeduplicateResult { + books: AudibleAudiobook[]; // The deduped list (same as deduplicateAudiobooks returns) + groups: DedupGroup[]; // Groups where 2+ ASINs were collapsed +} + +// --------------------------------------------------------------------------- +// Main dedup functions +// --------------------------------------------------------------------------- + +/** + * Deduplicate audiobook listings by normalized title + narrator + duration. + * + * Same narrator + compatible duration + similar title = same recording -> collapse. + * Different narrator = different production -> keep both. + * Duration outside tolerance = different content (abridged vs unabridged) -> keep both. + * + * Preserves original ordering (position of first appearance). + */ +export function deduplicateAudiobooks(books: AudibleAudiobook[]): AudibleAudiobook[] { + return deduplicateAndCollectGroups(books).books; +} + +/** + * Deduplicate audiobooks AND return grouping metadata for works-table persistence. + * Returns both the deduped list and the groups where 2+ ASINs were collapsed. + */ +export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): DeduplicateResult { + if (books.length <= 1) return { books: [...books], groups: [] }; + + // Group by normalized title + narrator + const titleNarratorGroups = new Map(); + const insertionOrder: string[] = []; + + for (const book of books) { + const key = `${normalizeTitle(book.title)}|||${normalizeNarrator(book.narrator)}`; + const group = titleNarratorGroups.get(key); + if (group) { + group.push(book); + } else { + titleNarratorGroups.set(key, [book]); + insertionOrder.push(key); + } + } + + const result: AudibleAudiobook[] = []; + const dedupGroups: DedupGroup[] = []; + + for (const key of insertionOrder) { + const group = titleNarratorGroups.get(key)!; + if (group.length === 1) { + result.push(group[0]); + continue; + } + + // Within a title+narrator group, further split by duration compatibility. 
+ // Build sub-groups where all members are duration-compatible with the + // representative (first member). A book joins the first compatible sub-group. + const subGroups: AudibleAudiobook[][] = []; + + for (const book of group) { + let placed = false; + for (const sg of subGroups) { + // Check compatibility against the representative (first member) + if (areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes)) { + sg.push(book); + placed = true; + break; + } + } + if (!placed) { + subGroups.push([book]); + } + } + + // From each sub-group, pick the best representative and collect group metadata + for (const sg of subGroups) { + let best = sg[0]; + let bestScore = metadataScore(best); + for (let i = 1; i < sg.length; i++) { + const score = metadataScore(sg[i]); + if (score > bestScore) { + best = sg[i]; + bestScore = score; + } + } + result.push(best); + + // Collect group metadata for works-table persistence (only multi-ASIN groups) + if (sg.length >= 2) { + dedupGroups.push({ + canonicalAsin: best.asin, + allAsins: sg.map(b => b.asin), + title: best.title, + author: best.author, + narrator: best.narrator, + durationMinutes: best.durationMinutes, + }); + } + } + } + + return { books: result, groups: dedupGroups }; +} diff --git a/src/lib/utils/parse-runtime.ts b/src/lib/utils/parse-runtime.ts new file mode 100644 index 0000000..148dcb7 --- /dev/null +++ b/src/lib/utils/parse-runtime.ts @@ -0,0 +1,44 @@ +/** + * Component: Runtime Parsing Utility + * Documentation: documentation/integrations/audible.md + * + * Shared runtime/duration text parser extracted from AudibleService. + * Handles all i18n patterns (English, German, Spanish, French) via + * language-specific regex patterns in LanguageConfig. + */ + +import type { LanguageConfig } from '../constants/language-config'; + +/** + * Parse runtime text (e.g. "12 hrs and 30 mins", "5 Std. 20 Min.") + * into total minutes using language-specific patterns. 
+ * + * @param runtimeText - Raw runtime string from Audible HTML + * @param langConfig - Language configuration with hour/minute regex patterns + * @returns Total minutes, or undefined if no duration could be parsed + */ +export function parseRuntime(runtimeText: string, langConfig: LanguageConfig): number | undefined { + if (!runtimeText) return undefined; + + let totalMinutes = 0; + + // Try each hour pattern until one matches + for (const pattern of langConfig.scraping.runtimeHourPatterns) { + const match = runtimeText.match(pattern); + if (match) { + totalMinutes += parseInt(match[1]) * 60; + break; + } + } + + // Try each minute pattern until one matches + for (const pattern of langConfig.scraping.runtimeMinutePatterns) { + const match = runtimeText.match(pattern); + if (match) { + totalMinutes += parseInt(match[1]); + break; + } + } + + return totalMinutes > 0 ? totalMinutes : undefined; +} diff --git a/tests/helpers/prisma.ts b/tests/helpers/prisma.ts index fc551c1..dfcb5ac 100644 --- a/tests/helpers/prisma.ts +++ b/tests/helpers/prisma.ts @@ -47,6 +47,8 @@ export const createPrismaMock = () => ({ bookDateSwipe: createModelMock(), goodreadsShelf: createModelMock(), goodreadsBookMapping: createModelMock(), + work: createModelMock(), + workAsin: createModelMock(), $queryRaw: vi.fn(), $disconnect: vi.fn(), }); diff --git a/tests/services/works.service.test.ts b/tests/services/works.service.test.ts new file mode 100644 index 0000000..5efca96 --- /dev/null +++ b/tests/services/works.service.test.ts @@ -0,0 +1,306 @@ +/** + * Component: Works Service Tests + * Documentation: documentation/integrations/audible.md + */ + +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import { createPrismaMock } from '../helpers/prisma'; +import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks'; + +const prismaMock = createPrismaMock(); + +vi.mock('@/lib/db', () => ({ + prisma: prismaMock, +})); + +vi.mock('@/lib/utils/logger', () => ({ + RMABLogger: { 
+ create: () => ({ + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }), + }, +})); + +describe('persistDedupGroups', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.resetModules(); + }); + + it('creates new work + work_asins for a fresh group', async () => { + prismaMock.workAsin.findMany.mockResolvedValue([]); + prismaMock.work.create.mockResolvedValue({ id: 'work-1' }); + prismaMock.workAsin.create.mockResolvedValue({}); + prismaMock.workAsin.updateMany.mockResolvedValue({ count: 0 }); + + const { persistDedupGroups } = await import('@/lib/services/works.service'); + + const groups: DedupGroup[] = [{ + canonicalAsin: 'ASIN_A', + allAsins: ['ASIN_A', 'ASIN_B'], + title: 'Test Book', + author: 'Test Author', + narrator: 'Test Narrator', + durationMinutes: 600, + }]; + + await persistDedupGroups(groups); + + expect(prismaMock.work.create).toHaveBeenCalledWith({ + data: { title: 'Test Book', author: 'Test Author' }, + }); + expect(prismaMock.workAsin.create).toHaveBeenCalledTimes(2); + + // Canonical ASIN should have narrator, duration, isCanonical=true + expect(prismaMock.workAsin.create).toHaveBeenCalledWith({ + data: expect.objectContaining({ + workId: 'work-1', + asin: 'ASIN_A', + narrator: 'Test Narrator', + durationMinutes: 600, + isCanonical: true, + source: 'dedup_auto', + }), + }); + + // Non-canonical ASIN should have isCanonical=false + expect(prismaMock.workAsin.create).toHaveBeenCalledWith({ + data: expect.objectContaining({ + workId: 'work-1', + asin: 'ASIN_B', + isCanonical: false, + source: 'dedup_auto', + }), + }); + }); + + it('adds new ASINs to existing work when canonical already exists', async () => { + prismaMock.workAsin.findMany.mockResolvedValue([ + { asin: 'ASIN_A', workId: 'existing-work' }, + ]); + prismaMock.workAsin.create.mockResolvedValue({}); + prismaMock.workAsin.updateMany.mockResolvedValue({ count: 1 }); + + const { persistDedupGroups } = await import('@/lib/services/works.service'); + + const groups: 
DedupGroup[] = [{ + canonicalAsin: 'ASIN_A', + allAsins: ['ASIN_A', 'ASIN_B', 'ASIN_C'], + title: 'Test Book', + author: 'Test Author', + narrator: 'Narrator', + durationMinutes: 500, + }]; + + await persistDedupGroups(groups); + + // Should NOT create a new work + expect(prismaMock.work.create).not.toHaveBeenCalled(); + + // Should create entries for ASIN_B and ASIN_C only (ASIN_A already exists) + expect(prismaMock.workAsin.create).toHaveBeenCalledTimes(2); + expect(prismaMock.workAsin.create).toHaveBeenCalledWith({ + data: expect.objectContaining({ + workId: 'existing-work', + asin: 'ASIN_B', + }), + }); + expect(prismaMock.workAsin.create).toHaveBeenCalledWith({ + data: expect.objectContaining({ + workId: 'existing-work', + asin: 'ASIN_C', + }), + }); + }); + + it('merges two separate works when dedup groups them together', async () => { + // ASIN_A is in work-1, ASIN_B is in work-2 + prismaMock.workAsin.findMany.mockResolvedValue([ + { asin: 'ASIN_A', workId: 'work-1' }, + { asin: 'ASIN_B', workId: 'work-2' }, + ]); + prismaMock.workAsin.updateMany.mockResolvedValue({ count: 1 }); + prismaMock.work.deleteMany.mockResolvedValue({ count: 1 }); + + const { persistDedupGroups } = await import('@/lib/services/works.service'); + + const groups: DedupGroup[] = [{ + canonicalAsin: 'ASIN_A', + allAsins: ['ASIN_A', 'ASIN_B'], + title: 'Merged Book', + author: 'Author', + }]; + + await persistDedupGroups(groups); + + // Should move work-2 ASINs to work-1 + expect(prismaMock.workAsin.updateMany).toHaveBeenCalledWith({ + where: { workId: { in: ['work-2'] } }, + data: { workId: 'work-1' }, + }); + + // Should delete work-2 + expect(prismaMock.work.deleteMany).toHaveBeenCalledWith({ + where: { id: { in: ['work-2'] } }, + }); + }); + + it('silently catches and logs errors without throwing', async () => { + prismaMock.workAsin.findMany.mockRejectedValue(new Error('DB connection failed')); + + const { persistDedupGroups } = await import('@/lib/services/works.service'); + + 
const groups: DedupGroup[] = [{ + canonicalAsin: 'ASIN_A', + allAsins: ['ASIN_A', 'ASIN_B'], + title: 'Test', + author: 'Auth', + }]; + + // Should not throw + await expect(persistDedupGroups(groups)).resolves.toBeUndefined(); + }); +}); + +describe('seedAsin', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.resetModules(); + }); + + it('creates single-ASIN work for new ASIN', async () => { + prismaMock.workAsin.findUnique.mockResolvedValue(null); + prismaMock.work.create.mockResolvedValue({ id: 'new-work' }); + prismaMock.workAsin.create.mockResolvedValue({}); + + const { seedAsin } = await import('@/lib/services/works.service'); + + await seedAsin('NEW_ASIN', 'New Book', 'Author', 'Narrator', 300); + + expect(prismaMock.work.create).toHaveBeenCalledWith({ + data: { title: 'New Book', author: 'Author' }, + }); + expect(prismaMock.workAsin.create).toHaveBeenCalledWith({ + data: { + workId: 'new-work', + asin: 'NEW_ASIN', + narrator: 'Narrator', + durationMinutes: 300, + isCanonical: true, + source: 'dedup_auto', + }, + }); + }); + + it('does nothing for already-tracked ASIN', async () => { + prismaMock.workAsin.findUnique.mockResolvedValue({ + id: 'existing', + asin: 'EXISTING_ASIN', + workId: 'work-1', + }); + + const { seedAsin } = await import('@/lib/services/works.service'); + + await seedAsin('EXISTING_ASIN', 'Book', 'Author'); + + expect(prismaMock.work.create).not.toHaveBeenCalled(); + expect(prismaMock.workAsin.create).not.toHaveBeenCalled(); + }); + + it('silently catches and logs errors without throwing', async () => { + prismaMock.workAsin.findUnique.mockRejectedValue(new Error('DB error')); + + const { seedAsin } = await import('@/lib/services/works.service'); + + await expect(seedAsin('ASIN', 'Book', 'Auth')).resolves.toBeUndefined(); + }); +}); + +describe('getSiblingAsins', () => { + beforeEach(() => { + vi.clearAllMocks(); + vi.resetModules(); + }); + + it('returns sibling ASINs correctly', async () => { + // First query: find input ASINs 
and their work IDs + prismaMock.workAsin.findMany + .mockResolvedValueOnce([ + { asin: 'ASIN_A', workId: 'work-1' }, + { asin: 'ASIN_C', workId: 'work-2' }, + ]) + // Second query: all ASINs in those works + .mockResolvedValueOnce([ + { asin: 'ASIN_A', workId: 'work-1' }, + { asin: 'ASIN_B', workId: 'work-1' }, + { asin: 'ASIN_C', workId: 'work-2' }, + { asin: 'ASIN_D', workId: 'work-2' }, + { asin: 'ASIN_E', workId: 'work-2' }, + ]); + + const { getSiblingAsins } = await import('@/lib/services/works.service'); + + const result = await getSiblingAsins(['ASIN_A', 'ASIN_C']); + + expect(result.get('ASIN_A')).toEqual(['ASIN_B']); + expect(result.get('ASIN_C')).toEqual(['ASIN_D', 'ASIN_E']); + }); + + it('returns empty map for unknown ASINs', async () => { + prismaMock.workAsin.findMany.mockResolvedValue([]); + + const { getSiblingAsins } = await import('@/lib/services/works.service'); + + const result = await getSiblingAsins(['UNKNOWN']); + + expect(result.size).toBe(0); + }); + + it('returns empty map for empty input', async () => { + const { getSiblingAsins } = await import('@/lib/services/works.service'); + + const result = await getSiblingAsins([]); + + expect(result.size).toBe(0); + // Should not query DB + expect(prismaMock.workAsin.findMany).not.toHaveBeenCalled(); + }); + + it('excludes the input ASIN itself from siblings', async () => { + prismaMock.workAsin.findMany + .mockResolvedValueOnce([ + { asin: 'ASIN_A', workId: 'work-1' }, + ]) + .mockResolvedValueOnce([ + { asin: 'ASIN_A', workId: 'work-1' }, + { asin: 'ASIN_B', workId: 'work-1' }, + ]); + + const { getSiblingAsins } = await import('@/lib/services/works.service'); + + const result = await getSiblingAsins(['ASIN_A']); + + expect(result.get('ASIN_A')).toEqual(['ASIN_B']); + expect(result.get('ASIN_A')).not.toContain('ASIN_A'); + }); + + it('omits ASINs with no siblings (single-ASIN works)', async () => { + prismaMock.workAsin.findMany + .mockResolvedValueOnce([ + { asin: 'ASIN_LONELY', workId: 
/**
 * Test helper: builds an `AudibleAudiobook` for the deduplication tests.
 *
 * The metadata fields listed in the body (narrator, cover art, duration,
 * rating, description, release date, genres and the series/author
 * identifiers) default to `undefined`. `overrides` is spread last, so any
 * field the caller supplies replaces the default.
 *
 * @param overrides - Must carry `asin`, `title` and `author`; may carry any
 *   other `AudibleAudiobook` field.
 * @returns The defaults merged with `overrides`.
 */
function makeBook(
  overrides: Partial<AudibleAudiobook> & { asin: string; title: string; author: string },
): AudibleAudiobook {
  return {
    narrator: undefined,
    coverArtUrl: undefined,
    durationMinutes: undefined,
    rating: undefined,
    description: undefined,
    releaseDate: undefined,
    genres: undefined,
    series: undefined,
    seriesPart: undefined,
    seriesAsin: undefined,
    authorAsin: undefined,
    ...overrides,
  };
}
(Unabridged)')).toBe('the black prism'); + }); + + it('strips [Abridged Edition]', () => { + expect(normalizeTitle('The Black Prism [Abridged Edition]')).toBe('the black prism'); + }); + + it('strips (2024 Remastered Edition)', () => { + expect(normalizeTitle('The Hobbit (2024 Remastered Edition)')).toBe('the hobbit'); + }); + + it('strips subtitle after colon', () => { + expect(normalizeTitle('The Black Prism: Lightbringer, Book 1')).toBe('the black prism'); + }); + + it('strips subtitle after long dash', () => { + expect(normalizeTitle('The Black Prism \u2014 A Lightbringer Novel')).toBe('the black prism'); + }); + + it('strips trailing "A Novel"', () => { + expect(normalizeTitle('The Black Prism: A Novel')).toBe('the black prism'); + }); + + it('strips (Audiobook)', () => { + expect(normalizeTitle('The Hobbit (Audiobook)')).toBe('the hobbit'); + }); + + it('strips (Dramatized Adaptation)', () => { + expect(normalizeTitle('The Black Prism (Dramatized Adaptation)')).toBe('the black prism'); + }); + + it('strips (Full Cast Narration)', () => { + expect(normalizeTitle('The Black Prism (Full Cast Narration)')).toBe('the black prism'); + }); + + it('collapses whitespace', () => { + expect(normalizeTitle(' The Black Prism ')).toBe('the black prism'); + }); + + it('handles empty string', () => { + expect(normalizeTitle('')).toBe(''); + }); + + it('preserves hyphenated words (not subtitles)', () => { + // "well-known" has a short dash, not a subtitle separator + expect(normalizeTitle('A Well-Known Book')).toBe('a well-known book'); + }); +}); + +// --------------------------------------------------------------------------- +// areDurationsCompatible +// --------------------------------------------------------------------------- + +describe('areDurationsCompatible', () => { + it('returns true when both undefined', () => { + expect(areDurationsCompatible(undefined, undefined)).toBe(true); + }); + + it('returns true when one undefined', () => { + 
expect(areDurationsCompatible(600, undefined)).toBe(true); + expect(areDurationsCompatible(undefined, 600)).toBe(true); + }); + + it('returns true for identical durations', () => { + expect(areDurationsCompatible(600, 600)).toBe(true); + }); + + it('uses 1% of longer duration as tolerance for long books', () => { + // Two 40-hour books (2400 min): tolerance = max(2400*0.01, 5) = 24 min + expect(areDurationsCompatible(2400, 2424)).toBe(true); // exactly at tolerance + expect(areDurationsCompatible(2400, 2425)).toBe(false); // just over + }); + + it('uses 5-minute minimum tolerance for short books', () => { + // Two 2-hour books (120 min): tolerance = max(120*0.01, 5) = max(1.2, 5) = 5 min + expect(areDurationsCompatible(120, 125)).toBe(true); // exactly at 5-min minimum + expect(areDurationsCompatible(120, 126)).toBe(false); // just over + }); + + it('keeps abridged vs unabridged separate (large duration gap)', () => { + // Unabridged: 720 min (12 hrs), Abridged: 360 min (6 hrs) + expect(areDurationsCompatible(720, 360)).toBe(false); + }); + + it('symmetry: order does not matter', () => { + expect(areDurationsCompatible(2400, 2424)).toBe(true); + expect(areDurationsCompatible(2424, 2400)).toBe(true); + expect(areDurationsCompatible(120, 126)).toBe(false); + expect(areDurationsCompatible(126, 120)).toBe(false); + }); +}); + +// --------------------------------------------------------------------------- +// deduplicateAudiobooks +// --------------------------------------------------------------------------- + +describe('deduplicateAudiobooks', () => { + it('returns empty array for empty input', () => { + expect(deduplicateAudiobooks([])).toEqual([]); + }); + + it('returns single book unchanged', () => { + const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Author' }); + expect(deduplicateAudiobooks([book])).toEqual([book]); + }); + + it('passes through all-unique books unchanged', () => { + const books = [ + makeBook({ asin: 'A1', title: 'Book One', 
author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }), + makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }), + makeBook({ asin: 'A3', title: 'Book Three', author: 'Auth', narrator: 'Nar B', durationMinutes: 700 }), + ]; + expect(deduplicateAudiobooks(books)).toHaveLength(3); + }); + + it('collapses simple duplicates (same title + narrator + similar duration)', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(1); + }); + + it('keeps books with different narrators (different production)', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Full Cast', durationMinutes: 480 }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(2); + }); + + it('keeps abridged vs unabridged (same narrator, very different duration)', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }), + makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(2); + }); + + it('collapses when one book has missing duration', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: undefined }), + ]; + const 
result = deduplicateAudiobooks(books); + expect(result).toHaveLength(1); + }); + + it('collapses when both books have missing duration', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(1); + }); + + it('collapses title variants with edition markers', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism (Unabridged)', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1258 }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(1); + }); + + it('collapses title variants with subtitles', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism: Lightbringer, Book 1', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(1); + }); + + it('picks the representative with most metadata', () => { + const sparse = makeBook({ + asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', + narrator: 'Simon Vance', durationMinutes: 1260, + }); + const rich = makeBook({ + asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', + narrator: 'Simon Vance', durationMinutes: 1262, + coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book', + }); + const result = deduplicateAudiobooks([sparse, rich]); + expect(result).toHaveLength(1); + expect(result[0].asin).toBe('A2'); // rich entry wins + }); + + it('preserves original order (first-seen position)', () => { + const books = [ 
+ makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }), + makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 400 }), + makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }), + makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }), + ]; + const result = deduplicateAudiobooks(books); + expect(result).toHaveLength(3); + expect(result.map(b => b.title)).toEqual(['Alpha', 'Beta', 'Charlie']); + }); + + it('handles Lightbringer-style scenario: unabridged + dramatized', () => { + // Simon Vance full narration (long) + const vance1 = makeBook({ + asin: 'SV1', title: 'The Black Prism', author: 'Brent Weeks', + narrator: 'Simon Vance', durationMinutes: 1260, + coverArtUrl: 'cover1.jpg', rating: 4.7, + }); + // Re-listed Simon Vance (same duration, different ASIN) + const vance2 = makeBook({ + asin: 'SV2', title: 'The Black Prism: Lightbringer Book 1', author: 'Brent Weeks', + narrator: 'Simon Vance', durationMinutes: 1262, + }); + // Dramatized with full cast (shorter, different narrator) + const drama = makeBook({ + asin: 'DR1', title: 'The Black Prism (Dramatized Adaptation)', author: 'Brent Weeks', + narrator: 'Full Cast', durationMinutes: 480, + coverArtUrl: 'cover-drama.jpg', + }); + + const result = deduplicateAudiobooks([vance1, vance2, drama]); + expect(result).toHaveLength(2); + // Simon Vance should collapse to 1, Full Cast stays + expect(result.find(b => b.narrator === 'Simon Vance')).toBeTruthy(); + expect(result.find(b => b.narrator === 'Full Cast')).toBeTruthy(); + // Should pick the richer entry for Simon Vance + const svResult = result.find(b => b.narrator === 'Simon Vance')!; + expect(svResult.asin).toBe('SV1'); // has cover + rating + }); + + it('uses percentage tolerance for very long audiobooks', () => { + // Two 40-hour books: tolerance = max(2400*0.01, 5) = 24 min + const books = [ + 
makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2400 }), + makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2420 }), + ]; + expect(deduplicateAudiobooks(books)).toHaveLength(1); + + // Beyond tolerance + const booksFar = [ + makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2400 }), + makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2430 }), + ]; + expect(deduplicateAudiobooks(booksFar)).toHaveLength(2); + }); + + it('treats missing narrator as its own group', () => { + // Two entries with same title but no narrator - should collapse + const books = [ + makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }), + makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 302 }), + ]; + expect(deduplicateAudiobooks(books)).toHaveLength(1); + }); + + it('does not collapse empty-narrator with named narrator', () => { + const books = [ + makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }), + makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'John Smith', durationMinutes: 302 }), + ]; + expect(deduplicateAudiobooks(books)).toHaveLength(2); + }); +}); + +// --------------------------------------------------------------------------- +// deduplicateAndCollectGroups +// --------------------------------------------------------------------------- + +describe('deduplicateAndCollectGroups', () => { + it('returns empty groups array when no duplicates', () => { + const books = [ + makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }), + makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }), + ]; + const { books: result, groups } = 
deduplicateAndCollectGroups(books); + expect(result).toHaveLength(2); + expect(groups).toHaveLength(0); + }); + + it('returns empty groups for empty input', () => { + const { books: result, groups } = deduplicateAndCollectGroups([]); + expect(result).toHaveLength(0); + expect(groups).toHaveLength(0); + }); + + it('returns empty groups for single book', () => { + const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Auth' }); + const { books: result, groups } = deduplicateAndCollectGroups([book]); + expect(result).toHaveLength(1); + expect(groups).toHaveLength(0); + }); + + it('returns group with 2 ASINs when 2 books match', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }), + makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }), + ]; + const { books: result, groups } = deduplicateAndCollectGroups(books); + expect(result).toHaveLength(1); + expect(groups).toHaveLength(1); + expect(groups[0].allAsins).toHaveLength(2); + expect(groups[0].allAsins).toContain('A1'); + expect(groups[0].allAsins).toContain('A2'); + }); + + it('returns group with 3+ ASINs for multi-duplicate scenario', () => { + const books = [ + makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }), + makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 662 }), + makeBook({ asin: 'A3', title: 'The Hobbit (Unabridged)', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 658 }), + ]; + const { books: result, groups } = deduplicateAndCollectGroups(books); + expect(result).toHaveLength(1); + expect(groups).toHaveLength(1); + expect(groups[0].allAsins).toHaveLength(3); + expect(groups[0].allAsins).toContain('A1'); + expect(groups[0].allAsins).toContain('A2'); + expect(groups[0].allAsins).toContain('A3'); + }); + + 
it('canonicalAsin is the one with highest metadata score', () => { + const sparse = makeBook({ + asin: 'SPARSE', title: 'The Black Prism', author: 'Brent Weeks', + narrator: 'Simon Vance', durationMinutes: 1260, + }); + const rich = makeBook({ + asin: 'RICH', title: 'The Black Prism', author: 'Brent Weeks', + narrator: 'Simon Vance', durationMinutes: 1262, + coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book', + }); + const { groups } = deduplicateAndCollectGroups([sparse, rich]); + expect(groups).toHaveLength(1); + expect(groups[0].canonicalAsin).toBe('RICH'); + }); + + it('groups only include entries with 2+ ASINs', () => { + const books = [ + makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }), + makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }), + makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }), + ]; + const { groups } = deduplicateAndCollectGroups(books); + // Only Alpha group should appear (Beta is a singleton) + expect(groups).toHaveLength(1); + expect(groups[0].allAsins).toContain('A1'); + expect(groups[0].allAsins).toContain('A2'); + }); + + it('duration-incompatible books produce separate entries (no group for singletons)', () => { + // Same title/narrator but very different durations (abridged vs unabridged) + const books = [ + makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }), + makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }), + ]; + const { books: result, groups } = deduplicateAndCollectGroups(books); + expect(result).toHaveLength(2); // Not collapsed + expect(groups).toHaveLength(0); // No multi-ASIN groups + }); + + it('books field matches what deduplicateAudiobooks returns', () => { + const books = [ + makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', 
durationMinutes: 300, coverArtUrl: 'img.jpg', rating: 4.5 }), + makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }), + makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }), + makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 600 }), + makeBook({ asin: 'C2', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 601 }), + ]; + const dedupOnly = deduplicateAudiobooks(books); + const { books: withGroups } = deduplicateAndCollectGroups(books); + expect(withGroups.map(b => b.asin)).toEqual(dedupOnly.map(b => b.asin)); + }); + + it('includes narrator and durationMinutes from canonical entry in group', () => { + const books = [ + makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 480 }), + makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 482, coverArtUrl: 'img.jpg', rating: 4.0 }), + ]; + const { groups } = deduplicateAndCollectGroups(books); + expect(groups).toHaveLength(1); + expect(groups[0].canonicalAsin).toBe('A2'); // richer metadata + expect(groups[0].narrator).toBe('Jane Doe'); + expect(groups[0].durationMinutes).toBe(482); + expect(groups[0].author).toBe('Auth'); + }); +});