Add works table and ASIN deduping

Add persistent cross-ASIN "works" mapping and client-side deduplication to improve library matching. Introduces a Prisma migration and models (Work, WorkAsin) plus src/lib/services/works.service for persisting dedup groups, seeding ASINs at request time, and sibling lookup. Adds a deduplication utility (deduplicate-audiobooks) that normalizes titles/narrators, compares durations, and returns grouping metadata; API routes (search, author, series) now deduplicate results before enrichment and fire-and-forget persist groups. Adds sibling-ASIN expansion into audiobook matcher and expands getAvailableAsins accordingly. Extracts runtime parsing into a shared parse-runtime util and updates audible scrapers/services to use it. Includes unit tests for dedup logic and works service and updates test Prisma mocks.
2026-06-05 13:50:11 +00:00 · 2026-03-03 13:31:46 -05:00
parent ff80d995c5
commit 610873af6b
15 changed files with 1446 additions and 32 deletions
@@ -0,0 +1,42 @@
 -- Adds the "works" / "work_asins" tables: a work groups several Audible ASINs
 -- that refer to the same logical audiobook.
 -- CreateTable
 -- "updated_at" has no DEFAULT here, so every insert must supply a value.
 CREATE TABLE "works" (
    "id" TEXT NOT NULL,
    "title" TEXT NOT NULL,
    "author" TEXT NOT NULL,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updated_at" TIMESTAMP(3) NOT NULL,
    CONSTRAINT "works_pkey" PRIMARY KEY ("id")
 );
 -- CreateTable
 -- One row per ASIN; "narrator" and "duration_minutes" are nullable.
 CREATE TABLE "work_asins" (
    "id" TEXT NOT NULL,
    "work_id" TEXT NOT NULL,
    "asin" TEXT NOT NULL,
    "narrator" TEXT,
    "duration_minutes" INTEGER,
    "is_canonical" BOOLEAN NOT NULL DEFAULT false,
    "source" TEXT NOT NULL,
    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    CONSTRAINT "work_asins_pkey" PRIMARY KEY ("id")
 );
 -- CreateIndex
 CREATE INDEX "works_title_idx" ON "works"("title");
 -- CreateIndex
 CREATE INDEX "works_author_idx" ON "works"("author");
 -- CreateIndex
 -- An ASIN can belong to at most one work.
 CREATE UNIQUE INDEX "work_asins_asin_key" ON "work_asins"("asin");
 -- CreateIndex
 CREATE INDEX "work_asins_work_id_idx" ON "work_asins"("work_id");
 -- CreateIndex
 -- NOTE(review): "work_asins_asin_key" above is already a unique index on the
 -- same column, so this plain index looks redundant. Confirm, and if so drop
 -- it together with the matching @@index([asin]) in the Prisma schema.
 CREATE INDEX "work_asins_asin_idx" ON "work_asins"("asin");
 -- AddForeignKey
 -- Deleting a work removes its ASIN rows (ON DELETE CASCADE).
 ALTER TABLE "work_asins" ADD CONSTRAINT "work_asins_work_id_fkey" FOREIGN KEY ("work_id") REFERENCES "works"("id") ON DELETE CASCADE ON UPDATE CASCADE;
@@ -531,3 +531,43 @@ model GoodreadsBookMapping {
  @@index([audibleAsin])
  @@map("goodreads_book_mappings")
 }
 // ============================================================================
 // WORKS TABLE
 // Cross-ASIN audiobook identity mapping — links multiple Audible ASINs
 // to a single logical work for library matching across editions.
 // Documentation: documentation/integrations/audible.md
 // ============================================================================
 // A single logical audiobook. Every Audible ASIN known to refer to it is
 // attached through the `asins` relation.
 model Work {
  id        String     @id @default(uuid())
  title     String
  author    String
  createdAt DateTime   @default(now()) @map("created_at")
  updatedAt DateTime   @updatedAt @map("updated_at")
  // Relations
  asins WorkAsin[]
  // Non-unique lookup indexes on title and author
  @@index([title])
  @@index([author])
  @@map("works")
 }
 // Membership of one ASIN in a Work. `asin` is unique, so an ASIN can belong
 // to at most one work at a time.
 model WorkAsin {
  id              String   @id @default(uuid())
  workId          String   @map("work_id")
  asin            String   @unique
  narrator        String?
  durationMinutes Int?     @map("duration_minutes")
  isCanonical     Boolean  @default(false) @map("is_canonical")
  source          String   // 'dedup_auto' | 'admin_manual'
  createdAt       DateTime @default(now()) @map("created_at")
  // Relations
  // Rows are removed automatically when the owning work is deleted.
  work Work @relation(fields: [workId], references: [id], onDelete: Cascade)
  @@index([workId])
  // NOTE(review): `asin` is already @unique (which creates a unique index), so
  // this extra index looks redundant. Confirm before removing; the migration
  // would have to change with it.
  @@index([asin])
  @@map("work_asins")
 }
@@ -6,6 +6,8 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
 import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
 import { persistDedupGroups } from '@/lib/services/works.service';
 import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';
@@ -38,14 +40,22 @@ export async function GET(request: NextRequest) {
    const currentUser = getCurrentUser(request);
    const userId = currentUser?.sub || undefined;
    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
    const { books: dedupedResults, groups } = deduplicateAndCollectGroups(results.results);
    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
    if (groups.length > 0) {
      persistDedupGroups(groups).catch(() => {});
    }
    // Enrich search results with availability and request status information
-    const enrichedResults = await enrichAudiobooksWithMatches(results.results, userId);
+    const enrichedResults = await enrichAudiobooksWithMatches(dedupedResults, userId);
    return NextResponse.json({
      success: true,
      query: results.query,
      results: enrichedResults,
-      totalResults: results.totalResults,
+      totalResults: enrichedResults.length,
      page: results.page,
      hasMore: results.hasMore,
    });
@@ -6,6 +6,8 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
 import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
 import { persistDedupGroups } from '@/lib/services/works.service';
 import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';
@@ -53,9 +55,17 @@ export async function GET(
    const audibleService = getAudibleService();
    const result = await audibleService.searchByAuthorAsin(authorName.trim(), asin, page);
    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
    const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(result.books);
    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
    if (groups.length > 0) {
      persistDedupGroups(groups).catch(() => {});
    }
    // Enrich with library availability and request status
    const userId = currentUser.sub || undefined;
-    const enrichedBooks = await enrichAudiobooksWithMatches(result.books, userId);
+    const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);
    logger.info(`Author books complete: "${authorName}" → ${enrichedBooks.length} books (page ${page})`);
@@ -64,7 +74,7 @@ export async function GET(
      books: enrichedBooks,
      authorName: authorName.trim(),
      authorAsin: asin,
-      totalBooks: result.totalResults || enrichedBooks.length,
+      totalBooks: enrichedBooks.length,
      hasMore: result.hasMore,
      page: result.page,
    });
@@ -8,6 +8,8 @@ import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';
 import { scrapeSeriesPage } from '@/lib/integrations/audible-series';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
 import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
 import { persistDedupGroups } from '@/lib/services/works.service';
 const logger = RMABLogger.create('API.Series.Detail');
@@ -49,9 +51,17 @@ export async function GET(
      );
    }
    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
    const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(detail.books);
    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
    if (groups.length > 0) {
      persistDedupGroups(groups).catch(() => {});
    }
    // Enrich books with library availability and request status
    const userId = currentUser.sub || undefined;
-    const enrichedBooks = await enrichAudiobooksWithMatches(detail.books, userId);
+    const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);
    logger.info(`Series detail complete: "${detail.title}" (${enrichedBooks.length} books, page ${page})`);
@@ -14,8 +14,10 @@ import {
  getLanguageForRegion,
  buildContainsSelector,
  stripPrefixes,
  type LanguageConfig,
 } from '../constants/language-config';
 import { RMABLogger } from '../utils/logger';
 import { parseRuntime } from '../utils/parse-runtime';
 import { randomDelay } from '../utils/scrape-resilience';
 const logger = RMABLogger.create('Audible.Series');
@@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<
      undefined;
    // Parse all books from the series page
-    const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes);
+    const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig);
    // Use actual book count if we got more from scraping
    const bookCount = Math.max(summary.bookCount, books.length);
@@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun
 function parseSeriesBooks(
  $: cheerio.CheerioAPI,
  authorPrefixes: string[],
-  narratorPrefixes: string[]
+  narratorPrefixes: string[],
  langConfig: LanguageConfig
 ): AudibleAudiobook[] {
  const books: AudibleAudiobook[] = [];
  const seenAsins = new Set<string>();
@@ -453,6 +456,11 @@ function parseSeriesBooks(
    const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
    const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;
    // Duration
    const runtimeText = $el.find('.runtimeLabel').text().trim() ||
      $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
    const durationMinutes = parseRuntime(runtimeText, langConfig);
    books.push({
      asin: bookAsin,
      title,
@@ -461,6 +469,7 @@ function parseSeriesBooks(
      narrator: stripPrefixes(narratorText, narratorPrefixes),
      coverArtUrl,
      rating,
      durationMinutes,
    });
  });
@@ -23,6 +23,7 @@ import {
  AdaptivePacer,
  FetchResultMeta,
 } from '../utils/scrape-resilience';
 import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';
 // Module-level logger
 const logger = RMABLogger.create('Audible');
@@ -1134,33 +1135,11 @@ export class AudibleService {
  }
  /**
-   * Parse runtime text to minutes using language-specific patterns
+   * Parse runtime text to minutes using language-specific patterns.
   * Delegates to shared utility in src/lib/utils/parse-runtime.ts.
   */
  private parseRuntime(runtimeText: string): number | undefined {
-    if (!runtimeText) return undefined;
+    return parseRuntimeUtil(runtimeText, this.getLangConfig());
    const langConfig = this.getLangConfig();
    let totalMinutes = 0;
    // Try each hour pattern until one matches
    for (const pattern of langConfig.scraping.runtimeHourPatterns) {
      const match = runtimeText.match(pattern);
      if (match) {
        totalMinutes += parseInt(match[1]) * 60;
        break;
      }
    }
    // Try each minute pattern until one matches
    for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
      const match = runtimeText.match(pattern);
      if (match) {
        totalMinutes += parseInt(match[1]);
        break;
      }
    }
    return totalMinutes > 0 ? totalMinutes : undefined;
  }
  /**
@@ -12,6 +12,7 @@ import { getJobQueueService } from '@/lib/services/job-queue.service';
 import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { RMABLogger } from '@/lib/utils/logger';
 import { seedAsin } from '@/lib/services/works.service';
 const logger = RMABLogger.create('RequestCreator');
@@ -147,6 +148,15 @@ export async function createRequestForUser(
    }
  }
  // Seed works table for cross-ASIN matching (Layer 2: request-time seeding)
  seedAsin(
    audiobook.asin,
    audiobookRecord.title,
    audiobookRecord.author,
    audiobookRecord.narrator || undefined,
    undefined // duration not available at request time
  ).catch(() => {});
  // Check if user already has an active request for this audiobook
  const existingRequest = await prisma.request.findFirst({
    where: {
@@ -0,0 +1,248 @@
 /**
 * Component: Works Service
 * Documentation: documentation/integrations/audible.md
 *
 * Manages the works table — persistent cross-ASIN audiobook identity mapping.
 * Layer 1: Auto-populated from dedup logic when users browse search/author/series pages.
 * Layer 2: Seeded at request time to ensure requested ASINs are tracked.
 */
 import { prisma } from '@/lib/db';
 import { RMABLogger } from '@/lib/utils/logger';
 import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
 const logger = RMABLogger.create('WorksService');
 // ---------------------------------------------------------------------------
 // Layer 1: Persist dedup groups (fire-and-forget from API routes)
 // ---------------------------------------------------------------------------
 /**
 * Persist dedup groups to the works table. For each group of 2+ ASINs that
 * were identified as the same audiobook, create or update a Work record
 * linking all ASINs together.
 *
 * Groups are processed sequentially and independently: a failure while
 * persisting one group is logged and the remaining groups are still attempted.
 *
 * Safe to call fire-and-forget — never throws.
 *
 * @param groups - Dedup groups collected by deduplicateAndCollectGroups
 */
 export async function persistDedupGroups(groups: DedupGroup[]): Promise<void> {
  for (const group of groups ?? []) {
    try {
      await persistSingleGroup(group);
    } catch (error) {
      // Isolate the failure so one bad group does not block the others
      logger.error('Failed to persist dedup groups', {
        error: error instanceof Error ? error.message : String(error),
        canonicalAsin: group?.canonicalAsin,
        groupCount: groups.length,
      });
    }
  }
 }
 /**
 * Persist a single dedup group. Handles merging when ASINs span multiple
 * existing works.
 *
 * - None of the ASINs is known yet: create one Work plus one WorkAsin per ASIN.
 * - Otherwise: the first existing work found becomes the target; rows of any
 *   other matching works are moved into it and those works are deleted;
 *   missing ASINs are added; finally the group's canonical ASIN becomes the
 *   only row of the target work flagged as canonical.
 *
 * Narrator and duration are written only on the canonical row, because the
 * group carries that metadata for the canonical entry alone.
 *
 * NOTE(review): the writes are not wrapped in a transaction, so a failure
 * part-way through can leave a partially written work.
 *
 * @param group - Group of ASINs identified as the same audiobook
 * @throws Propagates Prisma errors to the caller
 */
 async function persistSingleGroup(group: DedupGroup): Promise<void> {
  const { canonicalAsin, title, author, narrator, durationMinutes } = group;

  // A repeated ASIN would violate the unique constraint on work_asins.asin
  const allAsins = [...new Set(group.allAsins)];
  // Nothing to link — avoid creating a work without any ASIN
  if (allAsins.length === 0) return;

  // Shape of a work_asins row for this group
  const buildRow = (workId: string, asin: string) => {
    const isCanonical = asin === canonicalAsin;
    return {
      workId,
      asin,
      narrator: isCanonical ? narrator : undefined,
      durationMinutes: isCanonical ? durationMinutes : undefined,
      isCanonical,
      source: 'dedup_auto',
    };
  };

  // Find which of these ASINs already exist in work_asins
  const existingEntries = await prisma.workAsin.findMany({
    where: { asin: { in: allAsins } },
    select: { asin: true, workId: true },
  });
  // Collect unique work IDs that already contain any of our ASINs
  const existingWorkIds = [...new Set(existingEntries.map(e => e.workId))];
  const existingAsinSet = new Set(existingEntries.map(e => e.asin));

  if (existingWorkIds.length === 0) {
    // No existing works — create a new one with all ASINs
    const work = await prisma.work.create({
      data: { title, author },
    });
    await Promise.all(
      allAsins.map(asin => prisma.workAsin.create({ data: buildRow(work.id, asin) }))
    );
    logger.debug('Created new work', { workId: work.id, asinCount: allAsins.length });
    return;
  }

  // Use the first existing work as the target
  const targetWorkId = existingWorkIds[0];

  // If multiple existing works, merge them into the target
  if (existingWorkIds.length > 1) {
    const mergeWorkIds = existingWorkIds.slice(1);
    // Move all ASINs from other works to the target
    await prisma.workAsin.updateMany({
      where: { workId: { in: mergeWorkIds } },
      data: { workId: targetWorkId },
    });
    // Delete the now-empty works
    await prisma.work.deleteMany({
      where: { id: { in: mergeWorkIds } },
    });
    logger.debug('Merged works', {
      targetWorkId,
      mergedWorkIds: mergeWorkIds,
    });
  }

  // Add any new ASINs that don't already exist
  const newAsins = allAsins.filter(a => !existingAsinSet.has(a));
  if (newAsins.length > 0) {
    await Promise.all(
      newAsins.map(asin => prisma.workAsin.create({ data: buildRow(targetWorkId, asin) }))
    );
    logger.debug('Added ASINs to existing work', {
      workId: targetWorkId,
      newAsinCount: newAsins.length,
    });
  }

  // Clear stale canonical flags (left over from merged works or an earlier
  // canonical choice) so the work ends up with a single canonical ASIN
  await prisma.workAsin.updateMany({
    where: { workId: targetWorkId, isCanonical: true, asin: { not: canonicalAsin } },
    data: { isCanonical: false },
  });
  // Update canonical status: ensure the canonical ASIN is marked
  await prisma.workAsin.updateMany({
    where: { workId: targetWorkId, asin: canonicalAsin },
    data: { isCanonical: true },
  });
 }
 // ---------------------------------------------------------------------------
 // Layer 2: Seed ASIN at request time
 // ---------------------------------------------------------------------------
 /**
 * Ensure an ASIN is tracked in the works table. Creates a single-ASIN work
 * if the ASIN isn't already present. Called at request creation time.
 *
 * If the ASIN row cannot be written after the work was created (for example
 * because a concurrent call inserted the same ASIN first and the unique
 * constraint rejects it), the freshly created work is removed again so no
 * ASIN-less work is left behind.
 *
 * Safe to call fire-and-forget — never throws.
 *
 * @param asin            - Audible ASIN to track
 * @param title           - Title stored on the new work
 * @param author          - Author stored on the new work
 * @param narrator        - Optional narrator stored on the ASIN row
 * @param durationMinutes - Optional duration stored on the ASIN row
 */
 export async function seedAsin(
  asin: string,
  title: string,
  author: string,
  narrator?: string,
  durationMinutes?: number
 ): Promise<void> {
  try {
    // Check if ASIN already tracked
    const existing = await prisma.workAsin.findUnique({
      where: { asin },
    });
    if (existing) return;

    // Create a new single-ASIN work
    const work = await prisma.work.create({
      data: { title, author },
    });
    try {
      await prisma.workAsin.create({
        data: {
          workId: work.id,
          asin,
          narrator,
          durationMinutes,
          isCanonical: true,
          source: 'dedup_auto',
        },
      });
    } catch (createError) {
      // Roll back the work we just created; keep the original error for logging
      try {
        await prisma.work.delete({ where: { id: work.id } });
      } catch {
        // Cleanup is best-effort
      }
      throw createError;
    }
    logger.debug('Seeded ASIN', { workId: work.id, asin });
  } catch (error) {
    logger.error('Failed to seed ASIN', {
      error: error instanceof Error ? error.message : String(error),
      asin,
    });
  }
 }
 // ---------------------------------------------------------------------------
 // Sibling ASIN lookup (for library matching expansion)
 // ---------------------------------------------------------------------------
 /**
 * Look up, for every given ASIN, the other ASINs that share its work.
 *
 * The returned map is keyed by input ASIN; each value lists the remaining
 * ASINs of the same work and never contains the key itself. Input ASINs that
 * are not in the works table, or whose work holds no other ASIN, have no entry.
 *
 * @param asins - ASINs to expand
 * @returns Map of input ASIN -> sibling ASINs
 */
 export async function getSiblingAsins(
  asins: string[]
 ): Promise<Map<string, string[]>> {
  const siblingsByAsin = new Map<string, string[]>();
  if (asins.length === 0) return siblingsByAsin;

  // Query 1: which of the requested ASINs are tracked, and in which work
  const tracked = await prisma.workAsin.findMany({
    where: { asin: { in: asins } },
    select: { asin: true, workId: true },
  });
  if (tracked.length === 0) return siblingsByAsin;

  // Query 2: every ASIN belonging to any of those works
  const workIds = [...new Set(tracked.map(row => row.workId))];
  const members = await prisma.workAsin.findMany({
    where: { workId: { in: workIds } },
    select: { asin: true, workId: true },
  });

  // Bucket the members by work
  const asinsByWork = new Map<string, string[]>();
  for (const { asin, workId } of members) {
    let bucket = asinsByWork.get(workId);
    if (!bucket) {
      bucket = [];
      asinsByWork.set(workId, bucket);
    }
    bucket.push(asin);
  }

  // Siblings = members of the same work, minus the ASIN itself
  for (const { asin, workId } of tracked) {
    const others = (asinsByWork.get(workId) ?? []).filter(candidate => candidate !== asin);
    if (others.length > 0) siblingsByAsin.set(asin, others);
  }
  return siblingsByAsin;
 }
@@ -8,6 +8,7 @@
 import { prisma } from '@/lib/db';
 import { LibraryItem } from '@/lib/services/library';
 import { getSiblingAsins } from '@/lib/services/works.service';
 import { RMABLogger } from './logger';
 // Module-level logger
@@ -178,6 +179,61 @@ export async function enrichAudiobooksWithMatches(
    }
  }
  // Works-table sibling expansion: check if unmatched ASINs have siblings in the library
  try {
    const unmatchedAsins = results.filter(r => !r.isAvailable).map(r => r.asin);
    if (unmatchedAsins.length > 0) {
      const siblingMap = await getSiblingAsins(unmatchedAsins);
      if (siblingMap.size > 0) {
        // Collect all sibling ASINs for a single batch library query
        const allSiblingAsins = new Set<string>();
        for (const siblings of siblingMap.values()) {
          for (const s of siblings) allSiblingAsins.add(s);
        }
        if (allSiblingAsins.size > 0) {
          const siblingLibraryMatches = await prisma.plexLibrary.findMany({
            where: { asin: { in: [...allSiblingAsins] } },
            select: { asin: true, plexGuid: true },
          });
          const libraryAsinSet = new Set(
            siblingLibraryMatches.filter(m => m.asin).map(m => m.asin!.toLowerCase())
          );
          // Update results where a sibling ASIN is found in the library
          for (const result of results) {
            if (result.isAvailable) continue;
            const siblings = siblingMap.get(result.asin);
            if (!siblings) continue;
            const matchedSiblingAsin = siblings.find(s => libraryAsinSet.has(s.toLowerCase()));
            if (matchedSiblingAsin) {
              const libMatch = siblingLibraryMatches.find(
                m => m.asin?.toLowerCase() === matchedSiblingAsin.toLowerCase()
              );
              (result as any).isAvailable = true;
              (result as any).plexGuid = libMatch?.plexGuid || null;
            }
          }
          const siblingMatchCount = results.filter(r => {
            if (!r.isAvailable) return false;
            return siblingMap.has(r.asin);
          }).length;
          logger.debug('Sibling expansion', {
            unmatchedCount: unmatchedAsins.length,
            siblingGroupsFound: siblingMap.size,
            siblingMatches: siblingMatchCount,
          });
        }
      }
    }
  } catch (error) {
    // Works table expansion is best-effort — direct matches still work
    logger.error('Sibling ASIN expansion failed', {
      error: error instanceof Error ? error.message : String(error),
    });
  }
  // Always enrich with request status (check ANY user's requests)
  const asins = audiobooks.map(book => book.asin);
@@ -307,6 +363,19 @@ export async function getAvailableAsins(): Promise<Set<string>> {
  for (const item of completedRequests) {
    if (item.audibleAsin) asins.add(item.audibleAsin);
  }
  // Expand with works-table sibling ASINs
  try {
    if (asins.size > 0) {
      const siblingMap = await getSiblingAsins([...asins]);
      for (const siblings of siblingMap.values()) {
        for (const s of siblings) asins.add(s);
      }
    }
  } catch {
    // Works table expansion is best-effort
  }
  return asins;
 }
@@ -0,0 +1,201 @@
 /**
 * Component: Audiobook Deduplication Utility
 * Documentation: documentation/integrations/audible.md
 *
 * Deduplicates audiobook listings that represent the same recording
 * under different ASINs (publisher re-listings, rights transfers, etc.).
 *
 * Dedup key: normalized title + normalized narrator
 * Duration tolerance: max(longerDuration * 0.01, 5) minutes
 * Missing duration treated as compatible (graceful degradation).
 */
 import type { AudibleAudiobook } from '../integrations/audible.service';
 // ---------------------------------------------------------------------------
 // Title / narrator normalization
 // ---------------------------------------------------------------------------
 /** Parenthesized or bracketed segments containing an edition/format keyword */
 const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;
 /** Everything from a colon followed by whitespace to the end of the string */
 const SUBTITLE_RE = /\s*[:]\s+.+$/;
 /** Everything from a space-surrounded hyphen / en dash / em dash to the end */
 const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;
 /** Trailing "a novel", "a memoir", ... optionally preceded by -, : or , */
 const TRAILING_DESCRIPTOR_RE = /\s*[-:,]?\s+a\s+(novel|memoir|thriller|mystery|romance|story|tale|novella)\s*$/i;
 /**
 * Reduce a title to the form used as a dedup key.
 *
 * Steps, in order: lowercase, drop edition markers in parentheses/brackets,
 * drop a trailing descriptor ("a novel", ...), drop a colon subtitle, drop a
 * dash subtitle (the dash must have whitespace on both sides, so hyphenated
 * words survive), then collapse runs of whitespace and trim.
 *
 * @param title - Raw listing title
 * @returns Normalized title
 */
 export function normalizeTitle(title: string): string {
  return title
    .toLowerCase()
    .replace(EDITION_PAREN_RE, '')
    .replace(TRAILING_DESCRIPTOR_RE, '')
    .replace(SUBTITLE_RE, '')
    .replace(LONG_DASH_SUBTITLE_RE, '')
    .replace(/\s+/g, ' ')
    .trim();
 }
 /**
 * Lowercase and trim a narrator string for comparison; a missing or empty
 * narrator becomes the empty string.
 */
 function normalizeNarrator(narrator?: string): string {
  const raw = narrator ? narrator : '';
  return raw.toLowerCase().trim();
 }
 // ---------------------------------------------------------------------------
 // Duration compatibility
 // ---------------------------------------------------------------------------
 /**
 * Decide whether two durations (in minutes) can belong to the same recording.
 *
 * The allowed gap is 1% of the longer duration, but never less than 5 minutes.
 * When either duration is missing (null or undefined) the pair counts as
 * compatible.
 *
 * @param a - First duration in minutes, if known
 * @param b - Second duration in minutes, if known
 * @returns true when the durations are within tolerance or one is missing
 */
 export function areDurationsCompatible(a?: number, b?: number): boolean {
  if (!(a != null && b != null)) return true;
  const gap = Math.abs(a - b);
  const allowed = Math.max(Math.max(a, b) * 0.01, 5);
  return gap <= allowed;
 }
 // ---------------------------------------------------------------------------
 // Metadata scoring (for picking best representative)
 // ---------------------------------------------------------------------------
 /**
 * Count how many optional metadata fields a listing has filled in (0-7).
 * Used to pick the representative of a group of duplicates.
 *
 * rating and durationMinutes count when they are not null/undefined (so 0
 * counts); the string fields count when non-empty; genres counts when it has
 * at least one entry.
 */
 function metadataScore(book: AudibleAudiobook): number {
  const present = [
    Boolean(book.coverArtUrl),
    book.rating != null,
    book.durationMinutes != null,
    Boolean(book.description),
    Boolean(book.narrator),
    Boolean(book.releaseDate),
    Boolean(book.genres && book.genres.length > 0),
  ];
  return present.filter(Boolean).length;
 }
 // ---------------------------------------------------------------------------
 // Dedup group types (for works-table persistence)
 // ---------------------------------------------------------------------------
 /**
 * Metadata about a group of ASINs that were collapsed during dedup.
 * The descriptive fields are copied from the canonical (winning) entry.
 */
 export interface DedupGroup {
  canonicalAsin: string;     // ASIN of the "winner" (best metadata score)
  allAsins: string[];        // All ASINs in this group (including canonical)
  title: string;             // Title from the canonical entry
  author: string;            // Author from the canonical entry
  narrator?: string;         // Narrator from the canonical entry
  durationMinutes?: number;  // Duration from the canonical entry
 }
 /**
 * Result of deduplication with group collection, as returned by
 * deduplicateAndCollectGroups.
 */
 export interface DeduplicateResult {
  books: AudibleAudiobook[];  // The deduped list (same as deduplicateAudiobooks returns)
  groups: DedupGroup[];       // Groups where 2+ ASINs were collapsed
 }
 // ---------------------------------------------------------------------------
 // Main dedup functions
 // ---------------------------------------------------------------------------
 /**
 * Collapse listings that represent the same recording and return only the
 * surviving books (the grouping metadata is discarded).
 *
 * Listings are treated as the same recording when their normalized title and
 * narrator match and their durations are compatible. A different narrator
 * means a different production, and a duration outside tolerance means
 * different content (e.g. abridged vs unabridged); both are kept apart.
 *
 * Output order follows the first appearance of each title+narrator key.
 */
 export function deduplicateAudiobooks(books: AudibleAudiobook[]): AudibleAudiobook[] {
  const { books: uniqueBooks } = deduplicateAndCollectGroups(books);
  return uniqueBooks;
 }
 /**
 * Deduplicate audiobooks AND return grouping metadata for works-table persistence.
 *
 * Books are bucketed by normalized title + narrator. Each bucket is then split
 * into sub-groups whose durations are compatible with the sub-group's first
 * member, and one representative per sub-group is kept: the member with the
 * highest metadata score, the earliest one winning ties.
 *
 * Output order follows the first appearance of each title+narrator bucket;
 * the sub-groups of one bucket are emitted together.
 *
 * A group is reported only when the collapsed listings span 2+ distinct
 * ASINs, and `allAsins` never contains the same ASIN twice. A listing that
 * merely appears twice under one ASIN is still collapsed in `books` but
 * produces no group.
 *
 * @param books - Listings to deduplicate (not mutated)
 * @returns The deduped list plus the multi-ASIN groups that were collapsed
 */
 export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): DeduplicateResult {
  if (books.length <= 1) return { books: [...books], groups: [] };

  // Group by normalized title + narrator
  const titleNarratorGroups = new Map<string, AudibleAudiobook[]>();
  const insertionOrder: string[] = [];
  for (const book of books) {
    const key = `${normalizeTitle(book.title)}|||${normalizeNarrator(book.narrator)}`;
    const group = titleNarratorGroups.get(key);
    if (group) {
      group.push(book);
    } else {
      titleNarratorGroups.set(key, [book]);
      insertionOrder.push(key);
    }
  }

  const result: AudibleAudiobook[] = [];
  const dedupGroups: DedupGroup[] = [];
  for (const key of insertionOrder) {
    const group = titleNarratorGroups.get(key)!;
    if (group.length === 1) {
      result.push(group[0]);
      continue;
    }

    // Within a title+narrator group, further split by duration compatibility.
    // A book joins the first sub-group whose representative (first member)
    // it is duration-compatible with; otherwise it starts a new sub-group.
    const subGroups: AudibleAudiobook[][] = [];
    for (const book of group) {
      const home = subGroups.find(sg =>
        areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes)
      );
      if (home) {
        home.push(book);
      } else {
        subGroups.push([book]);
      }
    }

    // From each sub-group, pick the best representative and collect group metadata
    for (const sg of subGroups) {
      let best = sg[0];
      let bestScore = metadataScore(best);
      for (let i = 1; i < sg.length; i++) {
        const score = metadataScore(sg[i]);
        // Strictly greater: on a tie the earlier listing stays the winner
        if (score > bestScore) {
          best = sg[i];
          bestScore = score;
        }
      }
      result.push(best);

      // Collect group metadata for works-table persistence. Only groups with
      // 2+ distinct ASINs qualify: a repeated ASIN is not a cross-ASIN link
      // and would violate the unique ASIN constraint when persisted.
      const distinctAsins = [...new Set(sg.map(b => b.asin))];
      if (distinctAsins.length >= 2) {
        dedupGroups.push({
          canonicalAsin: best.asin,
          allAsins: distinctAsins,
          title: best.title,
          author: best.author,
          narrator: best.narrator,
          durationMinutes: best.durationMinutes,
        });
      }
    }
  }
  return { books: result, groups: dedupGroups };
 }
@@ -0,0 +1,44 @@
 /**
 * Component: Runtime Parsing Utility
 * Documentation: documentation/integrations/audible.md
 *
 * Shared runtime/duration text parser extracted from AudibleService.
 * Handles all i18n patterns (English, German, Spanish, French) via
 * language-specific regex patterns in LanguageConfig.
 */
 import type { LanguageConfig } from '../constants/language-config';
 /**
 * Convert a runtime label (e.g. "12 hrs and 30 mins", "5 Std. 20 Min.") into
 * total minutes.
 *
 * The hour patterns are tried in order and the first one that matches supplies
 * the hours (capture group 1); the minute patterns are handled the same way.
 *
 * @param runtimeText - Raw runtime string from Audible HTML
 * @param langConfig  - Language configuration with hour/minute regex patterns
 * @returns Total minutes, or undefined when the text is empty or the parsed
 *          total is not greater than zero
 */
 export function parseRuntime(runtimeText: string, langConfig: LanguageConfig): number | undefined {
  if (!runtimeText) return undefined;

  // Value captured by the first pattern that matches, or 0 when none does
  const firstCapture = (patterns: Iterable<string | RegExp>): number => {
    for (const candidate of patterns) {
      const found = runtimeText.match(candidate);
      if (found) return parseInt(found[1]);
    }
    return 0;
  };

  const hours = firstCapture(langConfig.scraping.runtimeHourPatterns);
  const minutes = firstCapture(langConfig.scraping.runtimeMinutePatterns);
  const total = hours * 60 + minutes;
  return total > 0 ? total : undefined;
 }
@@ -47,6 +47,8 @@ export const createPrismaMock = () => ({
  bookDateSwipe: createModelMock(),
  goodreadsShelf: createModelMock(),
  goodreadsBookMapping: createModelMock(),
  work: createModelMock(),
  workAsin: createModelMock(),
  $queryRaw: vi.fn(),
  $disconnect: vi.fn(),
 });
@@ -0,0 +1,306 @@
 /**
 * Component: Works Service Tests
 * Documentation: documentation/integrations/audible.md
 */
 import { beforeEach, describe, expect, it, vi } from 'vitest';
 import { createPrismaMock } from '../helpers/prisma';
 import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
 // Single shared Prisma mock for the whole file; every test below configures
 // and inspects the model mocks hanging off this object.
 const prismaMock = createPrismaMock();
 // Replace the database module so anything importing '@/lib/db' receives the
 // shared mock as `prisma`.
 // NOTE(review): the factory closes over `prismaMock`; this presumably relies on
 // Vitest invoking mock factories lazily (the service is only imported
 // dynamically inside each test) — confirm against the Vitest docs.
 vi.mock('@/lib/db', () => ({
  prisma: prismaMock,
 }));
 // Replace the logger module: RMABLogger.create() hands back an object whose
 // debug/info/warn/error methods are fresh no-op spies.
 vi.mock('@/lib/utils/logger', () => ({
  RMABLogger: {
    create: () => ({
      debug: vi.fn(),
      info: vi.fn(),
      warn: vi.fn(),
      error: vi.fn(),
    }),
  },
 }));
 describe('persistDedupGroups', () => {
  // Shorthand handles to the two model mocks this suite drives.
  const { work: workModel, workAsin: workAsinModel } = prismaMock;

  // Imports the service after the per-test module reset and persists `groups`.
  const persist = async (groups: DedupGroup[]) => {
    const service = await import('@/lib/services/works.service');
    return service.persistDedupGroups(groups);
  };

  beforeEach(() => {
    vi.resetModules();
    vi.clearAllMocks();
  });

  it('creates new work + work_asins for a fresh group', async () => {
    // No ASIN of the group is tracked yet
    workAsinModel.findMany.mockResolvedValue([]);
    workAsinModel.updateMany.mockResolvedValue({ count: 0 });
    workAsinModel.create.mockResolvedValue({});
    workModel.create.mockResolvedValue({ id: 'work-1' });

    const freshGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B'],
      title: 'Test Book',
      author: 'Test Author',
      narrator: 'Test Narrator',
      durationMinutes: 600,
    };
    await persist([freshGroup]);

    expect(workModel.create).toHaveBeenCalledWith({
      data: { title: 'Test Book', author: 'Test Author' },
    });
    expect(workAsinModel.create).toHaveBeenCalledTimes(2);

    // The canonical row carries narrator, duration and isCanonical=true
    const canonicalRow = expect.objectContaining({
      workId: 'work-1',
      asin: 'ASIN_A',
      narrator: 'Test Narrator',
      durationMinutes: 600,
      isCanonical: true,
      source: 'dedup_auto',
    });
    expect(workAsinModel.create).toHaveBeenCalledWith({ data: canonicalRow });

    // The other row is flagged isCanonical=false
    const siblingRow = expect.objectContaining({
      workId: 'work-1',
      asin: 'ASIN_B',
      isCanonical: false,
      source: 'dedup_auto',
    });
    expect(workAsinModel.create).toHaveBeenCalledWith({ data: siblingRow });
  });

  it('adds new ASINs to existing work when canonical already exists', async () => {
    workAsinModel.findMany.mockResolvedValue([
      { asin: 'ASIN_A', workId: 'existing-work' },
    ]);
    workAsinModel.create.mockResolvedValue({});
    workAsinModel.updateMany.mockResolvedValue({ count: 1 });

    const grownGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B', 'ASIN_C'],
      title: 'Test Book',
      author: 'Test Author',
      narrator: 'Narrator',
      durationMinutes: 500,
    };
    await persist([grownGroup]);

    // No new work may be created
    expect(workModel.create).not.toHaveBeenCalled();
    // Only ASIN_B and ASIN_C get rows; ASIN_A is already tracked
    expect(workAsinModel.create).toHaveBeenCalledTimes(2);
    for (const addedAsin of ['ASIN_B', 'ASIN_C']) {
      expect(workAsinModel.create).toHaveBeenCalledWith({
        data: expect.objectContaining({
          workId: 'existing-work',
          asin: addedAsin,
        }),
      });
    }
  });

  it('merges two separate works when dedup groups them together', async () => {
    // The two ASINs currently belong to different works (work-1 and work-2)
    workAsinModel.findMany.mockResolvedValue([
      { asin: 'ASIN_A', workId: 'work-1' },
      { asin: 'ASIN_B', workId: 'work-2' },
    ]);
    workAsinModel.updateMany.mockResolvedValue({ count: 1 });
    workModel.deleteMany.mockResolvedValue({ count: 1 });

    const mergingGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B'],
      title: 'Merged Book',
      author: 'Author',
    };
    await persist([mergingGroup]);

    // work-2's ASIN rows are re-pointed at work-1 ...
    expect(workAsinModel.updateMany).toHaveBeenCalledWith({
      where: { workId: { in: ['work-2'] } },
      data: { workId: 'work-1' },
    });
    // ... and work-2 itself is removed
    expect(workModel.deleteMany).toHaveBeenCalledWith({
      where: { id: { in: ['work-2'] } },
    });
  });

  it('silently catches and logs errors without throwing', async () => {
    workAsinModel.findMany.mockRejectedValue(new Error('DB connection failed'));

    const anyGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B'],
      title: 'Test',
      author: 'Auth',
    };
    // The promise must resolve (to undefined) rather than reject
    await expect(persist([anyGroup])).resolves.toBeUndefined();
  });
 });
 describe('seedAsin', () => {
  // Shorthand handles to the model mocks used below.
  const { work: workModel, workAsin: workAsinModel } = prismaMock;

  // Pulls seedAsin out of a freshly imported copy of the service module.
  const loadSeedAsin = async () => {
    const service = await import('@/lib/services/works.service');
    return service.seedAsin;
  };

  beforeEach(() => {
    vi.resetModules();
    vi.clearAllMocks();
  });

  it('creates single-ASIN work for new ASIN', async () => {
    workAsinModel.findUnique.mockResolvedValue(null);
    workAsinModel.create.mockResolvedValue({});
    workModel.create.mockResolvedValue({ id: 'new-work' });

    const seedAsin = await loadSeedAsin();
    await seedAsin('NEW_ASIN', 'New Book', 'Author', 'Narrator', 300);

    const expectedWork = { title: 'New Book', author: 'Author' };
    expect(workModel.create).toHaveBeenCalledWith({ data: expectedWork });

    const expectedRow = {
      workId: 'new-work',
      asin: 'NEW_ASIN',
      narrator: 'Narrator',
      durationMinutes: 300,
      isCanonical: true,
      source: 'dedup_auto',
    };
    expect(workAsinModel.create).toHaveBeenCalledWith({ data: expectedRow });
  });

  it('does nothing for already-tracked ASIN', async () => {
    const trackedRow = {
      id: 'existing',
      asin: 'EXISTING_ASIN',
      workId: 'work-1',
    };
    workAsinModel.findUnique.mockResolvedValue(trackedRow);

    const seedAsin = await loadSeedAsin();
    await seedAsin('EXISTING_ASIN', 'Book', 'Author');

    // Neither table may be written to
    expect(workModel.create).not.toHaveBeenCalled();
    expect(workAsinModel.create).not.toHaveBeenCalled();
  });

  it('silently catches and logs errors without throwing', async () => {
    workAsinModel.findUnique.mockRejectedValue(new Error('DB error'));

    const seedAsin = await loadSeedAsin();
    await expect(seedAsin('ASIN', 'Book', 'Auth')).resolves.toBeUndefined();
  });
 });
 describe('getSiblingAsins', () => {
  // The only Prisma call these tests stub.
  const findManyMock = prismaMock.workAsin.findMany;

  // Builds one work_asins row as returned by the stubbed queries.
  const row = (asin: string, workId: string) => ({ asin, workId });

  // Imports the service after the per-test module reset and runs the lookup.
  const siblingsOf = async (asins: string[]) => {
    const service = await import('@/lib/services/works.service');
    return service.getSiblingAsins(asins);
  };

  beforeEach(() => {
    vi.resetModules();
    vi.clearAllMocks();
  });

  it('returns sibling ASINs correctly', async () => {
    // First query: the input ASINs with their work IDs
    const inputRows = [row('ASIN_A', 'work-1'), row('ASIN_C', 'work-2')];
    // Second query: every ASIN belonging to those works
    const workRows = [
      row('ASIN_A', 'work-1'),
      row('ASIN_B', 'work-1'),
      row('ASIN_C', 'work-2'),
      row('ASIN_D', 'work-2'),
      row('ASIN_E', 'work-2'),
    ];
    findManyMock.mockResolvedValueOnce(inputRows).mockResolvedValueOnce(workRows);

    const siblings = await siblingsOf(['ASIN_A', 'ASIN_C']);

    expect(siblings.get('ASIN_A')).toEqual(['ASIN_B']);
    expect(siblings.get('ASIN_C')).toEqual(['ASIN_D', 'ASIN_E']);
  });

  it('returns empty map for unknown ASINs', async () => {
    findManyMock.mockResolvedValue([]);

    const siblings = await siblingsOf(['UNKNOWN']);

    expect(siblings.size).toBe(0);
  });

  it('returns empty map for empty input', async () => {
    const siblings = await siblingsOf([]);

    expect(siblings.size).toBe(0);
    // The database must not be queried at all
    expect(findManyMock).not.toHaveBeenCalled();
  });

  it('excludes the input ASIN itself from siblings', async () => {
    findManyMock
      .mockResolvedValueOnce([row('ASIN_A', 'work-1')])
      .mockResolvedValueOnce([row('ASIN_A', 'work-1'), row('ASIN_B', 'work-1')]);

    const siblings = await siblingsOf(['ASIN_A']);

    expect(siblings.get('ASIN_A')).toEqual(['ASIN_B']);
    expect(siblings.get('ASIN_A')).not.toContain('ASIN_A');
  });

  it('omits ASINs with no siblings (single-ASIN works)', async () => {
    findManyMock
      .mockResolvedValueOnce([row('ASIN_LONELY', 'work-solo')])
      .mockResolvedValueOnce([row('ASIN_LONELY', 'work-solo')]);

    const siblings = await siblingsOf(['ASIN_LONELY']);

    // An ASIN without siblings gets no map entry at all
    expect(siblings.has('ASIN_LONELY')).toBe(false);
  });
 });
@@ -0,0 +1,434 @@
 /**
 * Component: Audiobook Deduplication Tests
 * Documentation: documentation/integrations/audible.md
 */
 import { describe, expect, it } from 'vitest';
 import {
  deduplicateAudiobooks,
  deduplicateAndCollectGroups,
  normalizeTitle,
  areDurationsCompatible,
 } from '@/lib/utils/deduplicate-audiobooks';
 import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
 // ---------------------------------------------------------------------------
 // Helper: minimal AudibleAudiobook factory
 // ---------------------------------------------------------------------------
 /**
 * Build an audiobook fixture: every optional field is present but undefined
 * unless `overrides` supplies a value; asin, title and author are mandatory.
 */
 function makeBook(overrides: Partial<AudibleAudiobook> & { asin: string; title: string; author: string }): AudibleAudiobook {
  const blankOptionals = {
    narrator: undefined,
    coverArtUrl: undefined,
    durationMinutes: undefined,
    rating: undefined,
    description: undefined,
    releaseDate: undefined,
    genres: undefined,
    series: undefined,
    seriesPart: undefined,
    seriesAsin: undefined,
    authorAsin: undefined,
  };
  // Later sources win, so caller-supplied fields replace the blanks.
  return Object.assign({}, blankOptionals, overrides);
 }
 // ---------------------------------------------------------------------------
 // normalizeTitle
 // ---------------------------------------------------------------------------
 describe('normalizeTitle', () => {
  // Each row: [test name, raw title, expected normalized title]
  const cases: Array<[string, string, string]> = [
    ['lowercases', 'The Black Prism', 'the black prism'],
    ['strips (Unabridged)', 'The Black Prism (Unabridged)', 'the black prism'],
    ['strips [Abridged Edition]', 'The Black Prism [Abridged Edition]', 'the black prism'],
    ['strips (2024 Remastered Edition)', 'The Hobbit (2024 Remastered Edition)', 'the hobbit'],
    ['strips subtitle after colon', 'The Black Prism: Lightbringer, Book 1', 'the black prism'],
    ['strips subtitle after long dash', 'The Black Prism \u2014 A Lightbringer Novel', 'the black prism'],
    ['strips trailing "A Novel"', 'The Black Prism: A Novel', 'the black prism'],
    ['strips (Audiobook)', 'The Hobbit (Audiobook)', 'the hobbit'],
    ['strips (Dramatized Adaptation)', 'The Black Prism (Dramatized Adaptation)', 'the black prism'],
    ['strips (Full Cast Narration)', 'The Black Prism (Full Cast Narration)', 'the black prism'],
    ['collapses whitespace', '  The   Black   Prism  ', 'the black prism'],
    ['handles empty string', '', ''],
    // The short dash in "well-known" is part of a word, not a subtitle separator
    ['preserves hyphenated words (not subtitles)', 'A Well-Known Book', 'a well-known book'],
  ];

  // Register one test per row, keeping the row's name as the test title.
  for (const [testName, rawTitle, normalized] of cases) {
    it(testName, () => {
      expect(normalizeTitle(rawTitle)).toBe(normalized);
    });
  }
 });
 // ---------------------------------------------------------------------------
 // areDurationsCompatible
 // ---------------------------------------------------------------------------
 describe('areDurationsCompatible', () => {
  // Asserts the verdict for one ordered pair of durations (in minutes).
  const expectVerdict = (first: number | undefined, second: number | undefined, verdict: boolean) => {
    expect(areDurationsCompatible(first, second)).toBe(verdict);
  };

  it('returns true when both undefined', () => {
    expectVerdict(undefined, undefined, true);
  });

  it('returns true when one undefined', () => {
    expectVerdict(600, undefined, true);
    expectVerdict(undefined, 600, true);
  });

  it('returns true for identical durations', () => {
    expectVerdict(600, 600, true);
  });

  it('uses 1% of longer duration as tolerance for long books', () => {
    // Around 40 hours (2400 min) the 1% rule allows roughly 24 minutes of drift
    expectVerdict(2400, 2424, true); // 24 min apart: still compatible
    expectVerdict(2400, 2425, false); // 25 min apart: too far
  });

  it('uses 5-minute minimum tolerance for short books', () => {
    // At 2 hours (120 min) 1% is only ~1.2 min, so the 5-minute floor applies
    expectVerdict(120, 125, true); // 5 min apart: at the floor
    expectVerdict(120, 126, false); // 6 min apart: too far
  });

  it('keeps abridged vs unabridged separate (large duration gap)', () => {
    // 12 hours unabridged (720 min) against 6 hours abridged (360 min)
    expectVerdict(720, 360, false);
  });

  it('symmetry: order does not matter', () => {
    expectVerdict(2400, 2424, true);
    expectVerdict(2424, 2400, true);
    expectVerdict(120, 126, false);
    expectVerdict(126, 120, false);
  });
 });
 // ---------------------------------------------------------------------------
 // deduplicateAudiobooks
 // ---------------------------------------------------------------------------
 describe('deduplicateAudiobooks', () => {
  // Fixture: "The Black Prism" by Brent Weeks read by Simon Vance; `extra`
  // adds or overrides fields.
  const prism = (asin: string, extra: Partial<AudibleAudiobook> = {}): AudibleAudiobook =>
    makeBook({ asin, title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', ...extra });

  // Fixture: "The Hobbit" by Tolkien read by Andy Serkis.
  const hobbit = (asin: string, durationMinutes: number): AudibleAudiobook =>
    makeBook({ asin, title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes });

  // Fixture: generic listing by the author 'Auth'.
  const generic = (asin: string, title: string, narrator: string | undefined, durationMinutes: number): AudibleAudiobook =>
    makeBook({ asin, title, author: 'Auth', narrator, durationMinutes });

  it('returns empty array for empty input', () => {
    const deduped = deduplicateAudiobooks([]);
    expect(deduped).toEqual([]);
  });

  it('returns single book unchanged', () => {
    const only = makeBook({ asin: 'A1', title: 'Book One', author: 'Author' });
    const deduped = deduplicateAudiobooks([only]);
    expect(deduped).toEqual([only]);
  });

  it('passes through all-unique books unchanged', () => {
    const deduped = deduplicateAudiobooks([
      generic('A1', 'Book One', 'Nar A', 600),
      generic('A2', 'Book Two', 'Nar A', 500),
      generic('A3', 'Book Three', 'Nar B', 700),
    ]);
    expect(deduped).toHaveLength(3);
  });

  it('collapses simple duplicates (same title + narrator + similar duration)', () => {
    const deduped = deduplicateAudiobooks([
      prism('A1', { durationMinutes: 1260 }),
      prism('A2', { durationMinutes: 1262 }),
    ]);
    expect(deduped).toHaveLength(1);
  });

  it('keeps books with different narrators (different production)', () => {
    const deduped = deduplicateAudiobooks([
      prism('A1', { durationMinutes: 1260 }),
      prism('A2', { narrator: 'Full Cast', durationMinutes: 480 }),
    ]);
    expect(deduped).toHaveLength(2);
  });

  it('keeps abridged vs unabridged (same narrator, very different duration)', () => {
    const deduped = deduplicateAudiobooks([hobbit('A1', 660), hobbit('A2', 330)]);
    expect(deduped).toHaveLength(2);
  });

  it('collapses when one book has missing duration', () => {
    const deduped = deduplicateAudiobooks([
      prism('A1', { durationMinutes: 1260 }),
      prism('A2', { durationMinutes: undefined }),
    ]);
    expect(deduped).toHaveLength(1);
  });

  it('collapses when both books have missing duration', () => {
    const deduped = deduplicateAudiobooks([prism('A1'), prism('A2')]);
    expect(deduped).toHaveLength(1);
  });

  it('collapses title variants with edition markers', () => {
    const deduped = deduplicateAudiobooks([
      prism('A1', { title: 'The Black Prism (Unabridged)', durationMinutes: 1260 }),
      prism('A2', { durationMinutes: 1258 }),
    ]);
    expect(deduped).toHaveLength(1);
  });

  it('collapses title variants with subtitles', () => {
    const deduped = deduplicateAudiobooks([
      prism('A1', { title: 'The Black Prism: Lightbringer, Book 1', durationMinutes: 1260 }),
      prism('A2', { durationMinutes: 1262 }),
    ]);
    expect(deduped).toHaveLength(1);
  });

  it('picks the representative with most metadata', () => {
    const sparse = prism('A1', { durationMinutes: 1260 });
    const rich = prism('A2', {
      durationMinutes: 1262,
      coverArtUrl: 'https://img.jpg',
      rating: 4.5,
      description: 'Great book',
    });
    const deduped = deduplicateAudiobooks([sparse, rich]);
    expect(deduped).toHaveLength(1);
    // The entry carrying more metadata is the survivor
    expect(deduped[0].asin).toBe('A2');
  });

  it('preserves original order (first-seen position)', () => {
    const deduped = deduplicateAudiobooks([
      generic('A1', 'Alpha', 'Nar', 300),
      generic('B1', 'Beta', 'Nar', 400),
      generic('A2', 'Alpha', 'Nar', 302),
      generic('C1', 'Charlie', 'Nar', 500),
    ]);
    expect(deduped).toHaveLength(3);
    const titles = deduped.map(book => book.title);
    expect(titles).toEqual(['Alpha', 'Beta', 'Charlie']);
  });

  it('handles Lightbringer-style scenario: unabridged + dramatized', () => {
    // Long single-narrator recording by Simon Vance, with cover and rating
    const fullReading = prism('SV1', { durationMinutes: 1260, coverArtUrl: 'cover1.jpg', rating: 4.7 });
    // Same recording listed again under another ASIN and a subtitled title
    const relisted = prism('SV2', { title: 'The Black Prism: Lightbringer Book 1', durationMinutes: 1262 });
    // Shorter full-cast dramatization
    const dramatized = prism('DR1', {
      title: 'The Black Prism (Dramatized Adaptation)',
      narrator: 'Full Cast',
      durationMinutes: 480,
      coverArtUrl: 'cover-drama.jpg',
    });

    const deduped = deduplicateAudiobooks([fullReading, relisted, dramatized]);

    expect(deduped).toHaveLength(2);
    // One Simon Vance entry remains next to the Full Cast entry
    expect(deduped.find(book => book.narrator === 'Simon Vance')).toBeTruthy();
    expect(deduped.find(book => book.narrator === 'Full Cast')).toBeTruthy();
    // The surviving Simon Vance entry is the one with cover + rating
    const vanceSurvivor = deduped.find(book => book.narrator === 'Simon Vance')!;
    expect(vanceSurvivor.asin).toBe('SV1');
  });

  it('uses percentage tolerance for very long audiobooks', () => {
    // ~40-hour books: 20 minutes apart is inside the ~24-minute (1%) tolerance
    const withinTolerance = [
      generic('A1', 'Long Book', 'Nar', 2400),
      generic('A2', 'Long Book', 'Nar', 2420),
    ];
    expect(deduplicateAudiobooks(withinTolerance)).toHaveLength(1);

    // 30 minutes apart exceeds it
    const beyondTolerance = [
      generic('A1', 'Long Book', 'Nar', 2400),
      generic('A2', 'Long Book', 'Nar', 2430),
    ];
    expect(deduplicateAudiobooks(beyondTolerance)).toHaveLength(2);
  });

  it('treats missing narrator as its own group', () => {
    // Same title, neither entry names a narrator: they collapse together
    const deduped = deduplicateAudiobooks([
      generic('A1', 'Test Book', undefined, 300),
      generic('A2', 'Test Book', undefined, 302),
    ]);
    expect(deduped).toHaveLength(1);
  });

  it('does not collapse empty-narrator with named narrator', () => {
    const deduped = deduplicateAudiobooks([
      generic('A1', 'Test Book', undefined, 300),
      generic('A2', 'Test Book', 'John Smith', 302),
    ]);
    expect(deduped).toHaveLength(2);
  });
 });
 // ---------------------------------------------------------------------------
 // deduplicateAndCollectGroups
 // ---------------------------------------------------------------------------
 describe('deduplicateAndCollectGroups', () => {
  // Fixture: listing by the author 'Auth', narrated by 'Nar' unless `extra`
  // says otherwise.
  const entry = (asin: string, title: string, durationMinutes: number, extra: Partial<AudibleAudiobook> = {}): AudibleAudiobook =>
    makeBook({ asin, title, author: 'Auth', narrator: 'Nar', durationMinutes, ...extra });

  // Fixture: "The Black Prism" by Brent Weeks read by Simon Vance.
  const prism = (asin: string, durationMinutes: number, extra: Partial<AudibleAudiobook> = {}): AudibleAudiobook =>
    makeBook({ asin, title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes, ...extra });

  // Fixture: "The Hobbit" by Tolkien read by Andy Serkis.
  const hobbit = (asin: string, durationMinutes: number, title = 'The Hobbit'): AudibleAudiobook =>
    makeBook({ asin, title, author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes });

  it('returns empty groups array when no duplicates', () => {
    const outcome = deduplicateAndCollectGroups([
      entry('A1', 'Book One', 600, { narrator: 'Nar A' }),
      entry('A2', 'Book Two', 500, { narrator: 'Nar A' }),
    ]);
    expect(outcome.books).toHaveLength(2);
    expect(outcome.groups).toHaveLength(0);
  });

  it('returns empty groups for empty input', () => {
    const outcome = deduplicateAndCollectGroups([]);
    expect(outcome.books).toHaveLength(0);
    expect(outcome.groups).toHaveLength(0);
  });

  it('returns empty groups for single book', () => {
    const only = makeBook({ asin: 'A1', title: 'Book One', author: 'Auth' });
    const outcome = deduplicateAndCollectGroups([only]);
    expect(outcome.books).toHaveLength(1);
    expect(outcome.groups).toHaveLength(0);
  });

  it('returns group with 2 ASINs when 2 books match', () => {
    const outcome = deduplicateAndCollectGroups([prism('A1', 1260), prism('A2', 1262)]);
    expect(outcome.books).toHaveLength(1);
    expect(outcome.groups).toHaveLength(1);
    const memberAsins = outcome.groups[0].allAsins;
    expect(memberAsins).toHaveLength(2);
    for (const asin of ['A1', 'A2']) {
      expect(memberAsins).toContain(asin);
    }
  });

  it('returns group with 3+ ASINs for multi-duplicate scenario', () => {
    const outcome = deduplicateAndCollectGroups([
      hobbit('A1', 660),
      hobbit('A2', 662),
      hobbit('A3', 658, 'The Hobbit (Unabridged)'),
    ]);
    expect(outcome.books).toHaveLength(1);
    expect(outcome.groups).toHaveLength(1);
    const memberAsins = outcome.groups[0].allAsins;
    expect(memberAsins).toHaveLength(3);
    for (const asin of ['A1', 'A2', 'A3']) {
      expect(memberAsins).toContain(asin);
    }
  });

  it('canonicalAsin is the one with highest metadata score', () => {
    const sparse = prism('SPARSE', 1260);
    const rich = prism('RICH', 1262, {
      coverArtUrl: 'https://img.jpg',
      rating: 4.5,
      description: 'Great book',
    });
    const outcome = deduplicateAndCollectGroups([sparse, rich]);
    expect(outcome.groups).toHaveLength(1);
    expect(outcome.groups[0].canonicalAsin).toBe('RICH');
  });

  it('groups only include entries with 2+ ASINs', () => {
    const outcome = deduplicateAndCollectGroups([
      entry('A1', 'Alpha', 300),
      entry('A2', 'Alpha', 302),
      entry('B1', 'Beta', 500),
    ]);
    // Beta is a singleton, so Alpha's is the only group reported
    expect(outcome.groups).toHaveLength(1);
    expect(outcome.groups[0].allAsins).toContain('A1');
    expect(outcome.groups[0].allAsins).toContain('A2');
  });

  it('duration-incompatible books produce separate entries (no group for singletons)', () => {
    // Identical title and narrator, but the durations are far apart
    // (abridged vs unabridged)
    const outcome = deduplicateAndCollectGroups([hobbit('A1', 660), hobbit('A2', 330)]);
    expect(outcome.books).toHaveLength(2); // stays uncollapsed
    expect(outcome.groups).toHaveLength(0); // hence no multi-ASIN group
  });

  it('books field matches what deduplicateAudiobooks returns', () => {
    const input = [
      entry('A1', 'Alpha', 300, { coverArtUrl: 'img.jpg', rating: 4.5 }),
      entry('A2', 'Alpha', 302),
      entry('B1', 'Beta', 500),
      entry('C1', 'Charlie', 600),
      entry('C2', 'Charlie', 601),
    ];
    const plainAsins = deduplicateAudiobooks(input).map(book => book.asin);
    const groupedAsins = deduplicateAndCollectGroups(input).books.map(book => book.asin);
    expect(groupedAsins).toEqual(plainAsins);
  });

  it('includes narrator and durationMinutes from canonical entry in group', () => {
    const outcome = deduplicateAndCollectGroups([
      entry('A1', 'Test Book', 480, { narrator: 'Jane Doe' }),
      entry('A2', 'Test Book', 482, { narrator: 'Jane Doe', coverArtUrl: 'img.jpg', rating: 4.0 }),
    ]);
    expect(outcome.groups).toHaveLength(1);
    const group = outcome.groups[0];
    expect(group.canonicalAsin).toBe('A2'); // the entry with more metadata
    expect(group.narrator).toBe('Jane Doe');
    expect(group.durationMinutes).toBe(482);
    expect(group.author).toBe('Auth');
  });
 });