Add works table and ASIN deduping

Add persistent cross-ASIN "works" mapping and client-side deduplication to improve library matching. Introduces a Prisma migration and models (Work, WorkAsin) plus src/lib/services/works.service for persisting dedup groups, seeding ASINs at request time, and sibling lookup. Adds a deduplication utility (deduplicate-audiobooks) that normalizes titles/narrators, compares durations, and returns grouping metadata; API routes (search, author, series) now deduplicate results before enrichment and fire-and-forget persist groups. Adds sibling-ASIN expansion into audiobook matcher and expands getAvailableAsins accordingly. Extracts runtime parsing into a shared parse-runtime util and updates audible scrapers/services to use it. Includes unit tests for dedup logic and works service and updates test Prisma mocks.
2026-07-17 18:21:08 +00:00 · 2026-03-03 13:31:46 -05:00
parent ff80d995c5
commit 610873af6b
15 changed files with 1446 additions and 32 deletions
@@ -0,0 +1,42 @@
+-- CreateTable
+-- "works": one row per logical audiobook; its ASINs are attached through "work_asins".
+-- "id" and "updated_at" have no database default, so the writer must supply them.
+CREATE TABLE "works" (
+    "id" TEXT NOT NULL,
+    "title" TEXT NOT NULL,
+    "author" TEXT NOT NULL,
+    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    "updated_at" TIMESTAMP(3) NOT NULL,
+
+    CONSTRAINT "works_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateTable
+-- "work_asins": one row per ASIN. "work_id" is mandatory and "asin" is unique
+-- (see the unique index below), so an ASIN belongs to exactly one work.
+-- "narrator" and "duration_minutes" are optional per-ASIN metadata.
+CREATE TABLE "work_asins" (
+    "id" TEXT NOT NULL,
+    "work_id" TEXT NOT NULL,
+    "asin" TEXT NOT NULL,
+    "narrator" TEXT,
+    "duration_minutes" INTEGER,
+    "is_canonical" BOOLEAN NOT NULL DEFAULT false,
+    "source" TEXT NOT NULL,
+    "created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
+
+    CONSTRAINT "work_asins_pkey" PRIMARY KEY ("id")
+);
+
+-- CreateIndex
+CREATE INDEX "works_title_idx" ON "works"("title");
+
+-- CreateIndex
+CREATE INDEX "works_author_idx" ON "works"("author");
+
+-- CreateIndex
+-- Enforces that each ASIN appears at most once across all works.
+CREATE UNIQUE INDEX "work_asins_asin_key" ON "work_asins"("asin");
+
+-- CreateIndex
+CREATE INDEX "work_asins_work_id_idx" ON "work_asins"("work_id");
+
+-- CreateIndex
+-- NOTE(review): "asin" is already indexed by the unique index "work_asins_asin_key"
+-- above, so this second, non-unique index on the same column looks redundant.
+-- Confirm before dropping it; the Prisma schema must change in the same step.
+CREATE INDEX "work_asins_asin_idx" ON "work_asins"("asin");
+
+-- AddForeignKey
+-- Deleting a work deletes its ASIN rows (ON DELETE CASCADE).
+ALTER TABLE "work_asins" ADD CONSTRAINT "work_asins_work_id_fkey" FOREIGN KEY ("work_id") REFERENCES "works"("id") ON DELETE CASCADE ON UPDATE CASCADE;
@@ -531,3 +531,43 @@ model GoodreadsBookMapping {
  @@index([audibleAsin])
  @@map("goodreads_book_mappings")
 }
+
+// ============================================================================
+// WORKS TABLE
+// Cross-ASIN audiobook identity mapping — links multiple Audible ASINs
+// to a single logical work for library matching across editions.
+// Documentation: documentation/integrations/audible.md
+// ============================================================================
+
+// One logical audiobook. Stores only a title and author; the Audible ASINs
+// that belong to it are the related WorkAsin rows.
+model Work {
+  id        String     @id @default(uuid())
+  title     String
+  author    String
+  createdAt DateTime   @default(now()) @map("created_at")
+  // Maintained by Prisma on every update (@updatedAt); no database default.
+  updatedAt DateTime   @updatedAt @map("updated_at")
+
+  // Relations
+  asins WorkAsin[]
+
+  @@index([title])
+  @@index([author])
+  @@map("works")
+}
+
+// One Audible ASIN attached to a Work. `asin` is unique, so an ASIN can
+// belong to only one work at a time.
+model WorkAsin {
+  id              String   @id @default(uuid())
+  workId          String   @map("work_id")
+  asin            String   @unique
+  narrator        String?
+  durationMinutes Int?     @map("duration_minutes")
+  // Marks the representative ASIN of the work; defaults to false.
+  isCanonical     Boolean  @default(false) @map("is_canonical")
+  source          String   // 'dedup_auto' | 'admin_manual'
+  createdAt       DateTime @default(now()) @map("created_at")
+
+  // Relations
+  // Rows are removed automatically when their Work is deleted.
+  work Work @relation(fields: [workId], references: [id], onDelete: Cascade)
+
+  @@index([workId])
+  // NOTE(review): `asin` is already covered by the unique index created for
+  // `@unique` above, so this extra index looks redundant — confirm before removing.
+  @@index([asin])
+  @@map("work_asins")
+}
@@ -6,6 +6,8 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
+import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
+import { persistDedupGroups } from '@/lib/services/works.service';
 import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';

@@ -38,14 +40,22 @@ export async function GET(request: NextRequest) {
    const currentUser = getCurrentUser(request);
    const userId = currentUser?.sub || undefined;

+    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
+    const { books: dedupedResults, groups } = deduplicateAndCollectGroups(results.results);
+
+    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
+    if (groups.length > 0) {
+      persistDedupGroups(groups).catch(() => {});
+    }
+
    // Enrich search results with availability and request status information
-    const enrichedResults = await enrichAudiobooksWithMatches(results.results, userId);
+    const enrichedResults = await enrichAudiobooksWithMatches(dedupedResults, userId);

    return NextResponse.json({
      success: true,
      query: results.query,
      results: enrichedResults,
-      totalResults: results.totalResults,
+      totalResults: enrichedResults.length,
      page: results.page,
      hasMore: results.hasMore,
    });
@@ -6,6 +6,8 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
+import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
+import { persistDedupGroups } from '@/lib/services/works.service';
 import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';

@@ -53,9 +55,17 @@ export async function GET(
    const audibleService = getAudibleService();
    const result = await audibleService.searchByAuthorAsin(authorName.trim(), asin, page);

+    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
+    const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(result.books);
+
+    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
+    if (groups.length > 0) {
+      persistDedupGroups(groups).catch(() => {});
+    }
+
    // Enrich with library availability and request status
    const userId = currentUser.sub || undefined;
-    const enrichedBooks = await enrichAudiobooksWithMatches(result.books, userId);
+    const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);

    logger.info(`Author books complete: "${authorName}" → ${enrichedBooks.length} books (page ${page})`);

@@ -64,7 +74,7 @@ export async function GET(
      books: enrichedBooks,
      authorName: authorName.trim(),
      authorAsin: asin,
-      totalBooks: result.totalResults || enrichedBooks.length,
+      totalBooks: enrichedBooks.length,
      hasMore: result.hasMore,
      page: result.page,
    });
@@ -8,6 +8,8 @@ import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';
 import { scrapeSeriesPage } from '@/lib/integrations/audible-series';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
+import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
+import { persistDedupGroups } from '@/lib/services/works.service';

 const logger = RMABLogger.create('API.Series.Detail');

@@ -49,9 +51,17 @@ export async function GET(
      );
    }

+    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
+    const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(detail.books);
+
+    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
+    if (groups.length > 0) {
+      persistDedupGroups(groups).catch(() => {});
+    }
+
    // Enrich books with library availability and request status
    const userId = currentUser.sub || undefined;
-    const enrichedBooks = await enrichAudiobooksWithMatches(detail.books, userId);
+    const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);

    logger.info(`Series detail complete: "${detail.title}" (${enrichedBooks.length} books, page ${page})`);

@@ -14,8 +14,10 @@ import {
  getLanguageForRegion,
  buildContainsSelector,
  stripPrefixes,
+  type LanguageConfig,
 } from '../constants/language-config';
 import { RMABLogger } from '../utils/logger';
+import { parseRuntime } from '../utils/parse-runtime';
 import { randomDelay } from '../utils/scrape-resilience';

 const logger = RMABLogger.create('Audible.Series');
@@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<
      undefined;

    // Parse all books from the series page
-    const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes);
+    const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig);

    // Use actual book count if we got more from scraping
    const bookCount = Math.max(summary.bookCount, books.length);
@@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun
 function parseSeriesBooks(
  $: cheerio.CheerioAPI,
  authorPrefixes: string[],
-  narratorPrefixes: string[]
+  narratorPrefixes: string[],
+  langConfig: LanguageConfig
 ): AudibleAudiobook[] {
  const books: AudibleAudiobook[] = [];
  const seenAsins = new Set<string>();
@@ -453,6 +456,11 @@ function parseSeriesBooks(
    const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
    const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;

+    // Duration
+    const runtimeText = $el.find('.runtimeLabel').text().trim() ||
+      $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
+    const durationMinutes = parseRuntime(runtimeText, langConfig);
+
    books.push({
      asin: bookAsin,
      title,
@@ -461,6 +469,7 @@ function parseSeriesBooks(
      narrator: stripPrefixes(narratorText, narratorPrefixes),
      coverArtUrl,
      rating,
+      durationMinutes,
    });
  });

@@ -23,6 +23,7 @@ import {
  AdaptivePacer,
  FetchResultMeta,
 } from '../utils/scrape-resilience';
+import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';

 // Module-level logger
 const logger = RMABLogger.create('Audible');
@@ -1134,33 +1135,11 @@ export class AudibleService {
  }

  /**
-   * Parse runtime text to minutes using language-specific patterns
+   * Parse runtime text to minutes using language-specific patterns.
+   * Delegates to shared utility in src/lib/utils/parse-runtime.ts.
   */
  private parseRuntime(runtimeText: string): number | undefined {
-    if (!runtimeText) return undefined;
-
-    const langConfig = this.getLangConfig();
-    let totalMinutes = 0;
-
-    // Try each hour pattern until one matches
-    for (const pattern of langConfig.scraping.runtimeHourPatterns) {
-      const match = runtimeText.match(pattern);
-      if (match) {
-        totalMinutes += parseInt(match[1]) * 60;
-        break;
-      }
-    }
-
-    // Try each minute pattern until one matches
-    for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
-      const match = runtimeText.match(pattern);
-      if (match) {
-        totalMinutes += parseInt(match[1]);
-        break;
-      }
-    }
-
-    return totalMinutes > 0 ? totalMinutes : undefined;
+    return parseRuntimeUtil(runtimeText, this.getLangConfig());
  }

  /**
@@ -12,6 +12,7 @@ import { getJobQueueService } from '@/lib/services/job-queue.service';
 import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { RMABLogger } from '@/lib/utils/logger';
+import { seedAsin } from '@/lib/services/works.service';

 const logger = RMABLogger.create('RequestCreator');

@@ -147,6 +148,15 @@ export async function createRequestForUser(
    }
  }

+  // Seed works table for cross-ASIN matching (Layer 2: request-time seeding)
+  seedAsin(
+    audiobook.asin,
+    audiobookRecord.title,
+    audiobookRecord.author,
+    audiobookRecord.narrator || undefined,
+    undefined // duration not available at request time
+  ).catch(() => {});
+
  // Check if user already has an active request for this audiobook
  const existingRequest = await prisma.request.findFirst({
    where: {
@@ -0,0 +1,248 @@
+/**
+ * Component: Works Service
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Manages the works table — persistent cross-ASIN audiobook identity mapping.
+ * Layer 1: Auto-populated from dedup logic when users browse search/author/series pages.
+ * Layer 2: Seeded at request time to ensure requested ASINs are tracked.
+ */
+
+import { prisma } from '@/lib/db';
+import { RMABLogger } from '@/lib/utils/logger';
+import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
+
+const logger = RMABLogger.create('WorksService');
+
+// ---------------------------------------------------------------------------
+// Layer 1: Persist dedup groups (fire-and-forget from API routes)
+// ---------------------------------------------------------------------------
+
+/**
+ * Persist dedup groups to the works table. For each group of ASINs that
+ * were identified as the same audiobook, create or update a Work record
+ * linking all ASINs together.
+ *
+ * Groups are persisted one after another. A failure in one group is logged
+ * and does NOT stop the remaining groups from being persisted.
+ *
+ * Safe to call fire-and-forget — persistence errors are caught and logged,
+ * never rethrown.
+ *
+ * @param groups - Dedup groups collected by the deduplication utility
+ */
+export async function persistDedupGroups(groups: DedupGroup[]): Promise<void> {
+  for (const group of groups) {
+    // Per-group isolation: one bad group (e.g. a unique-constraint race)
+    // must not discard every group that follows it.
+    try {
+      await persistSingleGroup(group);
+    } catch (error) {
+      logger.error('Failed to persist dedup groups', {
+        error: error instanceof Error ? error.message : String(error),
+        canonicalAsin: group.canonicalAsin,
+        groupCount: groups.length,
+      });
+    }
+  }
+}
+
+/**
+ * Persist a single dedup group. Handles merging when ASINs span multiple
+ * existing works.
+ *
+ * - No ASIN known yet: creates a new work and attaches every ASIN to it.
+ * - Some ASINs known: the first existing work found becomes the target, any
+ *   other works containing group ASINs are merged into it, and ASINs not yet
+ *   stored are added.
+ *
+ * After the update exactly one ASIN of the group's work — the group's
+ * canonical ASIN — is flagged canonical.
+ *
+ * NOTE(review): the writes below are separate statements, not one database
+ * transaction; a failure part-way through can leave a partial update.
+ *
+ * @param group - The dedup group to persist
+ * @throws Whatever the Prisma client throws; the caller handles logging.
+ */
+async function persistSingleGroup(group: DedupGroup): Promise<void> {
+  const { canonicalAsin, allAsins, title, author, narrator, durationMinutes } = group;
+
+  // Drop blanks and repeated ASINs: `asin` is unique in work_asins, so a
+  // repeated entry would make the inserts below fail. The canonical ASIN is
+  // appended in case the caller left it out of `allAsins`.
+  const groupAsins = [...new Set([...allAsins, canonicalAsin])].filter(Boolean);
+  if (groupAsins.length === 0) return;
+
+  // Row payload for one ASIN; narrator/duration are only known for the canonical entry.
+  const toAsinRow = (workId: string, asin: string) => ({
+    workId,
+    asin,
+    narrator: asin === canonicalAsin ? narrator : undefined,
+    durationMinutes: asin === canonicalAsin ? durationMinutes : undefined,
+    isCanonical: asin === canonicalAsin,
+    source: 'dedup_auto',
+  });
+
+  // Find which of these ASINs already exist in work_asins
+  const existingEntries = await prisma.workAsin.findMany({
+    where: { asin: { in: groupAsins } },
+    select: { asin: true, workId: true },
+  });
+
+  // Collect unique work IDs that already contain any of our ASINs
+  const existingWorkIds = [...new Set(existingEntries.map(e => e.workId))];
+  const existingAsinSet = new Set(existingEntries.map(e => e.asin));
+
+  if (existingWorkIds.length === 0) {
+    // No existing works — create a new one with all ASINs
+    const work = await prisma.work.create({
+      data: { title, author },
+    });
+
+    await Promise.all(
+      groupAsins.map(asin => prisma.workAsin.create({ data: toAsinRow(work.id, asin) }))
+    );
+
+    logger.debug('Created new work', { workId: work.id, asinCount: groupAsins.length });
+    return;
+  }
+
+  // Use the first existing work as the target
+  const targetWorkId = existingWorkIds[0];
+
+  // If multiple existing works, merge them into the target
+  if (existingWorkIds.length > 1) {
+    const mergeWorkIds = existingWorkIds.slice(1);
+
+    // Move all ASINs from other works to the target
+    await prisma.workAsin.updateMany({
+      where: { workId: { in: mergeWorkIds } },
+      data: { workId: targetWorkId },
+    });
+
+    // Delete the now-empty works
+    await prisma.work.deleteMany({
+      where: { id: { in: mergeWorkIds } },
+    });
+
+    logger.debug('Merged works', {
+      targetWorkId,
+      mergedWorkIds: mergeWorkIds,
+    });
+  }
+
+  // Add any new ASINs that don't already exist
+  const newAsins = groupAsins.filter(a => !existingAsinSet.has(a));
+  if (newAsins.length > 0) {
+    await Promise.all(
+      newAsins.map(asin => prisma.workAsin.create({ data: toAsinRow(targetWorkId, asin) }))
+    );
+
+    logger.debug('Added ASINs to existing work', {
+      workId: targetWorkId,
+      newAsinCount: newAsins.length,
+    });
+  }
+
+  // Demote stale canonical rows first: merged works each bring their own
+  // canonical ASIN, and an earlier observation may have picked a different
+  // one. Without this the work would end up with several canonical ASINs.
+  await prisma.workAsin.updateMany({
+    where: { workId: targetWorkId, isCanonical: true, asin: { not: canonicalAsin } },
+    data: { isCanonical: false },
+  });
+
+  // Update canonical status: ensure the canonical ASIN is marked
+  await prisma.workAsin.updateMany({
+    where: { workId: targetWorkId, asin: canonicalAsin },
+    data: { isCanonical: true },
+  });
+}
+
+// ---------------------------------------------------------------------------
+// Layer 2: Seed ASIN at request time
+// ---------------------------------------------------------------------------
+
+/**
+ * Ensure an ASIN is tracked in the works table. Creates a single-ASIN work
+ * if the ASIN isn't already present. Called at request creation time.
+ *
+ * If attaching the ASIN fails after the work row was created (for example
+ * because a concurrent call stored the same ASIN first), the new work row is
+ * removed again so no ASIN-less work is left behind.
+ *
+ * Safe to call fire-and-forget — errors are caught and logged, never rethrown.
+ *
+ * @param asin            - Audible ASIN to track; a blank value is ignored
+ * @param title           - Title stored on the new work
+ * @param author          - Author stored on the new work
+ * @param narrator        - Optional narrator stored on the ASIN row
+ * @param durationMinutes - Optional duration stored on the ASIN row
+ */
+export async function seedAsin(
+  asin: string,
+  title: string,
+  author: string,
+  narrator?: string,
+  durationMinutes?: number
+): Promise<void> {
+  // Nothing to track without an ASIN.
+  if (!asin) return;
+
+  try {
+    // Check if ASIN already tracked
+    const existing = await prisma.workAsin.findUnique({
+      where: { asin },
+    });
+    if (existing) return;
+
+    // Create a new single-ASIN work
+    const work = await prisma.work.create({
+      data: { title, author },
+    });
+
+    try {
+      await prisma.workAsin.create({
+        data: {
+          workId: work.id,
+          asin,
+          narrator,
+          durationMinutes,
+          isCanonical: true,
+          source: 'dedup_auto',
+        },
+      });
+    } catch (error) {
+      // Roll back the work created above so it is not orphaned. Best-effort:
+      // a cleanup failure must not mask the original error.
+      try {
+        await prisma.work.delete({ where: { id: work.id } });
+      } catch {
+        // ignored — the original error is reported below
+      }
+      throw error;
+    }
+
+    logger.debug('Seeded ASIN', { workId: work.id, asin });
+  } catch (error) {
+    logger.error('Failed to seed ASIN', {
+      error: error instanceof Error ? error.message : String(error),
+      asin,
+    });
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Sibling ASIN lookup (for library matching expansion)
+// ---------------------------------------------------------------------------
+
+/**
+ * Look up, for every given ASIN, the other ASINs stored under the same work.
+ *
+ * The returned map is keyed by input ASIN; each value lists the remaining
+ * ASINs of that work and never contains the key itself. Input ASINs that are
+ * not in the works table, or whose work has no other ASIN, have no entry.
+ *
+ * @param asins - ASINs to expand
+ * @returns Map of input ASIN -> sibling ASINs
+ */
+export async function getSiblingAsins(
+  asins: string[]
+): Promise<Map<string, string[]>> {
+  const siblingsByAsin = new Map<string, string[]>();
+  if (asins.length === 0) return siblingsByAsin;
+
+  // Query 1: which of the requested ASINs are tracked, and under which work?
+  const matchedRows = await prisma.workAsin.findMany({
+    where: { asin: { in: asins } },
+    select: { asin: true, workId: true },
+  });
+
+  if (matchedRows.length === 0) return siblingsByAsin;
+
+  // Distinct work IDs, in order of first appearance
+  const workIds = [...new Set(matchedRows.map(row => row.workId))];
+
+  // Query 2: every ASIN belonging to those works
+  const memberRows = await prisma.workAsin.findMany({
+    where: { workId: { in: workIds } },
+    select: { asin: true, workId: true },
+  });
+
+  // workId -> all member ASINs
+  const membersByWork = new Map<string, string[]>();
+  for (const { asin, workId } of memberRows) {
+    const members = membersByWork.get(workId);
+    if (members === undefined) {
+      membersByWork.set(workId, [asin]);
+    } else {
+      members.push(asin);
+    }
+  }
+
+  // Siblings of an ASIN = members of its work, minus the ASIN itself
+  for (const { asin, workId } of matchedRows) {
+    const others = (membersByWork.get(workId) ?? []).filter(member => member !== asin);
+    if (others.length > 0) {
+      siblingsByAsin.set(asin, others);
+    }
+  }
+
+  return siblingsByAsin;
+}
@@ -8,6 +8,7 @@

 import { prisma } from '@/lib/db';
 import { LibraryItem } from '@/lib/services/library';
+import { getSiblingAsins } from '@/lib/services/works.service';
 import { RMABLogger } from './logger';

 // Module-level logger
@@ -178,6 +179,61 @@ export async function enrichAudiobooksWithMatches(
    }
  }

+  // Works-table sibling expansion: check if unmatched ASINs have siblings in the library
+  try {
+    const unmatchedAsins = results.filter(r => !r.isAvailable).map(r => r.asin);
+    if (unmatchedAsins.length > 0) {
+      const siblingMap = await getSiblingAsins(unmatchedAsins);
+      if (siblingMap.size > 0) {
+        // Collect all sibling ASINs for a single batch library query
+        const allSiblingAsins = new Set<string>();
+        for (const siblings of siblingMap.values()) {
+          for (const s of siblings) allSiblingAsins.add(s);
+        }
+
+        if (allSiblingAsins.size > 0) {
+          const siblingLibraryMatches = await prisma.plexLibrary.findMany({
+            where: { asin: { in: [...allSiblingAsins] } },
+            select: { asin: true, plexGuid: true },
+          });
+          const libraryAsinSet = new Set(
+            siblingLibraryMatches.filter(m => m.asin).map(m => m.asin!.toLowerCase())
+          );
+
+          // Update results where a sibling ASIN is found in the library
+          for (const result of results) {
+            if (result.isAvailable) continue;
+            const siblings = siblingMap.get(result.asin);
+            if (!siblings) continue;
+            const matchedSiblingAsin = siblings.find(s => libraryAsinSet.has(s.toLowerCase()));
+            if (matchedSiblingAsin) {
+              const libMatch = siblingLibraryMatches.find(
+                m => m.asin?.toLowerCase() === matchedSiblingAsin.toLowerCase()
+              );
+              (result as any).isAvailable = true;
+              (result as any).plexGuid = libMatch?.plexGuid || null;
+            }
+          }
+
+          const siblingMatchCount = results.filter(r => {
+            if (!r.isAvailable) return false;
+            return siblingMap.has(r.asin);
+          }).length;
+          logger.debug('Sibling expansion', {
+            unmatchedCount: unmatchedAsins.length,
+            siblingGroupsFound: siblingMap.size,
+            siblingMatches: siblingMatchCount,
+          });
+        }
+      }
+    }
+  } catch (error) {
+    // Works table expansion is best-effort — direct matches still work
+    logger.error('Sibling ASIN expansion failed', {
+      error: error instanceof Error ? error.message : String(error),
+    });
+  }
+
  // Always enrich with request status (check ANY user's requests)
  const asins = audiobooks.map(book => book.asin);

@@ -307,6 +363,19 @@ export async function getAvailableAsins(): Promise<Set<string>> {
  for (const item of completedRequests) {
    if (item.audibleAsin) asins.add(item.audibleAsin);
  }
+
+  // Expand with works-table sibling ASINs
+  try {
+    if (asins.size > 0) {
+      const siblingMap = await getSiblingAsins([...asins]);
+      for (const siblings of siblingMap.values()) {
+        for (const s of siblings) asins.add(s);
+      }
+    }
+  } catch {
+    // Works table expansion is best-effort
+  }
+
  return asins;
 }

@@ -0,0 +1,201 @@
+/**
+ * Component: Audiobook Deduplication Utility
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Deduplicates audiobook listings that represent the same recording
+ * under different ASINs (publisher re-listings, rights transfers, etc.).
+ *
+ * Dedup key: normalized title + normalized narrator
+ * Duration tolerance: max(longerDuration * 0.01, 5) minutes
+ * Missing duration treated as compatible (graceful degradation).
+ */
+
+import type { AudibleAudiobook } from '../integrations/audible.service';
+
+// ---------------------------------------------------------------------------
+// Title / narrator normalization
+// ---------------------------------------------------------------------------
+
+/**
+ * Patterns in parentheses or brackets to strip (edition markers, format labels).
+ * Matches a "(...)" or "[...]" span that contains one of the listed keywords;
+ * global and case-insensitive, so every such span in a title is matched.
+ */
+const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;
+
+/**
+ * Trailing subtitle after colon or long dash.
+ * SUBTITLE_RE matches from the first colon that is followed by whitespace to
+ * the end of the string. LONG_DASH_SUBTITLE_RE matches from the first hyphen,
+ * en dash or em dash that has whitespace on BOTH sides to the end of the
+ * string, so hyphenated words without surrounding spaces are not affected.
+ */
+const SUBTITLE_RE = /\s*[:]\s+.+$/;
+const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;
+
+/**
+ * Trailing descriptors like "A Novel", "A Memoir".
+ * Anchored at the end of the string; an optional "-", ":" or "," directly
+ * before the descriptor is removed with it. Case-insensitive.
+ */
+const TRAILING_DESCRIPTOR_RE = /\s*[-:,]?\s+a\s+(novel|memoir|thriller|mystery|romance|story|tale|novella)\s*$/i;
+
+/**
+ * Reduce a title to the form used as part of the dedup key.
+ *
+ * Steps, in order: lowercase; drop parenthesized/bracketed edition markers;
+ * drop a trailing descriptor ("a novel", ...); drop a subtitle introduced by
+ * a colon; drop a subtitle introduced by a spaced dash; collapse runs of
+ * whitespace to one space and trim.
+ *
+ * @param title - Raw title
+ * @returns Normalized title for comparison
+ */
+export function normalizeTitle(title: string): string {
+  return title
+    .toLowerCase()
+    .replace(EDITION_PAREN_RE, '')
+    // Descriptor goes before the subtitle steps, as both are end-anchored.
+    .replace(TRAILING_DESCRIPTOR_RE, '')
+    .replace(SUBTITLE_RE, '')
+    // Only dashes with whitespace on both sides count as subtitle separators.
+    .replace(LONG_DASH_SUBTITLE_RE, '')
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+
+/**
+ * Normalize narrator for comparison: lowercase, collapse runs of whitespace
+ * to a single space, and trim. A missing narrator normalizes to ''.
+ *
+ * Whitespace is collapsed the same way normalizeTitle does it, so spacing
+ * differences between listings ("John  Smith" vs "John Smith") do not
+ * produce different dedup keys.
+ */
+function normalizeNarrator(narrator?: string): string {
+  return (narrator || '').toLowerCase().replace(/\s+/g, ' ').trim();
+}
+
+// ---------------------------------------------------------------------------
+// Duration compatibility
+// ---------------------------------------------------------------------------
+
+/**
+ * Decide whether two durations (in minutes) can belong to the same recording.
+ *
+ * The allowed difference is 1% of the longer duration, but never less than
+ * 5 minutes. When either duration is missing (null/undefined) the pair is
+ * considered compatible.
+ *
+ * @param a - First duration in minutes, if known
+ * @param b - Second duration in minutes, if known
+ * @returns true when the durations are within tolerance or one is unknown
+ */
+export function areDurationsCompatible(a?: number, b?: number): boolean {
+  // An unknown duration cannot rule a match out.
+  if (a == null || b == null) {
+    return true;
+  }
+
+  const allowedDrift = Math.max(Math.max(a, b) * 0.01, 5);
+  const actualDrift = Math.abs(a - b);
+  return actualDrift <= allowedDrift;
+}
+
+// ---------------------------------------------------------------------------
+// Metadata scoring (for picking best representative)
+// ---------------------------------------------------------------------------
+
+/**
+ * Count how many of seven metadata fields a listing has filled in
+ * (cover, rating, duration, description, narrator, release date, genres).
+ * Higher means richer metadata; used to pick a group's representative.
+ */
+function metadataScore(book: AudibleAudiobook): number {
+  const fieldsPresent: boolean[] = [
+    Boolean(book.coverArtUrl),
+    book.rating != null,
+    book.durationMinutes != null,
+    Boolean(book.description),
+    Boolean(book.narrator),
+    Boolean(book.releaseDate),
+    Boolean(book.genres && book.genres.length > 0),
+  ];
+  return fieldsPresent.filter(present => present).length;
+}
+
+// ---------------------------------------------------------------------------
+// Dedup group types (for works-table persistence)
+// ---------------------------------------------------------------------------
+
+/** Metadata about a group of ASINs that were collapsed during dedup. */
+export interface DedupGroup {
+  canonicalAsin: string;     // ASIN of the "winner" (best metadata score)
+  allAsins: string[];        // All ASINs in this group (including canonical)
+  title: string;             // Title from the canonical entry
+  author: string;            // Author from the canonical entry
+  narrator?: string;         // Narrator from the canonical entry
+  durationMinutes?: number;  // Duration from the canonical entry
+}
+
+/**
+ * Result of deduplication with group collection.
+ * Returned by deduplicateAndCollectGroups.
+ */
+export interface DeduplicateResult {
+  books: AudibleAudiobook[];  // The deduped list (same as deduplicateAudiobooks returns)
+  groups: DedupGroup[];       // Groups where 2+ ASINs were collapsed
+}
+
+// ---------------------------------------------------------------------------
+// Main dedup functions
+// ---------------------------------------------------------------------------
+
+/**
+ * Collapse listings that represent the same recording and return only the
+ * surviving entries.
+ *
+ * Thin wrapper around deduplicateAndCollectGroups that discards the group
+ * metadata; see that function for the matching rules (normalized title +
+ * narrator, then duration compatibility).
+ *
+ * @param books - Listings to deduplicate
+ * @returns The deduplicated listings
+ */
+export function deduplicateAudiobooks(books: AudibleAudiobook[]): AudibleAudiobook[] {
+  const { books: uniqueBooks } = deduplicateAndCollectGroups(books);
+  return uniqueBooks;
+}
+
+/**
+ * Deduplicate audiobooks AND return grouping metadata for works-table persistence.
+ * Returns both the deduped list and the groups where 2+ ASINs were collapsed.
+ *
+ * Matching rules:
+ * 1. Books are bucketed by normalized title + normalized narrator.
+ * 2. Inside a bucket, books are split into sub-groups by duration. A book
+ *    joins the first sub-group whose reference duration is compatible with
+ *    its own. The reference is the first member that HAS a duration, so a
+ *    listing without a duration cannot act as a wildcard that pulls
+ *    incompatible editions (e.g. abridged vs unabridged) together.
+ * 3. Each sub-group is represented by its member with the highest metadata
+ *    score; on a tie the earliest member wins.
+ *
+ * Buckets are emitted in order of first appearance in the input.
+ *
+ * @param books - Listings to deduplicate
+ * @returns The surviving listings plus one DedupGroup per sub-group that
+ *          contained at least two distinct ASINs
+ */
+export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): DeduplicateResult {
+  if (books.length <= 1) return { books: [...books], groups: [] };
+
+  // Group by normalized title + narrator. A Map iterates in insertion order,
+  // so buckets come back in order of first appearance.
+  const titleNarratorGroups = new Map<string, AudibleAudiobook[]>();
+
+  for (const book of books) {
+    const key = `${normalizeTitle(book.title)}|||${normalizeNarrator(book.narrator)}`;
+    const group = titleNarratorGroups.get(key);
+    if (group) {
+      group.push(book);
+    } else {
+      titleNarratorGroups.set(key, [book]);
+    }
+  }
+
+  const result: AudibleAudiobook[] = [];
+  const dedupGroups: DedupGroup[] = [];
+
+  for (const group of titleNarratorGroups.values()) {
+    if (group.length === 1) {
+      result.push(group[0]);
+      continue;
+    }
+
+    // Within a title+narrator group, further split by duration compatibility.
+    const subGroups: AudibleAudiobook[][] = [];
+
+    for (const book of group) {
+      const home = subGroups.find(sg => {
+        // Reference = first member with a known duration. If no member has
+        // one yet, the reference is undefined and the book is compatible.
+        const reference = sg.find(member => member.durationMinutes != null);
+        return areDurationsCompatible(reference?.durationMinutes, book.durationMinutes);
+      });
+      if (home) {
+        home.push(book);
+      } else {
+        subGroups.push([book]);
+      }
+    }
+
+    // From each sub-group, pick the best representative and collect group metadata
+    for (const sg of subGroups) {
+      let best = sg[0];
+      let bestScore = metadataScore(best);
+      for (let i = 1; i < sg.length; i++) {
+        const score = metadataScore(sg[i]);
+        // Strict '>' keeps the earliest member on a tie.
+        if (score > bestScore) {
+          best = sg[i];
+          bestScore = score;
+        }
+      }
+      result.push(best);
+
+      // Collect group metadata for works-table persistence. Only groups with
+      // 2+ DISTINCT ASINs qualify: the same ASIN listed twice is not a
+      // cross-ASIN match, and a repeated ASIN would violate the unique
+      // constraint when the group is persisted.
+      const distinctAsins = [...new Set(sg.map(b => b.asin))];
+      if (distinctAsins.length >= 2) {
+        dedupGroups.push({
+          canonicalAsin: best.asin,
+          allAsins: distinctAsins,
+          title: best.title,
+          author: best.author,
+          narrator: best.narrator,
+          durationMinutes: best.durationMinutes,
+        });
+      }
+    }
+  }
+
+  return { books: result, groups: dedupGroups };
+}
@@ -0,0 +1,44 @@
+/**
+ * Component: Runtime Parsing Utility
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Shared runtime/duration text parser extracted from AudibleService.
+ * Handles all i18n patterns (English, German, Spanish, French) via
+ * language-specific regex patterns in LanguageConfig.
+ */
+
+import type { LanguageConfig } from '../constants/language-config';
+
+/**
+ * Parse runtime text (e.g. "12 hrs and 30 mins", "5 Std. 20 Min.")
+ * into total minutes using language-specific patterns.
+ *
+ * Hours and minutes are extracted independently. For each component the
+ * patterns are tried in order and the first one whose first capture group is
+ * a base-10 integer wins. A pattern that matches without a numeric capture is
+ * skipped, so it cannot turn the total into NaN and discard the other
+ * component.
+ *
+ * @param runtimeText - Raw runtime string from Audible HTML
+ * @param langConfig  - Language configuration with hour/minute regex patterns
+ * @returns Total minutes, or undefined if no positive duration could be parsed
+ */
+export function parseRuntime(runtimeText: string, langConfig: LanguageConfig): number | undefined {
+  if (!runtimeText) return undefined;
+
+  // First capture group of the first pattern that yields an integer; 0 when none does.
+  const firstCapturedInt = (patterns: ReadonlyArray<string | RegExp>): number => {
+    for (const pattern of patterns) {
+      const captured = runtimeText.match(pattern)?.[1];
+      if (captured === undefined) continue;
+
+      // Explicit radix: never let the input's prefix decide the number base.
+      const value = Number.parseInt(captured, 10);
+      if (!Number.isNaN(value)) return value;
+    }
+    return 0;
+  };
+
+  const { runtimeHourPatterns, runtimeMinutePatterns } = langConfig.scraping;
+  const totalMinutes =
+    firstCapturedInt(runtimeHourPatterns) * 60 + firstCapturedInt(runtimeMinutePatterns);
+
+  // A zero total means nothing usable was parsed.
+  return totalMinutes > 0 ? totalMinutes : undefined;
+}
@@ -47,6 +47,8 @@ export const createPrismaMock = () => ({
  bookDateSwipe: createModelMock(),
  goodreadsShelf: createModelMock(),
  goodreadsBookMapping: createModelMock(),
+  work: createModelMock(),
+  workAsin: createModelMock(),
  $queryRaw: vi.fn(),
  $disconnect: vi.fn(),
 });
@@ -0,0 +1,306 @@
+/**
+ * Component: Works Service Tests
+ * Documentation: documentation/integrations/audible.md
+ */
+
+import { beforeEach, describe, expect, it, vi } from 'vitest';
+import { createPrismaMock } from '../helpers/prisma';
+import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
+
+// Shared Prisma stand-in; the tests below configure and inspect this one object.
+const prismaMock = createPrismaMock();
+
+// Replace the database module so code importing '@/lib/db' receives prismaMock.
+vi.mock('@/lib/db', () => ({
+  prisma: prismaMock,
+}));
+
+// Replace the logger factory with one that hands out spies for each log level.
+vi.mock('@/lib/utils/logger', () => ({
+  RMABLogger: {
+    create: () => ({
+      debug: vi.fn(),
+      info: vi.fn(),
+      warn: vi.fn(),
+      error: vi.fn(),
+    }),
+  },
+}));
+
+/**
+ * persistDedupGroups: checks how dedup groups are written through the mocked
+ * Prisma client - creating a new work, extending an existing work, merging two
+ * works, and swallowing database errors.
+ *
+ * NOTE(review): vi.clearAllMocks() clears recorded calls only; per the Vitest
+ * docs it leaves stubbed implementations in place, so a mockResolvedValue set
+ * in one test stays active in later tests unless it is overridden.
+ */
+describe('persistDedupGroups', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    // Drop the module cache so each test's dynamic import re-evaluates the service.
+    vi.resetModules();
+  });
+
+  it('creates new work + work_asins for a fresh group', async () => {
+    // No ASIN of the group is known yet.
+    prismaMock.workAsin.findMany.mockResolvedValue([]);
+    prismaMock.work.create.mockResolvedValue({ id: 'work-1' });
+    prismaMock.workAsin.create.mockResolvedValue({});
+    prismaMock.workAsin.updateMany.mockResolvedValue({ count: 0 });
+
+    const { persistDedupGroups } = await import('@/lib/services/works.service');
+
+    const groups: DedupGroup[] = [{
+      canonicalAsin: 'ASIN_A',
+      allAsins: ['ASIN_A', 'ASIN_B'],
+      title: 'Test Book',
+      author: 'Test Author',
+      narrator: 'Test Narrator',
+      durationMinutes: 600,
+    }];
+
+    await persistDedupGroups(groups);
+
+    expect(prismaMock.work.create).toHaveBeenCalledWith({
+      data: { title: 'Test Book', author: 'Test Author' },
+    });
+    expect(prismaMock.workAsin.create).toHaveBeenCalledTimes(2);
+
+    // Canonical ASIN should have narrator, duration, isCanonical=true
+    expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
+      data: expect.objectContaining({
+        workId: 'work-1',
+        asin: 'ASIN_A',
+        narrator: 'Test Narrator',
+        durationMinutes: 600,
+        isCanonical: true,
+        source: 'dedup_auto',
+      }),
+    });
+
+    // Non-canonical ASIN should have isCanonical=false
+    expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
+      data: expect.objectContaining({
+        workId: 'work-1',
+        asin: 'ASIN_B',
+        isCanonical: false,
+        source: 'dedup_auto',
+      }),
+    });
+  });
+
+  it('adds new ASINs to existing work when canonical already exists', async () => {
+    // Only the canonical ASIN is already tracked, under 'existing-work'.
+    prismaMock.workAsin.findMany.mockResolvedValue([
+      { asin: 'ASIN_A', workId: 'existing-work' },
+    ]);
+    prismaMock.workAsin.create.mockResolvedValue({});
+    prismaMock.workAsin.updateMany.mockResolvedValue({ count: 1 });
+
+    const { persistDedupGroups } = await import('@/lib/services/works.service');
+
+    const groups: DedupGroup[] = [{
+      canonicalAsin: 'ASIN_A',
+      allAsins: ['ASIN_A', 'ASIN_B', 'ASIN_C'],
+      title: 'Test Book',
+      author: 'Test Author',
+      narrator: 'Narrator',
+      durationMinutes: 500,
+    }];
+
+    await persistDedupGroups(groups);
+
+    // Should NOT create a new work
+    expect(prismaMock.work.create).not.toHaveBeenCalled();
+
+    // Should create entries for ASIN_B and ASIN_C only (ASIN_A already exists)
+    expect(prismaMock.workAsin.create).toHaveBeenCalledTimes(2);
+    expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
+      data: expect.objectContaining({
+        workId: 'existing-work',
+        asin: 'ASIN_B',
+      }),
+    });
+    expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
+      data: expect.objectContaining({
+        workId: 'existing-work',
+        asin: 'ASIN_C',
+      }),
+    });
+  });
+
+  it('merges two separate works when dedup groups them together', async () => {
+    // ASIN_A is in work-1, ASIN_B is in work-2
+    prismaMock.workAsin.findMany.mockResolvedValue([
+      { asin: 'ASIN_A', workId: 'work-1' },
+      { asin: 'ASIN_B', workId: 'work-2' },
+    ]);
+    prismaMock.workAsin.updateMany.mockResolvedValue({ count: 1 });
+    prismaMock.work.deleteMany.mockResolvedValue({ count: 1 });
+
+    const { persistDedupGroups } = await import('@/lib/services/works.service');
+
+    // narrator and durationMinutes are omitted from this group on purpose.
+    const groups: DedupGroup[] = [{
+      canonicalAsin: 'ASIN_A',
+      allAsins: ['ASIN_A', 'ASIN_B'],
+      title: 'Merged Book',
+      author: 'Author',
+    }];
+
+    await persistDedupGroups(groups);
+
+    // Should move work-2 ASINs to work-1
+    expect(prismaMock.workAsin.updateMany).toHaveBeenCalledWith({
+      where: { workId: { in: ['work-2'] } },
+      data: { workId: 'work-1' },
+    });
+
+    // Should delete work-2
+    expect(prismaMock.work.deleteMany).toHaveBeenCalledWith({
+      where: { id: { in: ['work-2'] } },
+    });
+  });
+
+  it('silently catches and logs errors without throwing', async () => {
+    // Simulate the very first database call failing.
+    prismaMock.workAsin.findMany.mockRejectedValue(new Error('DB connection failed'));
+
+    const { persistDedupGroups } = await import('@/lib/services/works.service');
+
+    const groups: DedupGroup[] = [{
+      canonicalAsin: 'ASIN_A',
+      allAsins: ['ASIN_A', 'ASIN_B'],
+      title: 'Test',
+      author: 'Auth',
+    }];
+
+    // Should not throw
+    await expect(persistDedupGroups(groups)).resolves.toBeUndefined();
+  });
+});
+
+/**
+ * seedAsin: checks that an unknown ASIN gets its own work plus a canonical
+ * work_asins row, that a known ASIN causes no writes, and that database
+ * errors are swallowed.
+ */
+describe('seedAsin', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    // Drop the module cache so each test's dynamic import re-evaluates the service.
+    vi.resetModules();
+  });
+
+  it('creates single-ASIN work for new ASIN', async () => {
+    // A null lookup result means the ASIN is not tracked yet.
+    prismaMock.workAsin.findUnique.mockResolvedValue(null);
+    prismaMock.work.create.mockResolvedValue({ id: 'new-work' });
+    prismaMock.workAsin.create.mockResolvedValue({});
+
+    const { seedAsin } = await import('@/lib/services/works.service');
+
+    await seedAsin('NEW_ASIN', 'New Book', 'Author', 'Narrator', 300);
+
+    expect(prismaMock.work.create).toHaveBeenCalledWith({
+      data: { title: 'New Book', author: 'Author' },
+    });
+    // Exact match (no objectContaining): the row must carry these fields and nothing else.
+    expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
+      data: {
+        workId: 'new-work',
+        asin: 'NEW_ASIN',
+        narrator: 'Narrator',
+        durationMinutes: 300,
+        isCanonical: true,
+        source: 'dedup_auto',
+      },
+    });
+  });
+
+  it('does nothing for already-tracked ASIN', async () => {
+    prismaMock.workAsin.findUnique.mockResolvedValue({
+      id: 'existing',
+      asin: 'EXISTING_ASIN',
+      workId: 'work-1',
+    });
+
+    const { seedAsin } = await import('@/lib/services/works.service');
+
+    await seedAsin('EXISTING_ASIN', 'Book', 'Author');
+
+    expect(prismaMock.work.create).not.toHaveBeenCalled();
+    expect(prismaMock.workAsin.create).not.toHaveBeenCalled();
+  });
+
+  it('silently catches and logs errors without throwing', async () => {
+    prismaMock.workAsin.findUnique.mockRejectedValue(new Error('DB error'));
+
+    const { seedAsin } = await import('@/lib/services/works.service');
+
+    await expect(seedAsin('ASIN', 'Book', 'Auth')).resolves.toBeUndefined();
+  });
+});
+
+/**
+ * getSiblingAsins: checks the returned Map of input ASIN -> other ASINs of the
+ * same work. Tests that queue two findMany results rely on their order: the
+ * first answers the lookup of the input ASINs, the second the lookup of all
+ * ASINs belonging to the matched works.
+ */
+describe('getSiblingAsins', () => {
+  beforeEach(() => {
+    vi.clearAllMocks();
+    // Drop the module cache so each test's dynamic import re-evaluates the service.
+    vi.resetModules();
+  });
+
+  it('returns sibling ASINs correctly', async () => {
+    // First query: find input ASINs and their work IDs
+    prismaMock.workAsin.findMany
+      .mockResolvedValueOnce([
+        { asin: 'ASIN_A', workId: 'work-1' },
+        { asin: 'ASIN_C', workId: 'work-2' },
+      ])
+      // Second query: all ASINs in those works
+      .mockResolvedValueOnce([
+        { asin: 'ASIN_A', workId: 'work-1' },
+        { asin: 'ASIN_B', workId: 'work-1' },
+        { asin: 'ASIN_C', workId: 'work-2' },
+        { asin: 'ASIN_D', workId: 'work-2' },
+        { asin: 'ASIN_E', workId: 'work-2' },
+      ]);
+
+    const { getSiblingAsins } = await import('@/lib/services/works.service');
+
+    const result = await getSiblingAsins(['ASIN_A', 'ASIN_C']);
+
+    expect(result.get('ASIN_A')).toEqual(['ASIN_B']);
+    expect(result.get('ASIN_C')).toEqual(['ASIN_D', 'ASIN_E']);
+  });
+
+  it('returns empty map for unknown ASINs', async () => {
+    prismaMock.workAsin.findMany.mockResolvedValue([]);
+
+    const { getSiblingAsins } = await import('@/lib/services/works.service');
+
+    const result = await getSiblingAsins(['UNKNOWN']);
+
+    expect(result.size).toBe(0);
+  });
+
+  it('returns empty map for empty input', async () => {
+    const { getSiblingAsins } = await import('@/lib/services/works.service');
+
+    const result = await getSiblingAsins([]);
+
+    expect(result.size).toBe(0);
+    // Should not query DB
+    expect(prismaMock.workAsin.findMany).not.toHaveBeenCalled();
+  });
+
+  it('excludes the input ASIN itself from siblings', async () => {
+    prismaMock.workAsin.findMany
+      .mockResolvedValueOnce([
+        { asin: 'ASIN_A', workId: 'work-1' },
+      ])
+      .mockResolvedValueOnce([
+        { asin: 'ASIN_A', workId: 'work-1' },
+        { asin: 'ASIN_B', workId: 'work-1' },
+      ]);
+
+    const { getSiblingAsins } = await import('@/lib/services/works.service');
+
+    const result = await getSiblingAsins(['ASIN_A']);
+
+    expect(result.get('ASIN_A')).toEqual(['ASIN_B']);
+    expect(result.get('ASIN_A')).not.toContain('ASIN_A');
+  });
+
+  it('omits ASINs with no siblings (single-ASIN works)', async () => {
+    // The work contains only the input ASIN itself.
+    prismaMock.workAsin.findMany
+      .mockResolvedValueOnce([
+        { asin: 'ASIN_LONELY', workId: 'work-solo' },
+      ])
+      .mockResolvedValueOnce([
+        { asin: 'ASIN_LONELY', workId: 'work-solo' },
+      ]);
+
+    const { getSiblingAsins } = await import('@/lib/services/works.service');
+
+    const result = await getSiblingAsins(['ASIN_LONELY']);
+
+    // No siblings means it shouldn't be in the map at all
+    expect(result.has('ASIN_LONELY')).toBe(false);
+  });
+});
@@ -0,0 +1,434 @@
+/**
+ * Component: Audiobook Deduplication Tests
+ * Documentation: documentation/integrations/audible.md
+ */
+
+import { describe, expect, it } from 'vitest';
+import {
+  deduplicateAudiobooks,
+  deduplicateAndCollectGroups,
+  normalizeTitle,
+  areDurationsCompatible,
+} from '@/lib/utils/deduplicate-audiobooks';
+import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
+
+// ---------------------------------------------------------------------------
+// Helper: minimal AudibleAudiobook factory
+// ---------------------------------------------------------------------------
+
+/**
+ * Build an AudibleAudiobook fixture for the tests below.
+ *
+ * The fields listed in the defaults object start out undefined; anything
+ * supplied in `overrides` (asin, title and author are mandatory) wins.
+ */
+function makeBook(overrides: Partial<AudibleAudiobook> & { asin: string; title: string; author: string }): AudibleAudiobook {
+  const blankFields = {
+    narrator: undefined,
+    coverArtUrl: undefined,
+    durationMinutes: undefined,
+    rating: undefined,
+    description: undefined,
+    releaseDate: undefined,
+    genres: undefined,
+    series: undefined,
+    seriesPart: undefined,
+    seriesAsin: undefined,
+    authorAsin: undefined,
+  };
+
+  // Later spread wins, so caller-supplied values replace the blanks.
+  return { ...blankFields, ...overrides };
+}
+
+// ---------------------------------------------------------------------------
+// normalizeTitle
+// ---------------------------------------------------------------------------
+
+describe('normalizeTitle', () => {
+  // Each row: [test name, raw title, expected normalized title].
+  const cases: Array<[string, string, string]> = [
+    ['lowercases', 'The Black Prism', 'the black prism'],
+    ['strips (Unabridged)', 'The Black Prism (Unabridged)', 'the black prism'],
+    ['strips [Abridged Edition]', 'The Black Prism [Abridged Edition]', 'the black prism'],
+    ['strips (2024 Remastered Edition)', 'The Hobbit (2024 Remastered Edition)', 'the hobbit'],
+    ['strips subtitle after colon', 'The Black Prism: Lightbringer, Book 1', 'the black prism'],
+    ['strips subtitle after long dash', 'The Black Prism \u2014 A Lightbringer Novel', 'the black prism'],
+    ['strips trailing "A Novel"', 'The Black Prism: A Novel', 'the black prism'],
+    ['strips (Audiobook)', 'The Hobbit (Audiobook)', 'the hobbit'],
+    ['strips (Dramatized Adaptation)', 'The Black Prism (Dramatized Adaptation)', 'the black prism'],
+    ['strips (Full Cast Narration)', 'The Black Prism (Full Cast Narration)', 'the black prism'],
+    ['collapses whitespace', '  The   Black   Prism  ', 'the black prism'],
+    ['handles empty string', '', ''],
+    // The hyphen in "Well-Known" is a short dash inside a word, not a subtitle separator.
+    ['preserves hyphenated words (not subtitles)', 'A Well-Known Book', 'a well-known book'],
+  ];
+
+  // Register one test per row, in table order.
+  for (const [name, rawTitle, expected] of cases) {
+    it(name, () => {
+      expect(normalizeTitle(rawTitle)).toBe(expected);
+    });
+  }
+});
+
+// ---------------------------------------------------------------------------
+// areDurationsCompatible
+// ---------------------------------------------------------------------------
+
+describe('areDurationsCompatible', () => {
+  // A single check: [first duration, second duration, expected verdict].
+  type Check = [number | undefined, number | undefined, boolean];
+
+  // Each scenario: test name plus the checks it runs, in order.
+  const scenarios: Array<[string, Check[]]> = [
+    ['returns true when both undefined', [[undefined, undefined, true]]],
+    ['returns true when one undefined', [
+      [600, undefined, true],
+      [undefined, 600, true],
+    ]],
+    ['returns true for identical durations', [[600, 600, true]]],
+    // ~40-hour books (2400 min): a 24-minute gap is accepted, 25 minutes is not.
+    ['uses 1% of longer duration as tolerance for long books', [
+      [2400, 2424, true],
+      [2400, 2425, false],
+    ]],
+    // 2-hour books (120 min): 1% is about a minute, so the 5-minute floor decides.
+    ['uses 5-minute minimum tolerance for short books', [
+      [120, 125, true],
+      [120, 126, false],
+    ]],
+    // Unabridged 720 min (12 hrs) against abridged 360 min (6 hrs).
+    ['keeps abridged vs unabridged separate (large duration gap)', [[720, 360, false]]],
+    ['symmetry: order does not matter', [
+      [2400, 2424, true],
+      [2424, 2400, true],
+      [120, 126, false],
+      [126, 120, false],
+    ]],
+  ];
+
+  for (const [name, checks] of scenarios) {
+    it(name, () => {
+      for (const [first, second, expected] of checks) {
+        expect(areDurationsCompatible(first, second)).toBe(expected);
+      }
+    });
+  }
+});
+
+// ---------------------------------------------------------------------------
+// deduplicateAudiobooks
+// ---------------------------------------------------------------------------
+
+/**
+ * deduplicateAudiobooks: behavioural tests for collapsing duplicate listings.
+ * Entries are expected to merge only when normalized title, narrator and
+ * duration all agree; the surviving entry is the one with the most metadata,
+ * and results keep first-seen order.
+ */
+describe('deduplicateAudiobooks', () => {
+  it('returns empty array for empty input', () => {
+    expect(deduplicateAudiobooks([])).toEqual([]);
+  });
+
+  it('returns single book unchanged', () => {
+    const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Author' });
+    expect(deduplicateAudiobooks([book])).toEqual([book]);
+  });
+
+  it('passes through all-unique books unchanged', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }),
+      makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }),
+      makeBook({ asin: 'A3', title: 'Book Three', author: 'Auth', narrator: 'Nar B', durationMinutes: 700 }),
+    ];
+    expect(deduplicateAudiobooks(books)).toHaveLength(3);
+  });
+
+  it('collapses simple duplicates (same title + narrator + similar duration)', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(1);
+  });
+
+  it('keeps books with different narrators (different production)', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Full Cast', durationMinutes: 480 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(2);
+  });
+
+  it('keeps abridged vs unabridged (same narrator, very different duration)', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
+      makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(2);
+  });
+
+  it('collapses when one book has missing duration', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: undefined }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(1);
+  });
+
+  it('collapses when both books have missing duration', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(1);
+  });
+
+  it('collapses title variants with edition markers', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism (Unabridged)', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1258 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(1);
+  });
+
+  it('collapses title variants with subtitles', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism: Lightbringer, Book 1', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(1);
+  });
+
+  it('picks the representative with most metadata', () => {
+    // Listed first, but carries no cover, rating or description.
+    const sparse = makeBook({
+      asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks',
+      narrator: 'Simon Vance', durationMinutes: 1260,
+    });
+    const rich = makeBook({
+      asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks',
+      narrator: 'Simon Vance', durationMinutes: 1262,
+      coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book',
+    });
+    const result = deduplicateAudiobooks([sparse, rich]);
+    expect(result).toHaveLength(1);
+    expect(result[0].asin).toBe('A2'); // rich entry wins
+  });
+
+  it('preserves original order (first-seen position)', () => {
+    // The second 'Alpha' appears after 'Beta' but must not move the group.
+    const books = [
+      makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }),
+      makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 400 }),
+      makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
+      makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(3);
+    expect(result.map(b => b.title)).toEqual(['Alpha', 'Beta', 'Charlie']);
+  });
+
+  it('handles Lightbringer-style scenario: unabridged + dramatized', () => {
+    // Simon Vance full narration (long)
+    const vance1 = makeBook({
+      asin: 'SV1', title: 'The Black Prism', author: 'Brent Weeks',
+      narrator: 'Simon Vance', durationMinutes: 1260,
+      coverArtUrl: 'cover1.jpg', rating: 4.7,
+    });
+    // Re-listed Simon Vance (same duration, different ASIN)
+    const vance2 = makeBook({
+      asin: 'SV2', title: 'The Black Prism: Lightbringer Book 1', author: 'Brent Weeks',
+      narrator: 'Simon Vance', durationMinutes: 1262,
+    });
+    // Dramatized with full cast (shorter, different narrator)
+    const drama = makeBook({
+      asin: 'DR1', title: 'The Black Prism (Dramatized Adaptation)', author: 'Brent Weeks',
+      narrator: 'Full Cast', durationMinutes: 480,
+      coverArtUrl: 'cover-drama.jpg',
+    });
+
+    const result = deduplicateAudiobooks([vance1, vance2, drama]);
+    expect(result).toHaveLength(2);
+    // Simon Vance should collapse to 1, Full Cast stays
+    expect(result.find(b => b.narrator === 'Simon Vance')).toBeTruthy();
+    expect(result.find(b => b.narrator === 'Full Cast')).toBeTruthy();
+    // Should pick the richer entry for Simon Vance
+    const svResult = result.find(b => b.narrator === 'Simon Vance')!;
+    expect(svResult.asin).toBe('SV1'); // has cover + rating
+  });
+
+  it('uses percentage tolerance for very long audiobooks', () => {
+    // Two 40-hour books: tolerance = max(2400*0.01, 5) = 24 min
+    const books = [
+      makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2400 }),
+      makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2420 }),
+    ];
+    expect(deduplicateAudiobooks(books)).toHaveLength(1);
+
+    // Beyond tolerance
+    const booksFar = [
+      makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2400 }),
+      makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2430 }),
+    ];
+    expect(deduplicateAudiobooks(booksFar)).toHaveLength(2);
+  });
+
+  it('treats missing narrator as its own group', () => {
+    // Two entries with same title but no narrator - should collapse
+    const books = [
+      makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),
+      makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 302 }),
+    ];
+    expect(deduplicateAudiobooks(books)).toHaveLength(1);
+  });
+
+  it('does not collapse empty-narrator with named narrator', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),
+      makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'John Smith', durationMinutes: 302 }),
+    ];
+    expect(deduplicateAudiobooks(books)).toHaveLength(2);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// deduplicateAndCollectGroups
+// ---------------------------------------------------------------------------
+
+/**
+ * deduplicateAndCollectGroups: tests for the variant that returns both the
+ * deduplicated `books` and the `groups` metadata. A group is expected only
+ * for collapsed sets of two or more ASINs, with its canonical ASIN, narrator
+ * and duration taken from the entry that has the most metadata.
+ */
+describe('deduplicateAndCollectGroups', () => {
+  it('returns empty groups array when no duplicates', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }),
+      makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }),
+    ];
+    const { books: result, groups } = deduplicateAndCollectGroups(books);
+    expect(result).toHaveLength(2);
+    expect(groups).toHaveLength(0);
+  });
+
+  it('returns empty groups for empty input', () => {
+    const { books: result, groups } = deduplicateAndCollectGroups([]);
+    expect(result).toHaveLength(0);
+    expect(groups).toHaveLength(0);
+  });
+
+  it('returns empty groups for single book', () => {
+    const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Auth' });
+    const { books: result, groups } = deduplicateAndCollectGroups([book]);
+    expect(result).toHaveLength(1);
+    expect(groups).toHaveLength(0);
+  });
+
+  it('returns group with 2 ASINs when 2 books match', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
+    ];
+    const { books: result, groups } = deduplicateAndCollectGroups(books);
+    expect(result).toHaveLength(1);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].allAsins).toHaveLength(2);
+    expect(groups[0].allAsins).toContain('A1');
+    expect(groups[0].allAsins).toContain('A2');
+  });
+
+  it('returns group with 3+ ASINs for multi-duplicate scenario', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
+      makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 662 }),
+      makeBook({ asin: 'A3', title: 'The Hobbit (Unabridged)', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 658 }),
+    ];
+    const { books: result, groups } = deduplicateAndCollectGroups(books);
+    expect(result).toHaveLength(1);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].allAsins).toHaveLength(3);
+    expect(groups[0].allAsins).toContain('A1');
+    expect(groups[0].allAsins).toContain('A2');
+    expect(groups[0].allAsins).toContain('A3');
+  });
+
+  it('canonicalAsin is the one with highest metadata score', () => {
+    const sparse = makeBook({
+      asin: 'SPARSE', title: 'The Black Prism', author: 'Brent Weeks',
+      narrator: 'Simon Vance', durationMinutes: 1260,
+    });
+    const rich = makeBook({
+      asin: 'RICH', title: 'The Black Prism', author: 'Brent Weeks',
+      narrator: 'Simon Vance', durationMinutes: 1262,
+      coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book',
+    });
+    const { groups } = deduplicateAndCollectGroups([sparse, rich]);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].canonicalAsin).toBe('RICH');
+  });
+
+  it('groups only include entries with 2+ ASINs', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }),
+      makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
+      makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
+    ];
+    const { groups } = deduplicateAndCollectGroups(books);
+    // Only Alpha group should appear (Beta is a singleton)
+    expect(groups).toHaveLength(1);
+    expect(groups[0].allAsins).toContain('A1');
+    expect(groups[0].allAsins).toContain('A2');
+  });
+
+  it('duration-incompatible books produce separate entries (no group for singletons)', () => {
+    // Same title/narrator but very different durations (abridged vs unabridged)
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
+      makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }),
+    ];
+    const { books: result, groups } = deduplicateAndCollectGroups(books);
+    expect(result).toHaveLength(2); // Not collapsed
+    expect(groups).toHaveLength(0); // No multi-ASIN groups
+  });
+
+  it('books field matches what deduplicateAudiobooks returns', () => {
+    // Mix of two duplicate pairs and one singleton, compared by ASIN order.
+    const books = [
+      makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300, coverArtUrl: 'img.jpg', rating: 4.5 }),
+      makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
+      makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
+      makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 600 }),
+      makeBook({ asin: 'C2', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 601 }),
+    ];
+    const dedupOnly = deduplicateAudiobooks(books);
+    const { books: withGroups } = deduplicateAndCollectGroups(books);
+    expect(withGroups.map(b => b.asin)).toEqual(dedupOnly.map(b => b.asin));
+  });
+
+  it('includes narrator and durationMinutes from canonical entry in group', () => {
+    const books = [
+      makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 480 }),
+      makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 482, coverArtUrl: 'img.jpg', rating: 4.0 }),
+    ];
+    const { groups } = deduplicateAndCollectGroups(books);
+    expect(groups).toHaveLength(1);
+    expect(groups[0].canonicalAsin).toBe('A2'); // richer metadata
+    expect(groups[0].narrator).toBe('Jane Doe');
+    expect(groups[0].durationMinutes).toBe(482);
+    expect(groups[0].author).toBe('Auth');
+  });
+});