Add works table and ASIN deduping

Add persistent cross-ASIN "works" mapping and client-side deduplication to improve library matching. Introduces a Prisma migration and models (Work, WorkAsin) plus src/lib/services/works.service for persisting dedup groups, seeding ASINs at request time, and sibling lookup. Adds a deduplication utility (deduplicate-audiobooks) that normalizes titles/narrators, compares durations, and returns grouping metadata; API routes (search, author, series) now deduplicate results before enrichment and fire-and-forget persist groups. Adds sibling-ASIN expansion into audiobook matcher and expands getAvailableAsins accordingly. Extracts runtime parsing into a shared parse-runtime util and updates audible scrapers/services to use it. Includes unit tests for dedup logic and works service and updates test Prisma mocks.
2026-07-18 02:31:10 +00:00 · 2026-03-03 13:31:46 -05:00
parent ff80d995c5
commit 610873af6b
15 changed files with 1446 additions and 32 deletions
@@ -6,6 +6,8 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
+import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
+import { persistDedupGroups } from '@/lib/services/works.service';
 import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';

@@ -38,14 +40,22 @@ export async function GET(request: NextRequest) {
    const currentUser = getCurrentUser(request);
    const userId = currentUser?.sub || undefined;

+    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
+    const { books: dedupedResults, groups } = deduplicateAndCollectGroups(results.results);
+
+    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
+    if (groups.length > 0) {
+      persistDedupGroups(groups).catch(() => {});
+    }
+
    // Enrich search results with availability and request status information
-    const enrichedResults = await enrichAudiobooksWithMatches(results.results, userId);
+    const enrichedResults = await enrichAudiobooksWithMatches(dedupedResults, userId);

    return NextResponse.json({
      success: true,
      query: results.query,
      results: enrichedResults,
-      totalResults: results.totalResults,
+      totalResults: enrichedResults.length,
      page: results.page,
      hasMore: results.hasMore,
    });
@@ -6,6 +6,8 @@
 import { NextRequest, NextResponse } from 'next/server';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
+import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
+import { persistDedupGroups } from '@/lib/services/works.service';
 import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';

@@ -53,9 +55,17 @@ export async function GET(
    const audibleService = getAudibleService();
    const result = await audibleService.searchByAuthorAsin(authorName.trim(), asin, page);

+    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
+    const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(result.books);
+
+    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
+    if (groups.length > 0) {
+      persistDedupGroups(groups).catch(() => {});
+    }
+
    // Enrich with library availability and request status
    const userId = currentUser.sub || undefined;
-    const enrichedBooks = await enrichAudiobooksWithMatches(result.books, userId);
+    const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);

    logger.info(`Author books complete: "${authorName}" → ${enrichedBooks.length} books (page ${page})`);

@@ -64,7 +74,7 @@ export async function GET(
      books: enrichedBooks,
      authorName: authorName.trim(),
      authorAsin: asin,
-      totalBooks: result.totalResults || enrichedBooks.length,
+      totalBooks: enrichedBooks.length,
      hasMore: result.hasMore,
      page: result.page,
    });
@@ -8,6 +8,8 @@ import { getCurrentUser } from '@/lib/middleware/auth';
 import { RMABLogger } from '@/lib/utils/logger';
 import { scrapeSeriesPage } from '@/lib/integrations/audible-series';
 import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
+import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
+import { persistDedupGroups } from '@/lib/services/works.service';

 const logger = RMABLogger.create('API.Series.Detail');

@@ -49,9 +51,17 @@ export async function GET(
      );
    }

+    // Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
+    const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(detail.books);
+
+    // Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
+    if (groups.length > 0) {
+      persistDedupGroups(groups).catch(() => {});
+    }
+
    // Enrich books with library availability and request status
    const userId = currentUser.sub || undefined;
-    const enrichedBooks = await enrichAudiobooksWithMatches(detail.books, userId);
+    const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);

    logger.info(`Series detail complete: "${detail.title}" (${enrichedBooks.length} books, page ${page})`);

@@ -14,8 +14,10 @@ import {
  getLanguageForRegion,
  buildContainsSelector,
  stripPrefixes,
+  type LanguageConfig,
 } from '../constants/language-config';
 import { RMABLogger } from '../utils/logger';
+import { parseRuntime } from '../utils/parse-runtime';
 import { randomDelay } from '../utils/scrape-resilience';

 const logger = RMABLogger.create('Audible.Series');
@@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<
      undefined;

    // Parse all books from the series page
-    const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes);
+    const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig);

    // Use actual book count if we got more from scraping
    const bookCount = Math.max(summary.bookCount, books.length);
@@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun
 function parseSeriesBooks(
  $: cheerio.CheerioAPI,
  authorPrefixes: string[],
-  narratorPrefixes: string[]
+  narratorPrefixes: string[],
+  langConfig: LanguageConfig
 ): AudibleAudiobook[] {
  const books: AudibleAudiobook[] = [];
  const seenAsins = new Set<string>();
@@ -453,6 +456,11 @@ function parseSeriesBooks(
    const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
    const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;

+    // Duration
+    const runtimeText = $el.find('.runtimeLabel').text().trim() ||
+      $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
+    const durationMinutes = parseRuntime(runtimeText, langConfig);
+
    books.push({
      asin: bookAsin,
      title,
@@ -461,6 +469,7 @@ function parseSeriesBooks(
      narrator: stripPrefixes(narratorText, narratorPrefixes),
      coverArtUrl,
      rating,
+      durationMinutes,
    });
  });

@@ -23,6 +23,7 @@ import {
  AdaptivePacer,
  FetchResultMeta,
 } from '../utils/scrape-resilience';
+import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';

 // Module-level logger
 const logger = RMABLogger.create('Audible');
@@ -1134,33 +1135,11 @@ export class AudibleService {
  }

  /**
-   * Parse runtime text to minutes using language-specific patterns
+   * Parse runtime text to minutes using language-specific patterns.
+   * Delegates to shared utility in src/lib/utils/parse-runtime.ts.
   */
  private parseRuntime(runtimeText: string): number | undefined {
-    if (!runtimeText) return undefined;
-
-    const langConfig = this.getLangConfig();
-    let totalMinutes = 0;
-
-    // Try each hour pattern until one matches
-    for (const pattern of langConfig.scraping.runtimeHourPatterns) {
-      const match = runtimeText.match(pattern);
-      if (match) {
-        totalMinutes += parseInt(match[1]) * 60;
-        break;
-      }
-    }
-
-    // Try each minute pattern until one matches
-    for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
-      const match = runtimeText.match(pattern);
-      if (match) {
-        totalMinutes += parseInt(match[1]);
-        break;
-      }
-    }
-
-    return totalMinutes > 0 ? totalMinutes : undefined;
+    return parseRuntimeUtil(runtimeText, this.getLangConfig());
  }

  /**
@@ -12,6 +12,7 @@ import { getJobQueueService } from '@/lib/services/job-queue.service';
 import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { RMABLogger } from '@/lib/utils/logger';
+import { seedAsin } from '@/lib/services/works.service';

 const logger = RMABLogger.create('RequestCreator');

@@ -147,6 +148,15 @@ export async function createRequestForUser(
    }
  }

+  // Seed works table for cross-ASIN matching (Layer 2: request-time seeding)
+  seedAsin(
+    audiobook.asin,
+    audiobookRecord.title,
+    audiobookRecord.author,
+    audiobookRecord.narrator || undefined,
+    undefined // duration not available at request time
+  ).catch(() => {});
+
  // Check if user already has an active request for this audiobook
  const existingRequest = await prisma.request.findFirst({
    where: {
@@ -0,0 +1,248 @@
+/**
+ * Component: Works Service
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Manages the works table — persistent cross-ASIN audiobook identity mapping.
+ * Layer 1: Auto-populated from dedup logic when users browse search/author/series pages.
+ * Layer 2: Seeded at request time to ensure requested ASINs are tracked.
+ */
+
+import { prisma } from '@/lib/db';
+import { RMABLogger } from '@/lib/utils/logger';
+import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
+
+const logger = RMABLogger.create('WorksService');
+
+// ---------------------------------------------------------------------------
+// Layer 1: Persist dedup groups (fire-and-forget from API routes)
+// ---------------------------------------------------------------------------
+
+/**
+ * Persist dedup groups to the works table. For each group of 2+ ASINs that
+ * were identified as the same audiobook, create or update a Work record
+ * linking all ASINs together.
+ *
+ * Safe to call fire-and-forget — never throws.
+ */
+export async function persistDedupGroups(groups: DedupGroup[]): Promise<void> {
+  try {
+    for (const group of groups) {
+      await persistSingleGroup(group);
+    }
+  } catch (error) {
+    logger.error('Failed to persist dedup groups', {
+      error: error instanceof Error ? error.message : String(error),
+      groupCount: groups.length,
+    });
+  }
+}
+
+/**
+ * Persist a single dedup group. Handles merging when ASINs span multiple
+ * existing works.
+ */
+async function persistSingleGroup(group: DedupGroup): Promise<void> {
+  const { canonicalAsin, allAsins, title, author, narrator, durationMinutes } = group;
+
+  // Find which of these ASINs already exist in work_asins
+  const existingEntries = await prisma.workAsin.findMany({
+    where: { asin: { in: allAsins } },
+    select: { asin: true, workId: true },
+  });
+
+  // Collect unique work IDs that already contain any of our ASINs
+  const existingWorkIds = [...new Set(existingEntries.map(e => e.workId))];
+  const existingAsinSet = new Set(existingEntries.map(e => e.asin));
+
+  if (existingWorkIds.length === 0) {
+    // No existing works — create a new one with all ASINs
+    const work = await prisma.work.create({
+      data: { title, author },
+    });
+
+    await Promise.all(
+      allAsins.map(asin =>
+        prisma.workAsin.create({
+          data: {
+            workId: work.id,
+            asin,
+            narrator: asin === canonicalAsin ? narrator : undefined,
+            durationMinutes: asin === canonicalAsin ? durationMinutes : undefined,
+            isCanonical: asin === canonicalAsin,
+            source: 'dedup_auto',
+          },
+        })
+      )
+    );
+
+    logger.debug('Created new work', { workId: work.id, asinCount: allAsins.length });
+  } else {
+    // Use the first existing work as the target
+    const targetWorkId = existingWorkIds[0];
+
+    // If multiple existing works, merge them into the target
+    if (existingWorkIds.length > 1) {
+      const mergeWorkIds = existingWorkIds.slice(1);
+
+      // Move all ASINs from other works to the target
+      await prisma.workAsin.updateMany({
+        where: { workId: { in: mergeWorkIds } },
+        data: { workId: targetWorkId },
+      });
+
+      // Delete the now-empty works
+      await prisma.work.deleteMany({
+        where: { id: { in: mergeWorkIds } },
+      });
+
+      logger.debug('Merged works', {
+        targetWorkId,
+        mergedWorkIds: mergeWorkIds,
+      });
+    }
+
+    // Add any new ASINs that don't already exist
+    const newAsins = allAsins.filter(a => !existingAsinSet.has(a));
+    if (newAsins.length > 0) {
+      await Promise.all(
+        newAsins.map(asin =>
+          prisma.workAsin.create({
+            data: {
+              workId: targetWorkId,
+              asin,
+              narrator: asin === canonicalAsin ? narrator : undefined,
+              durationMinutes: asin === canonicalAsin ? durationMinutes : undefined,
+              isCanonical: asin === canonicalAsin,
+              source: 'dedup_auto',
+            },
+          })
+        )
+      );
+
+      logger.debug('Added ASINs to existing work', {
+        workId: targetWorkId,
+        newAsinCount: newAsins.length,
+      });
+    }
+
+    // Update canonical status: ensure the canonical ASIN is marked
+    await prisma.workAsin.updateMany({
+      where: { workId: targetWorkId, asin: canonicalAsin },
+      data: { isCanonical: true },
+    });
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Layer 2: Seed ASIN at request time
+// ---------------------------------------------------------------------------
+
+/**
+ * Ensure an ASIN is tracked in the works table. Creates a single-ASIN work
+ * if the ASIN isn't already present. Called at request creation time.
+ *
+ * Safe to call fire-and-forget — never throws.
+ */
+export async function seedAsin(
+  asin: string,
+  title: string,
+  author: string,
+  narrator?: string,
+  durationMinutes?: number
+): Promise<void> {
+  try {
+    // Check if ASIN already tracked
+    const existing = await prisma.workAsin.findUnique({
+      where: { asin },
+    });
+    if (existing) return;
+
+    // Create a new single-ASIN work
+    const work = await prisma.work.create({
+      data: { title, author },
+    });
+
+    await prisma.workAsin.create({
+      data: {
+        workId: work.id,
+        asin,
+        narrator,
+        durationMinutes,
+        isCanonical: true,
+        source: 'dedup_auto',
+      },
+    });
+
+    logger.debug('Seeded ASIN', { workId: work.id, asin });
+  } catch (error) {
+    logger.error('Failed to seed ASIN', {
+      error: error instanceof Error ? error.message : String(error),
+      asin,
+    });
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Sibling ASIN lookup (for library matching expansion)
+// ---------------------------------------------------------------------------
+
+/**
+ * Given a list of ASINs, return a map of each input ASIN to its sibling ASINs
+ * (other ASINs in the same work, NOT including the input ASIN itself).
+ *
+ * ASINs not found in the works table are simply omitted from the result.
+ */
+export async function getSiblingAsins(
+  asins: string[]
+): Promise<Map<string, string[]>> {
+  const result = new Map<string, string[]>();
+  if (asins.length === 0) return result;
+
+  // Step 1: Find which input ASINs are in work_asins and their work IDs
+  const inputEntries = await prisma.workAsin.findMany({
+    where: { asin: { in: asins } },
+    select: { asin: true, workId: true },
+  });
+
+  if (inputEntries.length === 0) return result;
+
+  // Build map of workId -> input ASINs in that work
+  const workIdToInputAsins = new Map<string, string[]>();
+  for (const entry of inputEntries) {
+    const list = workIdToInputAsins.get(entry.workId);
+    if (list) {
+      list.push(entry.asin);
+    } else {
+      workIdToInputAsins.set(entry.workId, [entry.asin]);
+    }
+  }
+
+  // Step 2: Get ALL ASINs in those works
+  const workIds = [...workIdToInputAsins.keys()];
+  const allWorkAsins = await prisma.workAsin.findMany({
+    where: { workId: { in: workIds } },
+    select: { asin: true, workId: true },
+  });
+
+  // Build map of workId -> all ASINs
+  const workIdToAllAsins = new Map<string, string[]>();
+  for (const entry of allWorkAsins) {
+    const list = workIdToAllAsins.get(entry.workId);
+    if (list) {
+      list.push(entry.asin);
+    } else {
+      workIdToAllAsins.set(entry.workId, [entry.asin]);
+    }
+  }
+
+  // Step 3: For each input ASIN, compute siblings (all ASINs in same work minus self)
+  for (const entry of inputEntries) {
+    const allInWork = workIdToAllAsins.get(entry.workId) || [];
+    const siblings = allInWork.filter(a => a !== entry.asin);
+    if (siblings.length > 0) {
+      result.set(entry.asin, siblings);
+    }
+  }
+
+  return result;
+}
@@ -8,6 +8,7 @@

 import { prisma } from '@/lib/db';
 import { LibraryItem } from '@/lib/services/library';
+import { getSiblingAsins } from '@/lib/services/works.service';
 import { RMABLogger } from './logger';

 // Module-level logger
@@ -178,6 +179,61 @@ export async function enrichAudiobooksWithMatches(
    }
  }

+  // Works-table sibling expansion: check if unmatched ASINs have siblings in the library
+  try {
+    const unmatchedAsins = results.filter(r => !r.isAvailable).map(r => r.asin);
+    if (unmatchedAsins.length > 0) {
+      const siblingMap = await getSiblingAsins(unmatchedAsins);
+      if (siblingMap.size > 0) {
+        // Collect all sibling ASINs for a single batch library query
+        const allSiblingAsins = new Set<string>();
+        for (const siblings of siblingMap.values()) {
+          for (const s of siblings) allSiblingAsins.add(s);
+        }
+
+        if (allSiblingAsins.size > 0) {
+          const siblingLibraryMatches = await prisma.plexLibrary.findMany({
+            where: { asin: { in: [...allSiblingAsins] } },
+            select: { asin: true, plexGuid: true },
+          });
+          const libraryAsinSet = new Set(
+            siblingLibraryMatches.filter(m => m.asin).map(m => m.asin!.toLowerCase())
+          );
+
+          // Update results where a sibling ASIN is found in the library
+          for (const result of results) {
+            if (result.isAvailable) continue;
+            const siblings = siblingMap.get(result.asin);
+            if (!siblings) continue;
+            const matchedSiblingAsin = siblings.find(s => libraryAsinSet.has(s.toLowerCase()));
+            if (matchedSiblingAsin) {
+              const libMatch = siblingLibraryMatches.find(
+                m => m.asin?.toLowerCase() === matchedSiblingAsin.toLowerCase()
+              );
+              (result as any).isAvailable = true;
+              (result as any).plexGuid = libMatch?.plexGuid || null;
+            }
+          }
+
+          const siblingMatchCount = results.filter(r => {
+            if (!r.isAvailable) return false;
+            return siblingMap.has(r.asin);
+          }).length;
+          logger.debug('Sibling expansion', {
+            unmatchedCount: unmatchedAsins.length,
+            siblingGroupsFound: siblingMap.size,
+            siblingMatches: siblingMatchCount,
+          });
+        }
+      }
+    }
+  } catch (error) {
+    // Works table expansion is best-effort — direct matches still work
+    logger.error('Sibling ASIN expansion failed', {
+      error: error instanceof Error ? error.message : String(error),
+    });
+  }
+
  // Always enrich with request status (check ANY user's requests)
  const asins = audiobooks.map(book => book.asin);

@@ -307,6 +363,19 @@ export async function getAvailableAsins(): Promise<Set<string>> {
  for (const item of completedRequests) {
    if (item.audibleAsin) asins.add(item.audibleAsin);
  }
+
+  // Expand with works-table sibling ASINs
+  try {
+    if (asins.size > 0) {
+      const siblingMap = await getSiblingAsins([...asins]);
+      for (const siblings of siblingMap.values()) {
+        for (const s of siblings) asins.add(s);
+      }
+    }
+  } catch {
+    // Works table expansion is best-effort
+  }
+
  return asins;
 }

@@ -0,0 +1,201 @@
+/**
+ * Component: Audiobook Deduplication Utility
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Deduplicates audiobook listings that represent the same recording
+ * under different ASINs (publisher re-listings, rights transfers, etc.).
+ *
+ * Dedup key: normalized title + normalized narrator
+ * Duration tolerance: max(longerDuration * 0.01, 5) minutes
+ * Missing duration treated as compatible (graceful degradation).
+ */
+
+import type { AudibleAudiobook } from '../integrations/audible.service';
+
+// ---------------------------------------------------------------------------
+// Title / narrator normalization
+// ---------------------------------------------------------------------------
+
+/** Patterns in parentheses or brackets to strip (edition markers, format labels) */
+const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;
+
+/** Trailing subtitle after colon or long dash */
+const SUBTITLE_RE = /\s*[:]\s+.+$/;
+const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;
+
+/** Trailing descriptors like "A Novel", "A Memoir" */
+const TRAILING_DESCRIPTOR_RE = /\s*[-:,]?\s+a\s+(novel|memoir|thriller|mystery|romance|story|tale|novella)\s*$/i;
+
+/**
+ * Normalize a title for dedup comparison.
+ * Strips subtitles, edition markers, and trailing descriptors.
+ */
+export function normalizeTitle(title: string): string {
+  let t = title.toLowerCase();
+  // Remove parenthesized/bracketed edition markers
+  t = t.replace(EDITION_PAREN_RE, '');
+  // Remove trailing descriptors before subtitle stripping
+  t = t.replace(TRAILING_DESCRIPTOR_RE, '');
+  // Remove subtitle after colon
+  t = t.replace(SUBTITLE_RE, '');
+  // Remove subtitle after long dash (but not short hyphenated words)
+  t = t.replace(LONG_DASH_SUBTITLE_RE, '');
+  // Collapse whitespace and trim
+  return t.replace(/\s+/g, ' ').trim();
+}
+
+/** Normalize narrator for comparison. */
+function normalizeNarrator(narrator?: string): string {
+  return (narrator || '').toLowerCase().trim();
+}
+
+// ---------------------------------------------------------------------------
+// Duration compatibility
+// ---------------------------------------------------------------------------
+
+/**
+ * Check if two durations are compatible (represent the same recording).
+ * Tolerance: max(longerDuration * 0.01, 5) minutes.
+ * Missing duration on either side is treated as compatible.
+ */
+export function areDurationsCompatible(a?: number, b?: number): boolean {
+  if (a == null || b == null) return true;
+  const longer = Math.max(a, b);
+  const tolerance = Math.max(longer * 0.01, 5);
+  return Math.abs(a - b) <= tolerance;
+}
+
+// ---------------------------------------------------------------------------
+// Metadata scoring (for picking best representative)
+// ---------------------------------------------------------------------------
+
+function metadataScore(book: AudibleAudiobook): number {
+  let score = 0;
+  if (book.coverArtUrl) score++;
+  if (book.rating != null) score++;
+  if (book.durationMinutes != null) score++;
+  if (book.description) score++;
+  if (book.narrator) score++;
+  if (book.releaseDate) score++;
+  if (book.genres && book.genres.length > 0) score++;
+  return score;
+}
+
+// ---------------------------------------------------------------------------
+// Dedup group types (for works-table persistence)
+// ---------------------------------------------------------------------------
+
+/** Metadata about a group of ASINs that were collapsed during dedup. */
+export interface DedupGroup {
+  canonicalAsin: string;     // ASIN of the "winner" (best metadata score)
+  allAsins: string[];        // All ASINs in this group (including canonical)
+  title: string;             // Author from the canonical entry
+  author: string;            // Author from the canonical entry
+  narrator?: string;         // Narrator from the canonical entry
+  durationMinutes?: number;  // Duration from the canonical entry
+}
+
+/** Result of deduplication with group collection. */
+export interface DeduplicateResult {
+  books: AudibleAudiobook[];  // The deduped list (same as deduplicateAudiobooks returns)
+  groups: DedupGroup[];       // Groups where 2+ ASINs were collapsed
+}
+
+// ---------------------------------------------------------------------------
+// Main dedup functions
+// ---------------------------------------------------------------------------
+
+/**
+ * Deduplicate audiobook listings by normalized title + narrator + duration.
+ *
+ * Same narrator + compatible duration + similar title = same recording -> collapse.
+ * Different narrator = different production -> keep both.
+ * Duration outside tolerance = different content (abridged vs unabridged) -> keep both.
+ *
+ * Preserves original ordering (position of first appearance).
+ */
+export function deduplicateAudiobooks(books: AudibleAudiobook[]): AudibleAudiobook[] {
+  return deduplicateAndCollectGroups(books).books;
+}
+
+/**
+ * Deduplicate audiobooks AND return grouping metadata for works-table persistence.
+ * Returns both the deduped list and the groups where 2+ ASINs were collapsed.
+ */
+export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): DeduplicateResult {
+  if (books.length <= 1) return { books: [...books], groups: [] };
+
+  // Group by normalized title + narrator
+  const titleNarratorGroups = new Map<string, AudibleAudiobook[]>();
+  const insertionOrder: string[] = [];
+
+  for (const book of books) {
+    const key = `${normalizeTitle(book.title)}|||${normalizeNarrator(book.narrator)}`;
+    const group = titleNarratorGroups.get(key);
+    if (group) {
+      group.push(book);
+    } else {
+      titleNarratorGroups.set(key, [book]);
+      insertionOrder.push(key);
+    }
+  }
+
+  const result: AudibleAudiobook[] = [];
+  const dedupGroups: DedupGroup[] = [];
+
+  for (const key of insertionOrder) {
+    const group = titleNarratorGroups.get(key)!;
+    if (group.length === 1) {
+      result.push(group[0]);
+      continue;
+    }
+
+    // Within a title+narrator group, further split by duration compatibility.
+    // Build sub-groups where all members are duration-compatible with the
+    // representative (first member). A book joins the first compatible sub-group.
+    const subGroups: AudibleAudiobook[][] = [];
+
+    for (const book of group) {
+      let placed = false;
+      for (const sg of subGroups) {
+        // Check compatibility against the representative (first member)
+        if (areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes)) {
+          sg.push(book);
+          placed = true;
+          break;
+        }
+      }
+      if (!placed) {
+        subGroups.push([book]);
+      }
+    }
+
+    // From each sub-group, pick the best representative and collect group metadata
+    for (const sg of subGroups) {
+      let best = sg[0];
+      let bestScore = metadataScore(best);
+      for (let i = 1; i < sg.length; i++) {
+        const score = metadataScore(sg[i]);
+        if (score > bestScore) {
+          best = sg[i];
+          bestScore = score;
+        }
+      }
+      result.push(best);
+
+      // Collect group metadata for works-table persistence (only multi-ASIN groups)
+      if (sg.length >= 2) {
+        dedupGroups.push({
+          canonicalAsin: best.asin,
+          allAsins: sg.map(b => b.asin),
+          title: best.title,
+          author: best.author,
+          narrator: best.narrator,
+          durationMinutes: best.durationMinutes,
+        });
+      }
+    }
+  }
+
+  return { books: result, groups: dedupGroups };
+}
@@ -0,0 +1,44 @@
+/**
+ * Component: Runtime Parsing Utility
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Shared runtime/duration text parser extracted from AudibleService.
+ * Handles all i18n patterns (English, German, Spanish, French) via
+ * language-specific regex patterns in LanguageConfig.
+ */
+
+import type { LanguageConfig } from '../constants/language-config';
+
+/**
+ * Parse runtime text (e.g. "12 hrs and 30 mins", "5 Std. 20 Min.")
+ * into total minutes using language-specific patterns.
+ *
+ * @param runtimeText - Raw runtime string from Audible HTML
+ * @param langConfig  - Language configuration with hour/minute regex patterns
+ * @returns Total minutes, or undefined if no duration could be parsed
+ */
+export function parseRuntime(runtimeText: string, langConfig: LanguageConfig): number | undefined {
+  if (!runtimeText) return undefined;
+
+  let totalMinutes = 0;
+
+  // Try each hour pattern until one matches
+  for (const pattern of langConfig.scraping.runtimeHourPatterns) {
+    const match = runtimeText.match(pattern);
+    if (match) {
+      totalMinutes += parseInt(match[1]) * 60;
+      break;
+    }
+  }
+
+  // Try each minute pattern until one matches
+  for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
+    const match = runtimeText.match(pattern);
+    if (match) {
+      totalMinutes += parseInt(match[1]);
+      break;
+    }
+  }
+
+  return totalMinutes > 0 ? totalMinutes : undefined;
+}