mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-05 13:50:11 +00:00
Add works table and ASIN deduping
Add persistent cross-ASIN "works" mapping and client-side deduplication to improve library matching. Introduces a Prisma migration and models (Work, WorkAsin) plus src/lib/services/works.service for persisting dedup groups, seeding ASINs at request time, and sibling lookup. Adds a deduplication utility (deduplicate-audiobooks) that normalizes titles/narrators, compares durations, and returns grouping metadata; API routes (search, author, series) now deduplicate results before enrichment and fire-and-forget persist groups. Adds sibling-ASIN expansion into audiobook matcher and expands getAvailableAsins accordingly. Extracts runtime parsing into a shared parse-runtime util and updates audible scrapers/services to use it. Includes unit tests for dedup logic and works service and updates test Prisma mocks.
This commit is contained in:
@@ -0,0 +1,42 @@
|
|||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "works" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"title" TEXT NOT NULL,
|
||||||
|
"author" TEXT NOT NULL,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
"updated_at" TIMESTAMP(3) NOT NULL,
|
||||||
|
|
||||||
|
CONSTRAINT "works_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateTable
|
||||||
|
CREATE TABLE "work_asins" (
|
||||||
|
"id" TEXT NOT NULL,
|
||||||
|
"work_id" TEXT NOT NULL,
|
||||||
|
"asin" TEXT NOT NULL,
|
||||||
|
"narrator" TEXT,
|
||||||
|
"duration_minutes" INTEGER,
|
||||||
|
"is_canonical" BOOLEAN NOT NULL DEFAULT false,
|
||||||
|
"source" TEXT NOT NULL,
|
||||||
|
"created_at" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
|
||||||
|
CONSTRAINT "work_asins_pkey" PRIMARY KEY ("id")
|
||||||
|
);
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "works_title_idx" ON "works"("title");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "works_author_idx" ON "works"("author");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE UNIQUE INDEX "work_asins_asin_key" ON "work_asins"("asin");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
CREATE INDEX "work_asins_work_id_idx" ON "work_asins"("work_id");
|
||||||
|
|
||||||
|
-- CreateIndex
|
||||||
|
-- NOTE(review): "work_asins_asin_key" (created earlier in this migration) is already a UNIQUE index on "asin"; this second, non-unique index on the same column looks redundant. Confirm, then drop it here and in the Prisma schema together.
CREATE INDEX "work_asins_asin_idx" ON "work_asins"("asin");
|
||||||
|
|
||||||
|
-- AddForeignKey
|
||||||
|
ALTER TABLE "work_asins" ADD CONSTRAINT "work_asins_work_id_fkey" FOREIGN KEY ("work_id") REFERENCES "works"("id") ON DELETE CASCADE ON UPDATE CASCADE;
|
||||||
@@ -531,3 +531,43 @@ model GoodreadsBookMapping {
|
|||||||
@@index([audibleAsin])
|
@@index([audibleAsin])
|
||||||
@@map("goodreads_book_mappings")
|
@@map("goodreads_book_mappings")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// WORKS TABLE
|
||||||
|
// Cross-ASIN audiobook identity mapping — links multiple Audible ASINs
|
||||||
|
// to a single logical work for library matching across editions.
|
||||||
|
// Documentation: documentation/integrations/audible.md
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// One logical audiobook ("work"). The Audible ASINs that belong to it are the
// related WorkAsin rows; title and author are stored as plain strings.
model Work {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
title String
|
||||||
|
author String
|
||||||
|
createdAt DateTime @default(now()) @map("created_at")
|
||||||
|
updatedAt DateTime @updatedAt @map("updated_at")
|
||||||
|
|
||||||
|
// Relations
|
||||||
|
// All ASINs linked to this work (deleted with the work via the cascade declared on WorkAsin.work)
asins WorkAsin[]
|
||||||
|
|
||||||
|
@@index([title])
|
||||||
|
@@index([author])
|
||||||
|
@@map("works")
|
||||||
|
}
|
||||||
|
|
||||||
|
// One Audible ASIN attached to a Work. An ASIN can belong to at most one work
// because `asin` is unique.
model WorkAsin {
|
||||||
|
id String @id @default(uuid())
|
||||||
|
workId String @map("work_id")
|
||||||
|
asin String @unique
|
||||||
|
// Optional per-ASIN metadata; either may be absent
narrator String?
|
||||||
|
durationMinutes Int? @map("duration_minutes")
|
||||||
|
// True for the ASIN treated as the representative listing of its work
isCanonical Boolean @default(false) @map("is_canonical")
|
||||||
|
source String // 'dedup_auto' | 'admin_manual'
|
||||||
|
createdAt DateTime @default(now()) @map("created_at")
|
||||||
|
|
||||||
|
// Relations
|
||||||
|
// Deleting a Work deletes its WorkAsin rows (onDelete: Cascade)
work Work @relation(fields: [workId], references: [id], onDelete: Cascade)
|
||||||
|
|
||||||
|
@@index([workId])
|
||||||
|
// NOTE(review): `asin` is already declared @unique above, which creates its own
// unique index; this extra non-unique index on the same column looks redundant.
// Confirm, then remove it together with the matching migration index.
@@index([asin])
|
||||||
|
@@map("work_asins")
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,6 +6,8 @@
|
|||||||
import { NextRequest, NextResponse } from 'next/server';
|
import { NextRequest, NextResponse } from 'next/server';
|
||||||
import { getAudibleService } from '@/lib/integrations/audible.service';
|
import { getAudibleService } from '@/lib/integrations/audible.service';
|
||||||
import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
|
import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
|
||||||
|
import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
|
||||||
|
import { persistDedupGroups } from '@/lib/services/works.service';
|
||||||
import { getCurrentUser } from '@/lib/middleware/auth';
|
import { getCurrentUser } from '@/lib/middleware/auth';
|
||||||
import { RMABLogger } from '@/lib/utils/logger';
|
import { RMABLogger } from '@/lib/utils/logger';
|
||||||
|
|
||||||
@@ -38,14 +40,22 @@ export async function GET(request: NextRequest) {
|
|||||||
const currentUser = getCurrentUser(request);
|
const currentUser = getCurrentUser(request);
|
||||||
const userId = currentUser?.sub || undefined;
|
const userId = currentUser?.sub || undefined;
|
||||||
|
|
||||||
|
// Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
|
||||||
|
const { books: dedupedResults, groups } = deduplicateAndCollectGroups(results.results);
|
||||||
|
|
||||||
|
// Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
|
||||||
|
if (groups.length > 0) {
|
||||||
|
persistDedupGroups(groups).catch(() => {});
|
||||||
|
}
|
||||||
|
|
||||||
// Enrich search results with availability and request status information
|
// Enrich search results with availability and request status information
|
||||||
const enrichedResults = await enrichAudiobooksWithMatches(results.results, userId);
|
const enrichedResults = await enrichAudiobooksWithMatches(dedupedResults, userId);
|
||||||
|
|
||||||
return NextResponse.json({
|
return NextResponse.json({
|
||||||
success: true,
|
success: true,
|
||||||
query: results.query,
|
query: results.query,
|
||||||
results: enrichedResults,
|
results: enrichedResults,
|
||||||
totalResults: results.totalResults,
|
totalResults: enrichedResults.length,
|
||||||
page: results.page,
|
page: results.page,
|
||||||
hasMore: results.hasMore,
|
hasMore: results.hasMore,
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -6,6 +6,8 @@
|
|||||||
import { NextRequest, NextResponse } from 'next/server';
|
import { NextRequest, NextResponse } from 'next/server';
|
||||||
import { getAudibleService } from '@/lib/integrations/audible.service';
|
import { getAudibleService } from '@/lib/integrations/audible.service';
|
||||||
import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
|
import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
|
||||||
|
import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
|
||||||
|
import { persistDedupGroups } from '@/lib/services/works.service';
|
||||||
import { getCurrentUser } from '@/lib/middleware/auth';
|
import { getCurrentUser } from '@/lib/middleware/auth';
|
||||||
import { RMABLogger } from '@/lib/utils/logger';
|
import { RMABLogger } from '@/lib/utils/logger';
|
||||||
|
|
||||||
@@ -53,9 +55,17 @@ export async function GET(
|
|||||||
const audibleService = getAudibleService();
|
const audibleService = getAudibleService();
|
||||||
const result = await audibleService.searchByAuthorAsin(authorName.trim(), asin, page);
|
const result = await audibleService.searchByAuthorAsin(authorName.trim(), asin, page);
|
||||||
|
|
||||||
|
// Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
|
||||||
|
const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(result.books);
|
||||||
|
|
||||||
|
// Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
|
||||||
|
if (groups.length > 0) {
|
||||||
|
persistDedupGroups(groups).catch(() => {});
|
||||||
|
}
|
||||||
|
|
||||||
// Enrich with library availability and request status
|
// Enrich with library availability and request status
|
||||||
const userId = currentUser.sub || undefined;
|
const userId = currentUser.sub || undefined;
|
||||||
const enrichedBooks = await enrichAudiobooksWithMatches(result.books, userId);
|
const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);
|
||||||
|
|
||||||
logger.info(`Author books complete: "${authorName}" → ${enrichedBooks.length} books (page ${page})`);
|
logger.info(`Author books complete: "${authorName}" → ${enrichedBooks.length} books (page ${page})`);
|
||||||
|
|
||||||
@@ -64,7 +74,7 @@ export async function GET(
|
|||||||
books: enrichedBooks,
|
books: enrichedBooks,
|
||||||
authorName: authorName.trim(),
|
authorName: authorName.trim(),
|
||||||
authorAsin: asin,
|
authorAsin: asin,
|
||||||
totalBooks: result.totalResults || enrichedBooks.length,
|
totalBooks: enrichedBooks.length,
|
||||||
hasMore: result.hasMore,
|
hasMore: result.hasMore,
|
||||||
page: result.page,
|
page: result.page,
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -8,6 +8,8 @@ import { getCurrentUser } from '@/lib/middleware/auth';
|
|||||||
import { RMABLogger } from '@/lib/utils/logger';
|
import { RMABLogger } from '@/lib/utils/logger';
|
||||||
import { scrapeSeriesPage } from '@/lib/integrations/audible-series';
|
import { scrapeSeriesPage } from '@/lib/integrations/audible-series';
|
||||||
import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
|
import { enrichAudiobooksWithMatches } from '@/lib/utils/audiobook-matcher';
|
||||||
|
import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
|
||||||
|
import { persistDedupGroups } from '@/lib/services/works.service';
|
||||||
|
|
||||||
const logger = RMABLogger.create('API.Series.Detail');
|
const logger = RMABLogger.create('API.Series.Detail');
|
||||||
|
|
||||||
@@ -49,9 +51,17 @@ export async function GET(
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Deduplicate before enrichment to avoid wasted DB queries on duplicate entries
|
||||||
|
const { books: dedupedBooks, groups } = deduplicateAndCollectGroups(detail.books);
|
||||||
|
|
||||||
|
// Fire-and-forget: persist dedup groups to works table for cross-ASIN matching
|
||||||
|
if (groups.length > 0) {
|
||||||
|
persistDedupGroups(groups).catch(() => {});
|
||||||
|
}
|
||||||
|
|
||||||
// Enrich books with library availability and request status
|
// Enrich books with library availability and request status
|
||||||
const userId = currentUser.sub || undefined;
|
const userId = currentUser.sub || undefined;
|
||||||
const enrichedBooks = await enrichAudiobooksWithMatches(detail.books, userId);
|
const enrichedBooks = await enrichAudiobooksWithMatches(dedupedBooks, userId);
|
||||||
|
|
||||||
logger.info(`Series detail complete: "${detail.title}" (${enrichedBooks.length} books, page ${page})`);
|
logger.info(`Series detail complete: "${detail.title}" (${enrichedBooks.length} books, page ${page})`);
|
||||||
|
|
||||||
|
|||||||
@@ -14,8 +14,10 @@ import {
|
|||||||
getLanguageForRegion,
|
getLanguageForRegion,
|
||||||
buildContainsSelector,
|
buildContainsSelector,
|
||||||
stripPrefixes,
|
stripPrefixes,
|
||||||
|
type LanguageConfig,
|
||||||
} from '../constants/language-config';
|
} from '../constants/language-config';
|
||||||
import { RMABLogger } from '../utils/logger';
|
import { RMABLogger } from '../utils/logger';
|
||||||
|
import { parseRuntime } from '../utils/parse-runtime';
|
||||||
import { randomDelay } from '../utils/scrape-resilience';
|
import { randomDelay } from '../utils/scrape-resilience';
|
||||||
|
|
||||||
const logger = RMABLogger.create('Audible.Series');
|
const logger = RMABLogger.create('Audible.Series');
|
||||||
@@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<
|
|||||||
undefined;
|
undefined;
|
||||||
|
|
||||||
// Parse all books from the series page
|
// Parse all books from the series page
|
||||||
const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes);
|
const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig);
|
||||||
|
|
||||||
// Use actual book count if we got more from scraping
|
// Use actual book count if we got more from scraping
|
||||||
const bookCount = Math.max(summary.bookCount, books.length);
|
const bookCount = Math.max(summary.bookCount, books.length);
|
||||||
@@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun
|
|||||||
function parseSeriesBooks(
|
function parseSeriesBooks(
|
||||||
$: cheerio.CheerioAPI,
|
$: cheerio.CheerioAPI,
|
||||||
authorPrefixes: string[],
|
authorPrefixes: string[],
|
||||||
narratorPrefixes: string[]
|
narratorPrefixes: string[],
|
||||||
|
langConfig: LanguageConfig
|
||||||
): AudibleAudiobook[] {
|
): AudibleAudiobook[] {
|
||||||
const books: AudibleAudiobook[] = [];
|
const books: AudibleAudiobook[] = [];
|
||||||
const seenAsins = new Set<string>();
|
const seenAsins = new Set<string>();
|
||||||
@@ -453,6 +456,11 @@ function parseSeriesBooks(
|
|||||||
const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
|
const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
|
||||||
const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;
|
const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;
|
||||||
|
|
||||||
|
// Duration
|
||||||
|
const runtimeText = $el.find('.runtimeLabel').text().trim() ||
|
||||||
|
$el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
|
||||||
|
const durationMinutes = parseRuntime(runtimeText, langConfig);
|
||||||
|
|
||||||
books.push({
|
books.push({
|
||||||
asin: bookAsin,
|
asin: bookAsin,
|
||||||
title,
|
title,
|
||||||
@@ -461,6 +469,7 @@ function parseSeriesBooks(
|
|||||||
narrator: stripPrefixes(narratorText, narratorPrefixes),
|
narrator: stripPrefixes(narratorText, narratorPrefixes),
|
||||||
coverArtUrl,
|
coverArtUrl,
|
||||||
rating,
|
rating,
|
||||||
|
durationMinutes,
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ import {
|
|||||||
AdaptivePacer,
|
AdaptivePacer,
|
||||||
FetchResultMeta,
|
FetchResultMeta,
|
||||||
} from '../utils/scrape-resilience';
|
} from '../utils/scrape-resilience';
|
||||||
|
import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';
|
||||||
|
|
||||||
// Module-level logger
|
// Module-level logger
|
||||||
const logger = RMABLogger.create('Audible');
|
const logger = RMABLogger.create('Audible');
|
||||||
@@ -1134,33 +1135,11 @@ export class AudibleService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parse runtime text to minutes using language-specific patterns
|
* Parse runtime text to minutes using language-specific patterns.
|
||||||
|
* Delegates to shared utility in src/lib/utils/parse-runtime.ts.
|
||||||
*/
|
*/
|
||||||
private parseRuntime(runtimeText: string): number | undefined {
|
private parseRuntime(runtimeText: string): number | undefined {
|
||||||
if (!runtimeText) return undefined;
|
return parseRuntimeUtil(runtimeText, this.getLangConfig());
|
||||||
|
|
||||||
const langConfig = this.getLangConfig();
|
|
||||||
let totalMinutes = 0;
|
|
||||||
|
|
||||||
// Try each hour pattern until one matches
|
|
||||||
for (const pattern of langConfig.scraping.runtimeHourPatterns) {
|
|
||||||
const match = runtimeText.match(pattern);
|
|
||||||
if (match) {
|
|
||||||
totalMinutes += parseInt(match[1]) * 60;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try each minute pattern until one matches
|
|
||||||
for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
|
|
||||||
const match = runtimeText.match(pattern);
|
|
||||||
if (match) {
|
|
||||||
totalMinutes += parseInt(match[1]);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return totalMinutes > 0 ? totalMinutes : undefined;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import { getJobQueueService } from '@/lib/services/job-queue.service';
|
|||||||
import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
|
import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
|
||||||
import { getAudibleService } from '@/lib/integrations/audible.service';
|
import { getAudibleService } from '@/lib/integrations/audible.service';
|
||||||
import { RMABLogger } from '@/lib/utils/logger';
|
import { RMABLogger } from '@/lib/utils/logger';
|
||||||
|
import { seedAsin } from '@/lib/services/works.service';
|
||||||
|
|
||||||
const logger = RMABLogger.create('RequestCreator');
|
const logger = RMABLogger.create('RequestCreator');
|
||||||
|
|
||||||
@@ -147,6 +148,15 @@ export async function createRequestForUser(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Seed works table for cross-ASIN matching (Layer 2: request-time seeding)
|
||||||
|
seedAsin(
|
||||||
|
audiobook.asin,
|
||||||
|
audiobookRecord.title,
|
||||||
|
audiobookRecord.author,
|
||||||
|
audiobookRecord.narrator || undefined,
|
||||||
|
undefined // duration not available at request time
|
||||||
|
).catch(() => {});
|
||||||
|
|
||||||
// Check if user already has an active request for this audiobook
|
// Check if user already has an active request for this audiobook
|
||||||
const existingRequest = await prisma.request.findFirst({
|
const existingRequest = await prisma.request.findFirst({
|
||||||
where: {
|
where: {
|
||||||
|
|||||||
@@ -0,0 +1,248 @@
|
|||||||
|
/**
|
||||||
|
* Component: Works Service
|
||||||
|
* Documentation: documentation/integrations/audible.md
|
||||||
|
*
|
||||||
|
* Manages the works table — persistent cross-ASIN audiobook identity mapping.
|
||||||
|
* Layer 1: Auto-populated from dedup logic when users browse search/author/series pages.
|
||||||
|
* Layer 2: Seeded at request time to ensure requested ASINs are tracked.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { prisma } from '@/lib/db';
|
||||||
|
import { RMABLogger } from '@/lib/utils/logger';
|
||||||
|
import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
|
||||||
|
|
||||||
|
const logger = RMABLogger.create('WorksService');
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Layer 1: Persist dedup groups (fire-and-forget from API routes)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Persist dedup groups to the works table. For each group of 2+ ASINs that
 * were identified as the same audiobook, create or update a Work record
 * linking all ASINs together.
 *
 * Groups are persisted one after another and independently of each other:
 * when one group fails, the failure is logged and the remaining groups are
 * still processed.
 *
 * Safe to call fire-and-forget — never throws.
 *
 * @param groups - Dedup groups to persist.
 */
export async function persistDedupGroups(groups: DedupGroup[]): Promise<void> {
  for (const group of groups) {
    try {
      await persistSingleGroup(group);
    } catch (error) {
      // Isolate the failure to this group so one bad group cannot discard
      // every group that follows it in the batch.
      logger.error('Failed to persist dedup groups', {
        error: error instanceof Error ? error.message : String(error),
        groupCount: groups.length,
        canonicalAsin: group.canonicalAsin,
      });
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Persist a single dedup group. Handles merging when ASINs span multiple
 * existing works.
 *
 * - None of the group's ASINs is tracked yet: the Work and all of its
 *   WorkAsin rows are created in one nested write, so a failure cannot leave
 *   a Work without ASINs behind.
 * - Some ASINs are tracked: the first work found becomes the target, any
 *   other works are merged into it, and the untracked ASINs are added in a
 *   single `createMany` that skips rows inserted concurrently by another
 *   request.
 *
 * Errors are not caught here; the caller is responsible for logging them.
 *
 * @param group - The dedup group (canonical ASIN, all ASINs, and metadata).
 */
async function persistSingleGroup(group: DedupGroup): Promise<void> {
  const { canonicalAsin, allAsins, title, author, narrator, durationMinutes } = group;

  // work_asins.asin is unique, so a repeated ASIN inside one group would
  // otherwise make the insert fail.
  const uniqueAsins = [...new Set(allAsins)];

  // The group carries one narrator/duration; they are stored on the canonical
  // ASIN only, the other ASINs get no narrator/duration.
  const toAsinData = (asin: string) => ({
    asin,
    narrator: asin === canonicalAsin ? narrator : undefined,
    durationMinutes: asin === canonicalAsin ? durationMinutes : undefined,
    isCanonical: asin === canonicalAsin,
    source: 'dedup_auto',
  });

  // Find which of these ASINs already exist in work_asins
  const existingEntries = await prisma.workAsin.findMany({
    where: { asin: { in: uniqueAsins } },
    select: { asin: true, workId: true },
  });

  // Collect unique work IDs that already contain any of our ASINs
  const existingWorkIds = [...new Set(existingEntries.map(e => e.workId))];
  const existingAsinSet = new Set(existingEntries.map(e => e.asin));

  if (existingWorkIds.length === 0) {
    // No existing works — create the work and all of its ASINs atomically
    const work = await prisma.work.create({
      data: {
        title,
        author,
        asins: { create: uniqueAsins.map(toAsinData) },
      },
    });

    logger.debug('Created new work', { workId: work.id, asinCount: uniqueAsins.length });
    return;
  }

  // Use the first existing work as the target
  const targetWorkId = existingWorkIds[0];

  // If multiple existing works, merge them into the target
  if (existingWorkIds.length > 1) {
    const mergeWorkIds = existingWorkIds.slice(1);

    // Move all ASINs from other works to the target
    await prisma.workAsin.updateMany({
      where: { workId: { in: mergeWorkIds } },
      data: { workId: targetWorkId },
    });

    // Delete the now-empty works
    await prisma.work.deleteMany({
      where: { id: { in: mergeWorkIds } },
    });

    logger.debug('Merged works', {
      targetWorkId,
      mergedWorkIds: mergeWorkIds,
    });
  }

  // Add any new ASINs that don't already exist
  const newAsins = uniqueAsins.filter(a => !existingAsinSet.has(a));
  if (newAsins.length > 0) {
    // Single statement instead of N parallel creates: either all rows are
    // written or none. skipDuplicates tolerates an ASIN that another request
    // inserted after the lookup above.
    // NOTE(review): skipDuplicates is not available on every Prisma
    // connector; the migration for these tables targets PostgreSQL — confirm.
    await prisma.workAsin.createMany({
      data: newAsins.map(asin => ({ workId: targetWorkId, ...toAsinData(asin) })),
      skipDuplicates: true,
    });

    logger.debug('Added ASINs to existing work', {
      workId: targetWorkId,
      newAsinCount: newAsins.length,
    });
  }

  // Update canonical status: ensure the canonical ASIN is marked
  // NOTE(review): rows that were canonical before (including those moved in
  // by a merge) keep isCanonical = true, so a work can end up with several
  // canonical ASINs — confirm whether the older flags should be cleared.
  await prisma.workAsin.updateMany({
    where: { workId: targetWorkId, asin: canonicalAsin },
    data: { isCanonical: true },
  });
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Layer 2: Seed ASIN at request time
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Ensure an ASIN is tracked in the works table. Creates a single-ASIN work
 * if the ASIN isn't already present. Called at request creation time.
 *
 * The Work and its WorkAsin row are written in one nested create, so if the
 * insert fails (for example because another request tracked the same ASIN
 * after the lookup) no Work row without an ASIN is left behind.
 *
 * Safe to call fire-and-forget — never throws.
 *
 * @param asin - Audible ASIN to track.
 * @param title - Title stored on the newly created work.
 * @param author - Author stored on the newly created work.
 * @param narrator - Optional narrator stored on the ASIN row.
 * @param durationMinutes - Optional duration stored on the ASIN row.
 */
export async function seedAsin(
  asin: string,
  title: string,
  author: string,
  narrator?: string,
  durationMinutes?: number
): Promise<void> {
  try {
    // Check if ASIN already tracked
    const existing = await prisma.workAsin.findUnique({
      where: { asin },
    });
    if (existing) return;

    // Create the single-ASIN work and its ASIN row together
    const work = await prisma.work.create({
      data: {
        title,
        author,
        asins: {
          create: {
            asin,
            narrator,
            durationMinutes,
            isCanonical: true,
            source: 'dedup_auto',
          },
        },
      },
    });

    logger.debug('Seeded ASIN', { workId: work.id, asin });
  } catch (error) {
    logger.error('Failed to seed ASIN', {
      error: error instanceof Error ? error.message : String(error),
      asin,
    });
  }
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Sibling ASIN lookup (for library matching expansion)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Look up, for every given ASIN, the other ASINs that share its work.
 *
 * The returned map is keyed by input ASIN; each value lists the remaining
 * ASINs of the same work and never contains the key itself. Input ASINs that
 * are not in the works table, or whose work has no other ASIN, get no entry.
 *
 * @param asins - ASINs to expand.
 * @returns Map of input ASIN to its sibling ASINs.
 */
export async function getSiblingAsins(
  asins: string[]
): Promise<Map<string, string[]>> {
  const siblingsByAsin = new Map<string, string[]>();
  if (asins.length === 0) return siblingsByAsin;

  // First query: which of the requested ASINs are tracked, and in which work
  const matchedRows = await prisma.workAsin.findMany({
    where: { asin: { in: asins } },
    select: { asin: true, workId: true },
  });
  if (matchedRows.length === 0) return siblingsByAsin;

  // Distinct work IDs, in first-seen order
  const workIds = [...new Set(matchedRows.map(row => row.workId))];

  // Second query: every ASIN that belongs to one of those works
  const memberRows = await prisma.workAsin.findMany({
    where: { workId: { in: workIds } },
    select: { asin: true, workId: true },
  });

  // Group the members by work
  const membersByWork = new Map<string, string[]>();
  for (const { asin, workId } of memberRows) {
    const members = membersByWork.get(workId);
    if (members === undefined) {
      membersByWork.set(workId, [asin]);
    } else {
      members.push(asin);
    }
  }

  // Siblings of an input ASIN = members of its work, minus the ASIN itself
  for (const { asin, workId } of matchedRows) {
    const members = membersByWork.get(workId) || [];
    const others = members.filter(member => member !== asin);
    if (others.length === 0) continue;
    siblingsByAsin.set(asin, others);
  }

  return siblingsByAsin;
}
|
||||||
@@ -8,6 +8,7 @@
|
|||||||
|
|
||||||
import { prisma } from '@/lib/db';
|
import { prisma } from '@/lib/db';
|
||||||
import { LibraryItem } from '@/lib/services/library';
|
import { LibraryItem } from '@/lib/services/library';
|
||||||
|
import { getSiblingAsins } from '@/lib/services/works.service';
|
||||||
import { RMABLogger } from './logger';
|
import { RMABLogger } from './logger';
|
||||||
|
|
||||||
// Module-level logger
|
// Module-level logger
|
||||||
@@ -178,6 +179,61 @@ export async function enrichAudiobooksWithMatches(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Works-table sibling expansion: check if unmatched ASINs have siblings in the library
|
||||||
|
try {
|
||||||
|
const unmatchedAsins = results.filter(r => !r.isAvailable).map(r => r.asin);
|
||||||
|
if (unmatchedAsins.length > 0) {
|
||||||
|
const siblingMap = await getSiblingAsins(unmatchedAsins);
|
||||||
|
if (siblingMap.size > 0) {
|
||||||
|
// Collect all sibling ASINs for a single batch library query
|
||||||
|
const allSiblingAsins = new Set<string>();
|
||||||
|
for (const siblings of siblingMap.values()) {
|
||||||
|
for (const s of siblings) allSiblingAsins.add(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allSiblingAsins.size > 0) {
|
||||||
|
const siblingLibraryMatches = await prisma.plexLibrary.findMany({
|
||||||
|
where: { asin: { in: [...allSiblingAsins] } },
|
||||||
|
select: { asin: true, plexGuid: true },
|
||||||
|
});
|
||||||
|
const libraryAsinSet = new Set(
|
||||||
|
siblingLibraryMatches.filter(m => m.asin).map(m => m.asin!.toLowerCase())
|
||||||
|
);
|
||||||
|
|
||||||
|
// Update results where a sibling ASIN is found in the library
|
||||||
|
for (const result of results) {
|
||||||
|
if (result.isAvailable) continue;
|
||||||
|
const siblings = siblingMap.get(result.asin);
|
||||||
|
if (!siblings) continue;
|
||||||
|
const matchedSiblingAsin = siblings.find(s => libraryAsinSet.has(s.toLowerCase()));
|
||||||
|
if (matchedSiblingAsin) {
|
||||||
|
const libMatch = siblingLibraryMatches.find(
|
||||||
|
m => m.asin?.toLowerCase() === matchedSiblingAsin.toLowerCase()
|
||||||
|
);
|
||||||
|
(result as any).isAvailable = true;
|
||||||
|
(result as any).plexGuid = libMatch?.plexGuid || null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const siblingMatchCount = results.filter(r => {
|
||||||
|
if (!r.isAvailable) return false;
|
||||||
|
return siblingMap.has(r.asin);
|
||||||
|
}).length;
|
||||||
|
logger.debug('Sibling expansion', {
|
||||||
|
unmatchedCount: unmatchedAsins.length,
|
||||||
|
siblingGroupsFound: siblingMap.size,
|
||||||
|
siblingMatches: siblingMatchCount,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
// Works table expansion is best-effort — direct matches still work
|
||||||
|
logger.error('Sibling ASIN expansion failed', {
|
||||||
|
error: error instanceof Error ? error.message : String(error),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
// Always enrich with request status (check ANY user's requests)
|
// Always enrich with request status (check ANY user's requests)
|
||||||
const asins = audiobooks.map(book => book.asin);
|
const asins = audiobooks.map(book => book.asin);
|
||||||
|
|
||||||
@@ -307,6 +363,19 @@ export async function getAvailableAsins(): Promise<Set<string>> {
|
|||||||
for (const item of completedRequests) {
|
for (const item of completedRequests) {
|
||||||
if (item.audibleAsin) asins.add(item.audibleAsin);
|
if (item.audibleAsin) asins.add(item.audibleAsin);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Expand with works-table sibling ASINs
|
||||||
|
try {
|
||||||
|
if (asins.size > 0) {
|
||||||
|
const siblingMap = await getSiblingAsins([...asins]);
|
||||||
|
for (const siblings of siblingMap.values()) {
|
||||||
|
for (const s of siblings) asins.add(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// Works table expansion is best-effort
|
||||||
|
}
|
||||||
|
|
||||||
return asins;
|
return asins;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,201 @@
|
|||||||
|
/**
|
||||||
|
* Component: Audiobook Deduplication Utility
|
||||||
|
* Documentation: documentation/integrations/audible.md
|
||||||
|
*
|
||||||
|
* Deduplicates audiobook listings that represent the same recording
|
||||||
|
* under different ASINs (publisher re-listings, rights transfers, etc.).
|
||||||
|
*
|
||||||
|
* Dedup key: normalized title + normalized narrator
|
||||||
|
* Duration tolerance: max(longerDuration * 0.01, 5) minutes
|
||||||
|
* Missing duration treated as compatible (graceful degradation).
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { AudibleAudiobook } from '../integrations/audible.service';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Title / narrator normalization
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Parenthesized or bracketed segments that contain an edition/format keyword
 * (e.g. "(Unabridged)", "[Abridged Edition]", "(Full Cast Narration)").
 */
const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;

/** Everything from the first colon that is followed by whitespace. */
const SUBTITLE_RE = /\s*[:]\s+.+$/;

/** Everything from a whitespace-delimited hyphen, en dash or em dash. */
const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;

/** A trailing "A Novel" / "A Memoir" style descriptor, with an optional leading separator. */
const TRAILING_DESCRIPTOR_RE = /\s*[-:,]?\s+a\s+(novel|memoir|thriller|mystery|romance|story|tale|novella)\s*$/i;

/**
 * Reduce a title to the form used as a dedup comparison key.
 *
 * The title is lowercased, then stripped (in this order) of edition markers in
 * parentheses/brackets, a trailing descriptor, a colon subtitle and a dash
 * subtitle. Remaining whitespace runs are collapsed to single spaces and the
 * ends are trimmed.
 *
 * @param title - Raw listing title
 * @returns The normalized title; may be an empty string
 */
export function normalizeTitle(title: string): string {
  // Order matters: the trailing descriptor must go before subtitle stripping.
  const strippers = [
    EDITION_PAREN_RE,
    TRAILING_DESCRIPTOR_RE,
    SUBTITLE_RE,
    LONG_DASH_SUBTITLE_RE,
  ];
  const stripped = strippers.reduce(
    (current, pattern) => current.replace(pattern, ''),
    title.toLowerCase(),
  );
  // Splitting on whitespace runs and re-joining collapses and trims in one go.
  return stripped.split(/\s+/).filter(Boolean).join(' ');
}
|
||||||
|
|
||||||
|
/**
 * Normalize a narrator credit for comparison: lowercased and trimmed.
 * A missing or empty narrator yields the empty string.
 */
function normalizeNarrator(narrator?: string): string {
  if (!narrator) {
    return '';
  }
  const lowered = narrator.toLowerCase();
  return lowered.trim();
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Duration compatibility
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Decide whether two durations (in minutes) are close enough to be the same
 * recording.
 *
 * The allowed gap is 1% of the longer duration, but never less than 5 minutes.
 * If either duration is null/undefined the pair is considered compatible.
 *
 * @param a - First duration in minutes, if known
 * @param b - Second duration in minutes, if known
 * @returns true when the durations are within tolerance or one is missing
 */
export function areDurationsCompatible(a?: number, b?: number): boolean {
  if (a == null) return true;
  if (b == null) return true;

  const gap = Math.abs(a - b);
  const onePercentOfLonger = Math.max(a, b) * 0.01;
  const allowedGap = Math.max(onePercentOfLonger, 5);
  return gap <= allowedGap;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Metadata scoring (for picking best representative)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Count how many optional metadata fields a listing has populated.
 * Used to pick the representative of a duplicate group (higher is better).
 *
 * One point each for: cover art URL, rating, duration, description, narrator,
 * release date, and a non-empty genre list. Rating and duration count when
 * they are not null/undefined (so 0 counts); the others must be truthy.
 */
function metadataScore(book: AudibleAudiobook): number {
  const populated = [
    Boolean(book.coverArtUrl),
    book.rating != null,
    book.durationMinutes != null,
    Boolean(book.description),
    Boolean(book.narrator),
    Boolean(book.releaseDate),
    Boolean(book.genres && book.genres.length > 0),
  ];
  return populated.filter(Boolean).length;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Dedup group types (for works-table persistence)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Metadata about one group of ASINs that dedup collapsed into a single listing (used for works-table persistence). */
|
||||||
|
export interface DedupGroup {
|
||||||
|
canonicalAsin: string; // ASIN of the "winner" (best metadata score)
|
||||||
|
allAsins: string[]; // All ASINs in this group (including canonical)
|
||||||
|
title: string; // Title from the canonical entry
|
||||||
|
author: string; // Author from the canonical entry
|
||||||
|
narrator?: string; // Narrator from the canonical entry
|
||||||
|
durationMinutes?: number; // Duration from the canonical entry
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Return shape of deduplicateAndCollectGroups: the deduped listings plus the groups that were collapsed. */
|
||||||
|
export interface DeduplicateResult {
|
||||||
|
books: AudibleAudiobook[]; // The deduped list (same list deduplicateAudiobooks returns)
|
||||||
|
groups: DedupGroup[]; // Only groups where 2+ ASINs were collapsed
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Main dedup functions
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Collapse listings that are the same recording published under several ASINs.
 *
 * Listings are considered the same recording when their normalized title and
 * narrator match and their durations are compatible. A different narrator
 * (different production) or a duration outside tolerance (e.g. abridged vs
 * unabridged) keeps the listings separate.
 *
 * Output order follows the first appearance of each group in the input.
 * This is a convenience wrapper that discards the grouping metadata.
 *
 * @param books - Listings to deduplicate
 * @returns The deduplicated listings
 */
export function deduplicateAudiobooks(books: AudibleAudiobook[]): AudibleAudiobook[] {
  const { books: deduped } = deduplicateAndCollectGroups(books);
  return deduped;
}
|
||||||
|
|
||||||
|
/**
 * Deduplicate audiobooks AND return grouping metadata for works-table persistence.
 *
 * Steps:
 *  1. Bucket listings by normalized title + normalized narrator.
 *  2. Inside each bucket, split into sub-groups of duration-compatible listings.
 *     A listing is compared against the first member of a sub-group that has a
 *     known duration; a sub-group with no known duration yet accepts anything.
 *  3. From each sub-group keep the listing with the highest metadata score
 *     (ties go to the earliest listing).
 *
 * Output order follows the first appearance of each bucket in the input.
 *
 * @param books - Listings to deduplicate
 * @returns The deduped list plus one DedupGroup per sub-group that collapsed
 *          two or more distinct ASINs
 */
export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): DeduplicateResult {
  if (books.length <= 1) return { books: [...books], groups: [] };

  // Map iteration follows insertion order, so buckets come back in order of
  // first appearance without a separate ordering array.
  const titleNarratorGroups = new Map<string, AudibleAudiobook[]>();
  for (const book of books) {
    const key = `${normalizeTitle(book.title)}|||${normalizeNarrator(book.narrator)}`;
    const bucket = titleNarratorGroups.get(key);
    if (bucket) {
      bucket.push(book);
    } else {
      titleNarratorGroups.set(key, [book]);
    }
  }

  const result: AudibleAudiobook[] = [];
  const dedupGroups: DedupGroup[] = [];

  for (const group of titleNarratorGroups.values()) {
    if (group.length === 1) {
      result.push(group[0]);
      continue;
    }

    // Split the bucket by duration compatibility. A book joins the first
    // compatible sub-group, otherwise it starts a new one.
    const subGroups: AudibleAudiobook[][] = [];
    for (const book of group) {
      const home = subGroups.find(sg => {
        // Compare against the first member whose duration is known. Comparing
        // against a duration-less first member would accept every later book
        // and merge e.g. abridged and unabridged editions.
        const anchor = sg.find(member => member.durationMinutes != null);
        return areDurationsCompatible(anchor?.durationMinutes, book.durationMinutes);
      });
      if (home) {
        home.push(book);
      } else {
        subGroups.push([book]);
      }
    }

    for (const sg of subGroups) {
      // Pick the representative: highest metadata score, earliest on ties.
      let best = sg[0];
      let bestScore = metadataScore(best);
      for (let i = 1; i < sg.length; i++) {
        const score = metadataScore(sg[i]);
        if (score > bestScore) {
          best = sg[i];
          bestScore = score;
        }
      }
      result.push(best);

      // Record a group only when distinct ASINs were collapsed; the same ASIN
      // appearing twice in the input is not a cross-ASIN mapping.
      const distinctAsins = [...new Set(sg.map(b => b.asin))];
      if (distinctAsins.length >= 2) {
        dedupGroups.push({
          canonicalAsin: best.asin,
          allAsins: distinctAsins,
          title: best.title,
          author: best.author,
          narrator: best.narrator,
          durationMinutes: best.durationMinutes,
        });
      }
    }
  }

  return { books: result, groups: dedupGroups };
}
|
||||||
@@ -0,0 +1,44 @@
|
|||||||
|
/**
|
||||||
|
* Component: Runtime Parsing Utility
|
||||||
|
* Documentation: documentation/integrations/audible.md
|
||||||
|
*
|
||||||
|
* Shared runtime/duration text parser extracted from AudibleService.
|
||||||
|
* Handles all i18n patterns (English, German, Spanish, French) via
|
||||||
|
* language-specific regex patterns in LanguageConfig.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { LanguageConfig } from '../constants/language-config';
|
||||||
|
|
||||||
|
/**
 * Convert a runtime string (e.g. "12 hrs and 30 mins", "5 Std. 20 Min.") into
 * total minutes.
 *
 * The hour patterns and the minute patterns from the language configuration
 * are each tried in order; only the first pattern that matches contributes,
 * using its first capture group as the number.
 *
 * @param runtimeText - Raw runtime string from Audible HTML
 * @param langConfig - Language configuration with hour/minute regex patterns
 * @returns Total minutes, or undefined if no positive duration could be parsed
 */
export function parseRuntime(runtimeText: string, langConfig: LanguageConfig): number | undefined {
  if (!runtimeText) return undefined;

  // Number captured by the first matching pattern, or 0 when nothing matches.
  const firstCapture = (patterns: Iterable<string | RegExp>): number => {
    for (const candidate of patterns) {
      const hit = runtimeText.match(candidate);
      if (hit) return parseInt(hit[1]);
    }
    return 0;
  };

  const hours = firstCapture(langConfig.scraping.runtimeHourPatterns);
  const minutes = firstCapture(langConfig.scraping.runtimeMinutePatterns);
  const totalMinutes = hours * 60 + minutes;

  return totalMinutes > 0 ? totalMinutes : undefined;
}
|
||||||
@@ -47,6 +47,8 @@ export const createPrismaMock = () => ({
|
|||||||
bookDateSwipe: createModelMock(),
|
bookDateSwipe: createModelMock(),
|
||||||
goodreadsShelf: createModelMock(),
|
goodreadsShelf: createModelMock(),
|
||||||
goodreadsBookMapping: createModelMock(),
|
goodreadsBookMapping: createModelMock(),
|
||||||
|
work: createModelMock(),
|
||||||
|
workAsin: createModelMock(),
|
||||||
$queryRaw: vi.fn(),
|
$queryRaw: vi.fn(),
|
||||||
$disconnect: vi.fn(),
|
$disconnect: vi.fn(),
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -0,0 +1,306 @@
|
|||||||
|
/**
|
||||||
|
* Component: Works Service Tests
|
||||||
|
* Documentation: documentation/integrations/audible.md
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||||
|
import { createPrismaMock } from '../helpers/prisma';
|
||||||
|
import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
|
||||||
|
|
||||||
|
// One shared Prisma stand-in; the mocked db module below hands it to the service.
const prismaMock = createPrismaMock();

// Replace the database module so the service talks to the mock client.
vi.mock('@/lib/db', () => {
  return { prisma: prismaMock };
});

// Replace the logger factory with one that returns fresh spy functions.
vi.mock('@/lib/utils/logger', () => {
  const create = () => ({
    debug: vi.fn(),
    info: vi.fn(),
    warn: vi.fn(),
    error: vi.fn(),
  });
  return { RMABLogger: { create } };
});
|
||||||
|
|
||||||
|
describe('persistDedupGroups', () => {
  // Clear call history and the module registry so each test imports a fresh
  // copy of the service bound to the shared mocks.
  beforeEach(() => {
    vi.clearAllMocks();
    vi.resetModules();
  });

  // Resolve the function under test lazily, after the per-test module reset.
  const loadPersistDedupGroups = async () => {
    const service = await import('@/lib/services/works.service');
    return service.persistDedupGroups;
  };

  it('creates new work + work_asins for a fresh group', async () => {
    const { work, workAsin } = prismaMock;
    workAsin.findMany.mockResolvedValue([]);
    work.create.mockResolvedValue({ id: 'work-1' });
    workAsin.create.mockResolvedValue({});
    workAsin.updateMany.mockResolvedValue({ count: 0 });

    const persistDedupGroups = await loadPersistDedupGroups();
    const freshGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B'],
      title: 'Test Book',
      author: 'Test Author',
      narrator: 'Test Narrator',
      durationMinutes: 600,
    };

    await persistDedupGroups([freshGroup]);

    expect(work.create).toHaveBeenCalledWith({
      data: { title: 'Test Book', author: 'Test Author' },
    });
    expect(workAsin.create).toHaveBeenCalledTimes(2);

    // The canonical ASIN carries narrator, duration and isCanonical=true.
    const canonicalRow = expect.objectContaining({
      workId: 'work-1',
      asin: 'ASIN_A',
      narrator: 'Test Narrator',
      durationMinutes: 600,
      isCanonical: true,
      source: 'dedup_auto',
    });
    expect(workAsin.create).toHaveBeenCalledWith({ data: canonicalRow });

    // The other ASIN is stored with isCanonical=false.
    const siblingRow = expect.objectContaining({
      workId: 'work-1',
      asin: 'ASIN_B',
      isCanonical: false,
      source: 'dedup_auto',
    });
    expect(workAsin.create).toHaveBeenCalledWith({ data: siblingRow });
  });

  it('adds new ASINs to existing work when canonical already exists', async () => {
    const { work, workAsin } = prismaMock;
    workAsin.findMany.mockResolvedValue([{ asin: 'ASIN_A', workId: 'existing-work' }]);
    workAsin.create.mockResolvedValue({});
    workAsin.updateMany.mockResolvedValue({ count: 1 });

    const persistDedupGroups = await loadPersistDedupGroups();
    const grownGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B', 'ASIN_C'],
      title: 'Test Book',
      author: 'Test Author',
      narrator: 'Narrator',
      durationMinutes: 500,
    };

    await persistDedupGroups([grownGroup]);

    // No new work: the canonical ASIN already belongs to one.
    expect(work.create).not.toHaveBeenCalled();

    // Only ASIN_B and ASIN_C are inserted (ASIN_A already exists).
    expect(workAsin.create).toHaveBeenCalledTimes(2);
    for (const asin of ['ASIN_B', 'ASIN_C']) {
      expect(workAsin.create).toHaveBeenCalledWith({
        data: expect.objectContaining({ workId: 'existing-work', asin }),
      });
    }
  });

  it('merges two separate works when dedup groups them together', async () => {
    const { work, workAsin } = prismaMock;
    // ASIN_A lives in work-1 and ASIN_B in work-2 before the merge.
    workAsin.findMany.mockResolvedValue([
      { asin: 'ASIN_A', workId: 'work-1' },
      { asin: 'ASIN_B', workId: 'work-2' },
    ]);
    workAsin.updateMany.mockResolvedValue({ count: 1 });
    work.deleteMany.mockResolvedValue({ count: 1 });

    const persistDedupGroups = await loadPersistDedupGroups();
    const mergedGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B'],
      title: 'Merged Book',
      author: 'Author',
    };

    await persistDedupGroups([mergedGroup]);

    // work-2's ASINs are re-pointed at work-1 ...
    expect(workAsin.updateMany).toHaveBeenCalledWith({
      where: { workId: { in: ['work-2'] } },
      data: { workId: 'work-1' },
    });

    // ... and work-2 itself is removed.
    expect(work.deleteMany).toHaveBeenCalledWith({
      where: { id: { in: ['work-2'] } },
    });
  });

  it('silently catches and logs errors without throwing', async () => {
    prismaMock.workAsin.findMany.mockRejectedValue(new Error('DB connection failed'));

    const persistDedupGroups = await loadPersistDedupGroups();
    const anyGroup: DedupGroup = {
      canonicalAsin: 'ASIN_A',
      allAsins: ['ASIN_A', 'ASIN_B'],
      title: 'Test',
      author: 'Auth',
    };

    // The promise must resolve (to undefined) despite the DB failure.
    await expect(persistDedupGroups([anyGroup])).resolves.toBeUndefined();
  });
});
|
||||||
|
|
||||||
|
describe('seedAsin', () => {
  // Clear call history and the module registry so each test imports a fresh
  // copy of the service bound to the shared mocks.
  beforeEach(() => {
    vi.clearAllMocks();
    vi.resetModules();
  });

  // Resolve the function under test lazily, after the per-test module reset.
  const loadSeedAsin = async () => {
    const service = await import('@/lib/services/works.service');
    return service.seedAsin;
  };

  it('creates single-ASIN work for new ASIN', async () => {
    const { work, workAsin } = prismaMock;
    workAsin.findUnique.mockResolvedValue(null);
    work.create.mockResolvedValue({ id: 'new-work' });
    workAsin.create.mockResolvedValue({});

    const seedAsin = await loadSeedAsin();
    await seedAsin('NEW_ASIN', 'New Book', 'Author', 'Narrator', 300);

    expect(work.create).toHaveBeenCalledWith({
      data: { title: 'New Book', author: 'Author' },
    });

    const expectedRow = {
      workId: 'new-work',
      asin: 'NEW_ASIN',
      narrator: 'Narrator',
      durationMinutes: 300,
      isCanonical: true,
      source: 'dedup_auto',
    };
    expect(workAsin.create).toHaveBeenCalledWith({ data: expectedRow });
  });

  it('does nothing for already-tracked ASIN', async () => {
    const { work, workAsin } = prismaMock;
    workAsin.findUnique.mockResolvedValue({
      id: 'existing',
      asin: 'EXISTING_ASIN',
      workId: 'work-1',
    });

    const seedAsin = await loadSeedAsin();
    await seedAsin('EXISTING_ASIN', 'Book', 'Author');

    expect(work.create).not.toHaveBeenCalled();
    expect(workAsin.create).not.toHaveBeenCalled();
  });

  it('silently catches and logs errors without throwing', async () => {
    prismaMock.workAsin.findUnique.mockRejectedValue(new Error('DB error'));

    const seedAsin = await loadSeedAsin();

    await expect(seedAsin('ASIN', 'Book', 'Auth')).resolves.toBeUndefined();
  });
});
|
||||||
|
|
||||||
|
describe('getSiblingAsins', () => {
  // Clear call history and the module registry so each test imports a fresh
  // copy of the service bound to the shared mocks.
  beforeEach(() => {
    vi.clearAllMocks();
    vi.resetModules();
  });

  // Resolve the function under test lazily, after the per-test module reset.
  const loadGetSiblingAsins = async () => {
    const service = await import('@/lib/services/works.service');
    return service.getSiblingAsins;
  };

  it('returns sibling ASINs correctly', async () => {
    // First lookup: the input ASINs with their work IDs.
    const inputRows = [
      { asin: 'ASIN_A', workId: 'work-1' },
      { asin: 'ASIN_C', workId: 'work-2' },
    ];
    // Second lookup: every ASIN belonging to those works.
    const allRowsInWorks = [
      { asin: 'ASIN_A', workId: 'work-1' },
      { asin: 'ASIN_B', workId: 'work-1' },
      { asin: 'ASIN_C', workId: 'work-2' },
      { asin: 'ASIN_D', workId: 'work-2' },
      { asin: 'ASIN_E', workId: 'work-2' },
    ];
    prismaMock.workAsin.findMany
      .mockResolvedValueOnce(inputRows)
      .mockResolvedValueOnce(allRowsInWorks);

    const getSiblingAsins = await loadGetSiblingAsins();
    const siblings = await getSiblingAsins(['ASIN_A', 'ASIN_C']);

    expect(siblings.get('ASIN_A')).toEqual(['ASIN_B']);
    expect(siblings.get('ASIN_C')).toEqual(['ASIN_D', 'ASIN_E']);
  });

  it('returns empty map for unknown ASINs', async () => {
    prismaMock.workAsin.findMany.mockResolvedValue([]);

    const getSiblingAsins = await loadGetSiblingAsins();
    const siblings = await getSiblingAsins(['UNKNOWN']);

    expect(siblings.size).toBe(0);
  });

  it('returns empty map for empty input', async () => {
    const getSiblingAsins = await loadGetSiblingAsins();
    const siblings = await getSiblingAsins([]);

    expect(siblings.size).toBe(0);
    // An empty input must short-circuit before any DB query.
    expect(prismaMock.workAsin.findMany).not.toHaveBeenCalled();
  });

  it('excludes the input ASIN itself from siblings', async () => {
    const inputRows = [{ asin: 'ASIN_A', workId: 'work-1' }];
    const allRowsInWorks = [
      { asin: 'ASIN_A', workId: 'work-1' },
      { asin: 'ASIN_B', workId: 'work-1' },
    ];
    prismaMock.workAsin.findMany
      .mockResolvedValueOnce(inputRows)
      .mockResolvedValueOnce(allRowsInWorks);

    const getSiblingAsins = await loadGetSiblingAsins();
    const siblings = await getSiblingAsins(['ASIN_A']);

    expect(siblings.get('ASIN_A')).toEqual(['ASIN_B']);
    expect(siblings.get('ASIN_A')).not.toContain('ASIN_A');
  });

  it('omits ASINs with no siblings (single-ASIN works)', async () => {
    const onlyRow = { asin: 'ASIN_LONELY', workId: 'work-solo' };
    prismaMock.workAsin.findMany
      .mockResolvedValueOnce([{ ...onlyRow }])
      .mockResolvedValueOnce([{ ...onlyRow }]);

    const getSiblingAsins = await loadGetSiblingAsins();
    const siblings = await getSiblingAsins(['ASIN_LONELY']);

    // With no siblings the ASIN must be absent from the map entirely.
    expect(siblings.has('ASIN_LONELY')).toBe(false);
  });
});
|
||||||
@@ -0,0 +1,434 @@
|
|||||||
|
/**
|
||||||
|
* Component: Audiobook Deduplication Tests
|
||||||
|
* Documentation: documentation/integrations/audible.md
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, expect, it } from 'vitest';
|
||||||
|
import {
|
||||||
|
deduplicateAudiobooks,
|
||||||
|
deduplicateAndCollectGroups,
|
||||||
|
normalizeTitle,
|
||||||
|
areDurationsCompatible,
|
||||||
|
} from '@/lib/utils/deduplicate-audiobooks';
|
||||||
|
import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Helper: minimal AudibleAudiobook factory
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Build an audiobook fixture: every optional field defaults to undefined and
 * the caller's overrides (which must include asin, title and author) win.
 */
function makeBook(overrides: Partial<AudibleAudiobook> & { asin: string; title: string; author: string }): AudibleAudiobook {
  const blankOptionals = {
    narrator: undefined,
    coverArtUrl: undefined,
    durationMinutes: undefined,
    rating: undefined,
    description: undefined,
    releaseDate: undefined,
    genres: undefined,
    series: undefined,
    seriesPart: undefined,
    seriesAsin: undefined,
    authorAsin: undefined,
  };
  return { ...blankOptionals, ...overrides };
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// normalizeTitle
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('normalizeTitle', () => {
  // Each row: [test name, raw title, expected normalized title].
  const cases: Array<[string, string, string]> = [
    ['lowercases', 'The Black Prism', 'the black prism'],
    ['strips (Unabridged)', 'The Black Prism (Unabridged)', 'the black prism'],
    ['strips [Abridged Edition]', 'The Black Prism [Abridged Edition]', 'the black prism'],
    ['strips (2024 Remastered Edition)', 'The Hobbit (2024 Remastered Edition)', 'the hobbit'],
    ['strips subtitle after colon', 'The Black Prism: Lightbringer, Book 1', 'the black prism'],
    ['strips subtitle after long dash', 'The Black Prism \u2014 A Lightbringer Novel', 'the black prism'],
    ['strips trailing "A Novel"', 'The Black Prism: A Novel', 'the black prism'],
    ['strips (Audiobook)', 'The Hobbit (Audiobook)', 'the hobbit'],
    ['strips (Dramatized Adaptation)', 'The Black Prism (Dramatized Adaptation)', 'the black prism'],
    ['strips (Full Cast Narration)', 'The Black Prism (Full Cast Narration)', 'the black prism'],
    ['collapses whitespace', ' The Black Prism ', 'the black prism'],
    ['handles empty string', '', ''],
    // "well-known" uses a short in-word hyphen, not a subtitle separator.
    ['preserves hyphenated words (not subtitles)', 'A Well-Known Book', 'a well-known book'],
  ];

  for (const [name, rawTitle, expected] of cases) {
    it(name, () => {
      expect(normalizeTitle(rawTitle)).toBe(expected);
    });
  }
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// areDurationsCompatible
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('areDurationsCompatible', () => {
  // Small assertion helpers to keep the cases readable.
  const expectCompatible = (a?: number, b?: number) =>
    expect(areDurationsCompatible(a, b)).toBe(true);
  const expectIncompatible = (a?: number, b?: number) =>
    expect(areDurationsCompatible(a, b)).toBe(false);

  it('returns true when both undefined', () => {
    expectCompatible(undefined, undefined);
  });

  it('returns true when one undefined', () => {
    expectCompatible(600, undefined);
    expectCompatible(undefined, 600);
  });

  it('returns true for identical durations', () => {
    expectCompatible(600, 600);
  });

  it('uses 1% of longer duration as tolerance for long books', () => {
    // 40-hour books (2400 min): tolerance = max(2400*0.01, 5) = 24 min.
    expectCompatible(2400, 2424); // exactly at tolerance
    expectIncompatible(2400, 2425); // just over
  });

  it('uses 5-minute minimum tolerance for short books', () => {
    // 2-hour books (120 min): tolerance = max(120*0.01, 5) = max(1.2, 5) = 5 min.
    expectCompatible(120, 125); // exactly at the 5-min minimum
    expectIncompatible(120, 126); // just over
  });

  it('keeps abridged vs unabridged separate (large duration gap)', () => {
    // Unabridged: 720 min (12 hrs); abridged: 360 min (6 hrs).
    expectIncompatible(720, 360);
  });

  it('symmetry: order does not matter', () => {
    expectCompatible(2400, 2424);
    expectCompatible(2424, 2400);
    expectIncompatible(120, 126);
    expectIncompatible(126, 120);
  });
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// deduplicateAudiobooks
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('deduplicateAudiobooks', () => {
|
||||||
|
it('returns empty array for empty input', () => {
|
||||||
|
expect(deduplicateAudiobooks([])).toEqual([]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('returns single book unchanged', () => {
|
||||||
|
const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Author' });
|
||||||
|
expect(deduplicateAudiobooks([book])).toEqual([book]);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('passes through all-unique books unchanged', () => {
|
||||||
|
const books = [
|
||||||
|
makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }),
|
||||||
|
makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }),
|
||||||
|
makeBook({ asin: 'A3', title: 'Book Three', author: 'Auth', narrator: 'Nar B', durationMinutes: 700 }),
|
||||||
|
];
|
||||||
|
expect(deduplicateAudiobooks(books)).toHaveLength(3);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('collapses simple duplicates (same title + narrator + similar duration)', () => {
|
||||||
|
const books = [
|
||||||
|
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||||
|
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
|
||||||
|
];
|
||||||
|
const result = deduplicateAudiobooks(books);
|
||||||
|
expect(result).toHaveLength(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps books with different narrators (different production)', () => {
|
||||||
|
const books = [
|
||||||
|
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||||
|
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Full Cast', durationMinutes: 480 }),
|
||||||
|
];
|
||||||
|
const result = deduplicateAudiobooks(books);
|
||||||
|
expect(result).toHaveLength(2);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('keeps abridged vs unabridged (same narrator, very different duration)', () => {
  // Same title, author and narrator; one recording is half as long as the other.
  const longRecording = makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 });
  const shortRecording = makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 });

  const deduped = deduplicateAudiobooks([longRecording, shortRecording]);

  expect(deduped).toHaveLength(2);
});
|
||||||
|
|
||||||
|
it('collapses when one book has missing duration', () => {
  const withDuration = makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 });
  // durationMinutes is passed explicitly as undefined for the second listing.
  const withoutDuration = makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: undefined });

  const deduped = deduplicateAudiobooks([withDuration, withoutDuration]);

  expect(deduped).toHaveLength(1);
});
|
||||||
|
|
||||||
|
it('collapses when both books have missing duration', () => {
  // Neither listing passes durationMinutes to makeBook.
  const firstListing = makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' });
  const secondListing = makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' });

  const deduped = deduplicateAudiobooks([firstListing, secondListing]);

  expect(deduped).toHaveLength(1);
});
|
||||||
|
|
||||||
|
it('collapses title variants with edition markers', () => {
  // The first title carries an "(Unabridged)" suffix; the second is the bare title.
  const markedTitle = makeBook({ asin: 'A1', title: 'The Black Prism (Unabridged)', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 });
  const plainTitle = makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1258 });

  const deduped = deduplicateAudiobooks([markedTitle, plainTitle]);

  expect(deduped).toHaveLength(1);
});
|
||||||
|
|
||||||
|
it('collapses title variants with subtitles', () => {
  // The first title appends a series subtitle after a colon; the second is the bare title.
  const subtitled = makeBook({ asin: 'A1', title: 'The Black Prism: Lightbringer, Book 1', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 });
  const plainTitle = makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 });

  const deduped = deduplicateAudiobooks([subtitled, plainTitle]);

  expect(deduped).toHaveLength(1);
});
|
||||||
|
|
||||||
|
it('picks the representative with most metadata', () => {
  // Fields shared by both listings of the same recording.
  const shared = { title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' };

  // Bare listing: no cover art, rating or description.
  const bareEntry = makeBook({ asin: 'A1', ...shared, durationMinutes: 1260 });
  // Detailed listing: additionally carries cover art, a rating and a description.
  const detailedEntry = makeBook({
    asin: 'A2',
    ...shared,
    durationMinutes: 1262,
    coverArtUrl: 'https://img.jpg',
    rating: 4.5,
    description: 'Great book',
  });

  const deduped = deduplicateAudiobooks([bareEntry, detailedEntry]);

  expect(deduped).toHaveLength(1);
  // The detailed listing is expected to represent the pair.
  expect(deduped[0].asin).toBe('A2');
});
|
||||||
|
|
||||||
|
it('preserves original order (first-seen position)', () => {
  // 'Alpha' appears at index 0 and again at index 2, with 'Beta' in between.
  const input = [
    makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }),
    makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 400 }),
    makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
    makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
  ];

  const deduped = deduplicateAudiobooks(input);
  const titlesInOrder = deduped.map(({ title }) => title);

  expect(deduped).toHaveLength(3);
  expect(titlesInOrder).toEqual(['Alpha', 'Beta', 'Charlie']);
});
|
||||||
|
|
||||||
|
it('handles Lightbringer-style scenario: unabridged + dramatized', () => {
  const author = 'Brent Weeks';

  // Full-length Simon Vance narration; this listing has cover art and a rating.
  const fullNarration = makeBook({
    asin: 'SV1',
    title: 'The Black Prism',
    author,
    narrator: 'Simon Vance',
    durationMinutes: 1260,
    coverArtUrl: 'cover1.jpg',
    rating: 4.7,
  });
  // Second Simon Vance listing under another ASIN, with a subtitle and near-identical duration.
  const relisting = makeBook({
    asin: 'SV2',
    title: 'The Black Prism: Lightbringer Book 1',
    author,
    narrator: 'Simon Vance',
    durationMinutes: 1262,
  });
  // Full-cast dramatization: different narrator and much shorter.
  const dramatization = makeBook({
    asin: 'DR1',
    title: 'The Black Prism (Dramatized Adaptation)',
    author,
    narrator: 'Full Cast',
    durationMinutes: 480,
    coverArtUrl: 'cover-drama.jpg',
  });

  const deduped = deduplicateAudiobooks([fullNarration, relisting, dramatization]);
  const byNarrator = (name: string) => deduped.find(book => book.narrator === name);

  // The two Simon Vance listings merge into one; the Full Cast entry stays.
  expect(deduped).toHaveLength(2);
  expect(byNarrator('Simon Vance')).toBeTruthy();
  expect(byNarrator('Full Cast')).toBeTruthy();

  // The surviving Simon Vance entry should be the one with cover + rating.
  const survivingVance = byNarrator('Simon Vance')!;
  expect(survivingVance.asin).toBe('SV1');
});
|
||||||
|
|
||||||
|
it('uses percentage tolerance for very long audiobooks', () => {
  // Builds a pair of otherwise-identical 'Long Book' listings with the given durations.
  const pairWithDurations = (firstMinutes: number, secondMinutes: number) => [
    makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: firstMinutes }),
    makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: secondMinutes }),
  ];

  // Two 40-hour books: tolerance = max(2400*0.01, 5) = 24 min, so a 20-minute gap collapses.
  const withinTolerance = pairWithDurations(2400, 2420);
  expect(deduplicateAudiobooks(withinTolerance)).toHaveLength(1);

  // A 30-minute gap is beyond that tolerance, so both entries stay.
  const beyondTolerance = pairWithDurations(2400, 2430);
  expect(deduplicateAudiobooks(beyondTolerance)).toHaveLength(2);
});
|
||||||
|
|
||||||
|
it('treats missing narrator as its own group', () => {
  // Both entries share a title and pass narrator as undefined; they should collapse.
  const firstAnonymous = makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 });
  const secondAnonymous = makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 302 });

  const deduped = deduplicateAudiobooks([firstAnonymous, secondAnonymous]);

  expect(deduped).toHaveLength(1);
});
|
||||||
|
|
||||||
|
it('does not collapse empty-narrator with named narrator', () => {
  // Same title and near-identical duration, but only the second entry names a narrator.
  const anonymous = makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 });
  const named = makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'John Smith', durationMinutes: 302 });

  const deduped = deduplicateAudiobooks([anonymous, named]);

  expect(deduped).toHaveLength(2);
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// deduplicateAndCollectGroups
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('deduplicateAndCollectGroups', () => {
  // These cases destructure `{ books, groups }` from the result and check that
  // `groups` reports only sets of two or more ASINs that were merged together.

  it('returns empty groups array when no duplicates', () => {
    // Two different titles: nothing merges, so no group is reported.
    const books = [
      makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }),
      makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }),
    ];
    const { books: result, groups } = deduplicateAndCollectGroups(books);
    expect(result).toHaveLength(2);
    expect(groups).toHaveLength(0);
  });

  it('returns empty groups for empty input', () => {
    const { books: result, groups } = deduplicateAndCollectGroups([]);
    expect(result).toHaveLength(0);
    expect(groups).toHaveLength(0);
  });

  it('returns empty groups for single book', () => {
    const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Auth' });
    const { books: result, groups } = deduplicateAndCollectGroups([book]);
    expect(result).toHaveLength(1);
    expect(groups).toHaveLength(0);
  });

  it('returns group with 2 ASINs when 2 books match', () => {
    // Same title/author/narrator, durations 2 minutes apart.
    const books = [
      makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
    ];
    const { books: result, groups } = deduplicateAndCollectGroups(books);
    expect(result).toHaveLength(1);
    expect(groups).toHaveLength(1);
    expect(groups[0].allAsins).toHaveLength(2);
    expect(groups[0].allAsins).toContain('A1');
    expect(groups[0].allAsins).toContain('A2');
  });

  it('returns group with 3+ ASINs for multi-duplicate scenario', () => {
    // Three listings of one recording; the third title has an "(Unabridged)" suffix.
    const books = [
      makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
      makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 662 }),
      makeBook({ asin: 'A3', title: 'The Hobbit (Unabridged)', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 658 }),
    ];
    const { books: result, groups } = deduplicateAndCollectGroups(books);
    expect(result).toHaveLength(1);
    expect(groups).toHaveLength(1);
    expect(groups[0].allAsins).toHaveLength(3);
    expect(groups[0].allAsins).toContain('A1');
    expect(groups[0].allAsins).toContain('A2');
    expect(groups[0].allAsins).toContain('A3');
  });

  it('canonicalAsin is the one with highest metadata score', () => {
    // SPARSE has no cover art, rating or description; RICH has all three.
    const sparse = makeBook({
      asin: 'SPARSE', title: 'The Black Prism', author: 'Brent Weeks',
      narrator: 'Simon Vance', durationMinutes: 1260,
    });
    const rich = makeBook({
      asin: 'RICH', title: 'The Black Prism', author: 'Brent Weeks',
      narrator: 'Simon Vance', durationMinutes: 1262,
      coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book',
    });
    const { groups } = deduplicateAndCollectGroups([sparse, rich]);
    expect(groups).toHaveLength(1);
    expect(groups[0].canonicalAsin).toBe('RICH');
  });

  it('groups only include entries with 2+ ASINs', () => {
    const books = [
      makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }),
      makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
      makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
    ];
    const { groups } = deduplicateAndCollectGroups(books);
    // Only Alpha group should appear (Beta is a singleton)
    expect(groups).toHaveLength(1);
    expect(groups[0].allAsins).toContain('A1');
    expect(groups[0].allAsins).toContain('A2');
    // Guard against the singleton being folded into the Alpha group: without
    // these checks a single group holding all three ASINs would also pass.
    expect(groups[0].allAsins).toHaveLength(2);
    expect(groups[0].allAsins).not.toContain('B1');
  });

  it('duration-incompatible books produce separate entries (no group for singletons)', () => {
    // Same title/narrator but very different durations (abridged vs unabridged)
    const books = [
      makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
      makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }),
    ];
    const { books: result, groups } = deduplicateAndCollectGroups(books);
    expect(result).toHaveLength(2); // Not collapsed
    expect(groups).toHaveLength(0); // No multi-ASIN groups
  });

  it('books field matches what deduplicateAudiobooks returns', () => {
    // Mixed input: two Alpha listings, one Beta singleton, two Charlie listings.
    const books = [
      makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300, coverArtUrl: 'img.jpg', rating: 4.5 }),
      makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
      makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
      makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 600 }),
      makeBook({ asin: 'C2', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 601 }),
    ];
    const dedupOnly = deduplicateAudiobooks(books);
    const { books: withGroups } = deduplicateAndCollectGroups(books);
    // Both entry points must yield the same ASINs in the same order.
    expect(withGroups.map(b => b.asin)).toEqual(dedupOnly.map(b => b.asin));
  });

  it('includes narrator and durationMinutes from canonical entry in group', () => {
    // A2 additionally carries cover art and a rating, so it is expected to be canonical.
    const books = [
      makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 480 }),
      makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 482, coverArtUrl: 'img.jpg', rating: 4.0 }),
    ];
    const { groups } = deduplicateAndCollectGroups(books);
    expect(groups).toHaveLength(1);
    expect(groups[0].canonicalAsin).toBe('A2'); // richer metadata
    expect(groups[0].narrator).toBe('Jane Doe');
    // 482 is A2's duration, i.e. the value comes from the canonical entry.
    expect(groups[0].durationMinutes).toBe(482);
    expect(groups[0].author).toBe('Auth');
  });
});
|
||||||
Reference in New Issue
Block a user