Add works table and ASIN deduping

Add persistent cross-ASIN "works" mapping and client-side deduplication to improve library matching. Introduces a Prisma migration and models (Work, WorkAsin) plus src/lib/services/works.service for persisting dedup groups, seeding ASINs at request time, and sibling lookup. Adds a deduplication utility (deduplicate-audiobooks) that normalizes titles/narrators, compares durations, and returns grouping metadata; API routes (search, author, series) now deduplicate results before enrichment and fire-and-forget persist groups. Adds sibling-ASIN expansion into audiobook matcher and expands getAvailableAsins accordingly. Extracts runtime parsing into a shared parse-runtime util and updates audible scrapers/services to use it. Includes unit tests for dedup logic and works service and updates test Prisma mocks.
This commit is contained in:
kikootwo
2026-03-03 13:31:46 -05:00
parent ff80d995c5
commit 610873af6b
15 changed files with 1446 additions and 32 deletions
+11 -2
View File
@@ -14,8 +14,10 @@ import {
getLanguageForRegion,
buildContainsSelector,
stripPrefixes,
type LanguageConfig,
} from '../constants/language-config';
import { RMABLogger } from '../utils/logger';
import { parseRuntime } from '../utils/parse-runtime';
import { randomDelay } from '../utils/scrape-resilience';
const logger = RMABLogger.create('Audible.Series');
@@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<
undefined;
// Parse all books from the series page
const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes);
const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig);
// Use actual book count if we got more from scraping
const bookCount = Math.max(summary.bookCount, books.length);
@@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun
function parseSeriesBooks(
$: cheerio.CheerioAPI,
authorPrefixes: string[],
narratorPrefixes: string[]
narratorPrefixes: string[],
langConfig: LanguageConfig
): AudibleAudiobook[] {
const books: AudibleAudiobook[] = [];
const seenAsins = new Set<string>();
@@ -453,6 +456,11 @@ function parseSeriesBooks(
const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;
// Duration
const runtimeText = $el.find('.runtimeLabel').text().trim() ||
$el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
const durationMinutes = parseRuntime(runtimeText, langConfig);
books.push({
asin: bookAsin,
title,
@@ -461,6 +469,7 @@ function parseSeriesBooks(
narrator: stripPrefixes(narratorText, narratorPrefixes),
coverArtUrl,
rating,
durationMinutes,
});
});
+4 -25
View File
@@ -23,6 +23,7 @@ import {
AdaptivePacer,
FetchResultMeta,
} from '../utils/scrape-resilience';
import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';
// Module-level logger
const logger = RMABLogger.create('Audible');
@@ -1134,33 +1135,11 @@ export class AudibleService {
}
/**
* Parse runtime text to minutes using language-specific patterns
* Parse runtime text to minutes using language-specific patterns.
* Delegates to shared utility in src/lib/utils/parse-runtime.ts.
*/
private parseRuntime(runtimeText: string): number | undefined {
if (!runtimeText) return undefined;
const langConfig = this.getLangConfig();
let totalMinutes = 0;
// Try each hour pattern until one matches
for (const pattern of langConfig.scraping.runtimeHourPatterns) {
const match = runtimeText.match(pattern);
if (match) {
totalMinutes += parseInt(match[1]) * 60;
break;
}
}
// Try each minute pattern until one matches
for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
const match = runtimeText.match(pattern);
if (match) {
totalMinutes += parseInt(match[1]);
break;
}
}
return totalMinutes > 0 ? totalMinutes : undefined;
return parseRuntimeUtil(runtimeText, this.getLangConfig());
}
/**