Merge branch 'main' into feature/per-user-api-tokens

This commit is contained in:
kikootwo
2026-03-04 13:26:57 -05:00
committed by GitHub
50 changed files with 4728 additions and 434 deletions
+4 -3
View File
@@ -35,11 +35,12 @@ export interface Audiobook {
hasReportedIssue?: boolean; // True if an open issue exists for this audiobook
}
export function useAudiobooks(type: 'popular' | 'new-releases', limit: number = 20, page: number = 1) {
export function useAudiobooks(type: 'popular' | 'new-releases', limit: number = 20, page: number = 1, hideAvailable: boolean = false) {
const hideParam = hideAvailable ? '&hideAvailable=true' : '';
const endpoint =
type === 'popular'
? `/api/audiobooks/popular?page=${page}&limit=${limit}`
: `/api/audiobooks/new-releases?page=${page}&limit=${limit}`;
? `/api/audiobooks/popular?page=${page}&limit=${limit}${hideParam}`
: `/api/audiobooks/new-releases?page=${page}&limit=${limit}${hideParam}`;
const { data, error, isLoading } = useSWR(endpoint, authenticatedFetcher, {
revalidateOnFocus: false,
+119
View File
@@ -0,0 +1,119 @@
/**
* Component: Watched Authors Hook
* Documentation: documentation/features/watched-lists.md
*/
'use client';
import { useState } from 'react';
import useSWR, { mutate } from 'swr';
import { useAuth } from '@/contexts/AuthContext';
import { fetchWithAuth } from '@/lib/utils/api';
/**
 * Shape of one watched-author entry, as the hooks in this file read it from
 * the `authors` array / `author` field of `/api/user/watched-authors` responses.
 */
export interface WatchedAuthorItem {
  /** Entry identifier. NOTE(review): presumably the value passed to `deleteAuthor(id)` — confirm. */
  id: string;
  /** ASIN of the watched author. */
  authorAsin: string;
  /** Display name of the watched author. */
  authorName: string;
  /** Cover/portrait image URL, or null when none is stored. */
  coverArtUrl: string | null;
  /** Timestamp string of the last check, or null. NOTE(review): presumably null until the first check has run — confirm. */
  lastCheckedAt: string | null;
  /** Timestamp string of when the entry was created. */
  createdAt: string;
}
/**
 * SWR fetcher for the watched-authors endpoint.
 *
 * Sends the request through `fetchWithAuth` and resolves with the parsed JSON body.
 * Rejects on a non-OK response so SWR reports the failure through `error`
 * instead of treating the error payload as list data.
 *
 * @param url - Endpoint to fetch.
 * @returns The parsed JSON body of a successful response.
 * @throws Error carrying the server-provided `message`/`error` text when available.
 */
const fetcher = async (url: string) => {
  const res = await fetchWithAuth(url);
  if (!res.ok) {
    // The error body is not guaranteed to be JSON; fall back to a generic message.
    const body = await res.json().catch(() => null);
    throw new Error(body?.message || body?.error || 'Failed to load watched authors');
  }
  return res.json();
};
/**
 * Hook that loads the watched-authors list.
 *
 * No request is made while there is no access token (the SWR key is null).
 * The list is re-fetched every 60 seconds.
 *
 * @returns `authors` (empty array when no data is available), plus SWR's
 *          `isLoading` and `error`.
 */
export function useWatchedAuthors() {
  const { accessToken } = useAuth();

  // A null key tells SWR to skip fetching.
  const swrKey = accessToken ? '/api/user/watched-authors' : null;
  const swr = useSWR(swrKey, fetcher, { refreshInterval: 60000 });

  const authors = (swr.data?.authors || []) as WatchedAuthorItem[];
  return {
    authors,
    isLoading: swr.isLoading,
    error: swr.error,
  };
}
/**
 * Hook exposing an action that adds an author to the watched list.
 *
 * @returns `addAuthor` plus `isLoading` / `error` state for the most recent call.
 *
 * `addAuthor(authorAsin, authorName, coverArtUrl?)`:
 * - throws `Error('Not authenticated')` when there is no access token;
 * - POSTs to `/api/user/watched-authors`;
 * - on success revalidates every SWR key containing `/api/user/watched-authors`
 *   and resolves with the `author` field of the response;
 * - on failure stores the message in `error` and rethrows.
 */
export function useAddWatchedAuthor() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const addAuthor = async (authorAsin: string, authorName: string, coverArtUrl?: string) => {
    if (!accessToken) throw new Error('Not authenticated');

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth('/api/user/watched-authors', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ authorAsin, authorName, coverArtUrl }),
      });

      if (!response.ok) {
        // Error bodies are not guaranteed to be JSON; a parse failure must not
        // replace the real failure with a SyntaxError.
        const failure = await response.json().catch(() => null);
        throw new Error(failure?.message || failure?.error || 'Failed to watch author');
      }

      const data = await response.json();

      // Revalidate watched authors list
      void mutate((key) => typeof key === 'string' && key.includes('/api/user/watched-authors'));

      return data.author as WatchedAuthorItem;
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { addAuthor, isLoading, error };
}
/**
 * Hook exposing an action that removes an author from the watched list.
 *
 * @returns `deleteAuthor` plus `isLoading` / `error` state for the most recent call.
 *
 * `deleteAuthor(id)`:
 * - throws `Error('Not authenticated')` when there is no access token;
 * - sends DELETE to `/api/user/watched-authors/{id}`;
 * - on success revalidates every SWR key containing `/api/user/watched-authors`
 *   and resolves with `true`;
 * - on failure stores the message in `error` and rethrows.
 */
export function useDeleteWatchedAuthor() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const deleteAuthor = async (id: string) => {
    if (!accessToken) throw new Error('Not authenticated');

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth(`/api/user/watched-authors/${id}`, {
        method: 'DELETE',
      });

      // The body is only needed for its error text. It is not parsed on success,
      // so an empty-body success response does not fail, and a non-JSON error
      // body does not mask the real failure.
      if (!response.ok) {
        const failure = await response.json().catch(() => null);
        throw new Error(failure?.message || failure?.error || 'Failed to unwatch author');
      }

      // Revalidate watched authors list
      void mutate((key) => typeof key === 'string' && key.includes('/api/user/watched-authors'));

      return true;
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { deleteAuthor, isLoading, error };
}
+119
View File
@@ -0,0 +1,119 @@
/**
* Component: Watched Series Hook
* Documentation: documentation/features/watched-lists.md
*/
'use client';
import { useState } from 'react';
import useSWR, { mutate } from 'swr';
import { useAuth } from '@/contexts/AuthContext';
import { fetchWithAuth } from '@/lib/utils/api';
/**
 * Shape of one watched-series entry, as the hooks in this file read it from
 * the `series` field of `/api/user/watched-series` responses.
 */
export interface WatchedSeriesItem {
  /** Entry identifier. NOTE(review): presumably the value passed to `deleteSeries(id)` — confirm. */
  id: string;
  /** ASIN of the watched series. */
  seriesAsin: string;
  /** Display title of the watched series. */
  seriesTitle: string;
  /** Cover image URL, or null when none is stored. */
  coverArtUrl: string | null;
  /** Timestamp string of the last check, or null. NOTE(review): presumably null until the first check has run — confirm. */
  lastCheckedAt: string | null;
  /** Timestamp string of when the entry was created. */
  createdAt: string;
}
/**
 * SWR fetcher for the watched-series endpoint.
 *
 * Sends the request through `fetchWithAuth` and resolves with the parsed JSON body.
 * Rejects on a non-OK response so SWR reports the failure through `error`
 * instead of treating the error payload as list data.
 *
 * @param url - Endpoint to fetch.
 * @returns The parsed JSON body of a successful response.
 * @throws Error carrying the server-provided `message`/`error` text when available.
 */
const fetcher = async (url: string) => {
  const res = await fetchWithAuth(url);
  if (!res.ok) {
    // The error body is not guaranteed to be JSON; fall back to a generic message.
    const body = await res.json().catch(() => null);
    throw new Error(body?.message || body?.error || 'Failed to load watched series');
  }
  return res.json();
};
/**
 * Hook that loads the watched-series list.
 *
 * No request is made while there is no access token (the SWR key is null).
 * The list is re-fetched every 60 seconds.
 *
 * @returns `series` (empty array when no data is available), plus SWR's
 *          `isLoading` and `error`.
 */
export function useWatchedSeries() {
  const { accessToken } = useAuth();

  // A null key tells SWR to skip fetching.
  const swrKey = accessToken ? '/api/user/watched-series' : null;
  const swr = useSWR(swrKey, fetcher, { refreshInterval: 60000 });

  const series = (swr.data?.series || []) as WatchedSeriesItem[];
  return {
    series,
    isLoading: swr.isLoading,
    error: swr.error,
  };
}
/**
 * Hook exposing an action that adds a series to the watched list.
 *
 * @returns `addSeries` plus `isLoading` / `error` state for the most recent call.
 *
 * `addSeries(seriesAsin, seriesTitle, coverArtUrl?)`:
 * - throws `Error('Not authenticated')` when there is no access token;
 * - POSTs to `/api/user/watched-series`;
 * - on success revalidates every SWR key containing `/api/user/watched-series`
 *   and resolves with the `series` field of the response;
 * - on failure stores the message in `error` and rethrows.
 */
export function useAddWatchedSeries() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const addSeries = async (seriesAsin: string, seriesTitle: string, coverArtUrl?: string) => {
    if (!accessToken) throw new Error('Not authenticated');

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth('/api/user/watched-series', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ seriesAsin, seriesTitle, coverArtUrl }),
      });

      if (!response.ok) {
        // Error bodies are not guaranteed to be JSON; a parse failure must not
        // replace the real failure with a SyntaxError.
        const failure = await response.json().catch(() => null);
        throw new Error(failure?.message || failure?.error || 'Failed to watch series');
      }

      const data = await response.json();

      // Revalidate watched series list
      void mutate((key) => typeof key === 'string' && key.includes('/api/user/watched-series'));

      return data.series as WatchedSeriesItem;
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { addSeries, isLoading, error };
}
/**
 * Hook exposing an action that removes a series from the watched list.
 *
 * @returns `deleteSeries` plus `isLoading` / `error` state for the most recent call.
 *
 * `deleteSeries(id)`:
 * - throws `Error('Not authenticated')` when there is no access token;
 * - sends DELETE to `/api/user/watched-series/{id}`;
 * - on success revalidates every SWR key containing `/api/user/watched-series`
 *   and resolves with `true`;
 * - on failure stores the message in `error` and rethrows.
 */
export function useDeleteWatchedSeries() {
  const { accessToken } = useAuth();
  const [isLoading, setIsLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  const deleteSeries = async (id: string) => {
    if (!accessToken) throw new Error('Not authenticated');

    setIsLoading(true);
    setError(null);

    try {
      const response = await fetchWithAuth(`/api/user/watched-series/${id}`, {
        method: 'DELETE',
      });

      // The body is only needed for its error text. It is not parsed on success,
      // so an empty-body success response does not fail, and a non-JSON error
      // body does not mask the real failure.
      if (!response.ok) {
        const failure = await response.json().catch(() => null);
        throw new Error(failure?.message || failure?.error || 'Failed to unwatch series');
      }

      // Revalidate watched series list
      void mutate((key) => typeof key === 'string' && key.includes('/api/user/watched-series'));

      return true;
    } catch (err) {
      const message = err instanceof Error ? err.message : 'Unknown error';
      setError(message);
      throw err;
    } finally {
      setIsLoading(false);
    }
  };

  return { deleteSeries, isLoading, error };
}
+11 -2
View File
@@ -14,8 +14,10 @@ import {
getLanguageForRegion,
buildContainsSelector,
stripPrefixes,
type LanguageConfig,
} from '../constants/language-config';
import { RMABLogger } from '../utils/logger';
import { parseRuntime } from '../utils/parse-runtime';
import { randomDelay } from '../utils/scrape-resilience';
const logger = RMABLogger.create('Audible.Series');
@@ -311,7 +313,7 @@ export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<
undefined;
// Parse all books from the series page
const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes);
const books = parseSeriesBooks($, langConfig.scraping.authorPrefixes, langConfig.scraping.narratorPrefixes, langConfig);
// Use actual book count if we got more from scraping
const bookCount = Math.max(summary.bookCount, books.length);
@@ -403,7 +405,8 @@ function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCoun
function parseSeriesBooks(
$: cheerio.CheerioAPI,
authorPrefixes: string[],
narratorPrefixes: string[]
narratorPrefixes: string[],
langConfig: LanguageConfig
): AudibleAudiobook[] {
const books: AudibleAudiobook[] = [];
const seenAsins = new Set<string>();
@@ -453,6 +456,11 @@ function parseSeriesBooks(
const ratingMatch = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
const rating = ratingMatch ? parseFloat(ratingMatch[1].replace(',', '.')) : undefined;
// Duration
const runtimeText = $el.find('.runtimeLabel').text().trim() ||
$el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
const durationMinutes = parseRuntime(runtimeText, langConfig);
books.push({
asin: bookAsin,
title,
@@ -461,6 +469,7 @@ function parseSeriesBooks(
narrator: stripPrefixes(narratorText, narratorPrefixes),
coverArtUrl,
rating,
durationMinutes,
});
});
+4 -25
View File
@@ -23,6 +23,7 @@ import {
AdaptivePacer,
FetchResultMeta,
} from '../utils/scrape-resilience';
import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';
// Module-level logger
const logger = RMABLogger.create('Audible');
@@ -1134,33 +1135,11 @@ export class AudibleService {
}
/**
* Parse runtime text to minutes using language-specific patterns
* Parse runtime text to minutes using language-specific patterns.
* Delegates to shared utility in src/lib/utils/parse-runtime.ts.
*/
private parseRuntime(runtimeText: string): number | undefined {
if (!runtimeText) return undefined;
const langConfig = this.getLangConfig();
let totalMinutes = 0;
// Try each hour pattern until one matches
for (const pattern of langConfig.scraping.runtimeHourPatterns) {
const match = runtimeText.match(pattern);
if (match) {
totalMinutes += parseInt(match[1]) * 60;
break;
}
}
// Try each minute pattern until one matches
for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
const match = runtimeText.match(pattern);
if (match) {
totalMinutes += parseInt(match[1]);
break;
}
}
return totalMinutes > 0 ? totalMinutes : undefined;
return parseRuntimeUtil(runtimeText, this.getLangConfig());
}
/**
@@ -0,0 +1,43 @@
/**
* Component: Check Watched Lists Processor
* Documentation: documentation/features/watched-lists.md
*
* Dedicated processor for checking watched series and watched authors
* for new releases and auto-creating requests.
* Supports targeted processing of a single series/author for immediate sync.
*/
import { RMABLogger } from '../utils/logger';
/**
 * Payload accepted by `processCheckWatchedLists`.
 * With no filters set, the check covers all watched series and authors.
 */
export interface CheckWatchedListsPayload {
  /** Job identifier handed to `RMABLogger.forJob` for job-scoped logging. */
  jobId?: string;
  /** Scheduled-job identifier; carried in the payload but not read by the processor in this file. */
  scheduledJobId?: string;
  /** If set, only process watched items for this user */
  userId?: string;
  /** If set, only process this specific series */
  seriesAsin?: string;
  /** If set, only process this specific author */
  authorAsin?: string;
}
/**
 * Job processor: runs the watched-lists check and reports its statistics.
 *
 * A run is labelled "targeted" when the payload carries a `userId` together
 * with a `seriesAsin` or an `authorAsin`; the label only affects log and
 * result messages. The service module is loaded lazily via dynamic import.
 *
 * @param payload - Job payload with optional user/series/author filters.
 * @returns `{ success: true, message, ...stats }` where `stats` comes from
 *          `processWatchedLists`.
 */
export async function processCheckWatchedLists(payload: CheckWatchedListsPayload): Promise<any> {
  const { jobId, userId, seriesAsin, authorAsin } = payload;
  const logger = RMABLogger.forJob(jobId, 'CheckWatchedLists');

  const isTargeted = Boolean(userId) && Boolean(seriesAsin || authorAsin);

  let startMessage = 'Starting watched lists check...';
  if (isTargeted) {
    startMessage = `Starting targeted watched lists check (user: ${userId}, series: ${seriesAsin || 'n/a'}, author: ${authorAsin || 'n/a'})...`;
  }
  logger.info(startMessage);

  const watchedListsService = await import('../services/watched-lists.service');
  const stats = await watchedListsService.processWatchedLists(logger, { userId, seriesAsin, authorAsin });

  logger.info('Watched lists check complete', { stats });

  const message = isTargeted ? 'Targeted watched item checked' : 'Watched lists checked';
  return { success: true, message, ...stats };
}
+109 -3
View File
@@ -15,6 +15,7 @@ import { PathMapper, PathMappingConfig } from '../utils/path-mapper';
import { generateFilesHash } from '../utils/files-hash';
import { fixEpubForKindle, cleanupFixedEpub } from '../utils/epub-fixer';
import { removeEmptyParentDirectories } from '../utils/cleanup-helpers';
import { getAudibleService } from '../integrations/audible.service';
/**
* Process organize files job
@@ -118,7 +119,62 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi
}
}
logger.info(`Final metadata for path organization: year=${year || 'null'}, narrator=${narrator || 'null'}`)
// Enrich missing series data from Audnexus (safety net for records created without series)
let series = audiobook.series || undefined;
let seriesPart = audiobook.seriesPart || undefined;
if (audiobook.audibleAsin && !series) {
try {
logger.info(`Missing series data, fetching from Audnexus for ASIN: ${audiobook.audibleAsin}`);
const audibleService = getAudibleService();
const audnexusData = await audibleService.getAudiobookDetails(audiobook.audibleAsin);
if (audnexusData) {
const updates: Record<string, any> = {};
if (audnexusData.series) {
series = audnexusData.series;
updates.series = series;
logger.info(`Got series "${series}" from Audnexus`);
}
if (audnexusData.seriesPart) {
seriesPart = audnexusData.seriesPart;
updates.seriesPart = seriesPart;
logger.info(`Got seriesPart "${seriesPart}" from Audnexus`);
}
if (audnexusData.seriesAsin) {
updates.seriesAsin = audnexusData.seriesAsin;
}
// Also backfill year/narrator if still missing
if (!year && audnexusData.releaseDate) {
const releaseYear = new Date(audnexusData.releaseDate).getFullYear();
if (!isNaN(releaseYear)) {
year = releaseYear;
updates.year = year;
logger.info(`Got year ${year} from Audnexus`);
}
}
if (!narrator && audnexusData.narrator) {
narrator = audnexusData.narrator;
updates.narrator = narrator;
logger.info(`Got narrator "${narrator}" from Audnexus`);
}
if (Object.keys(updates).length > 0) {
await prisma.audiobook.update({
where: { id: audiobookId },
data: updates,
});
logger.info(`Updated audiobook record with Audnexus metadata`);
}
}
} catch (error) {
// Non-fatal: missing series won't block organization, just degrades path quality
logger.warn(`Failed to fetch Audnexus data for ASIN ${audiobook.audibleAsin}: ${error instanceof Error ? error.message : String(error)}`);
}
}
logger.info(`Final metadata for path organization: year=${year || 'null'}, narrator=${narrator || 'null'}, series=${series || 'null'}, seriesPart=${seriesPart || 'null'}`);
// Get file organizer (reads media_dir from database config)
const organizer = await getFileOrganizer();
@@ -151,8 +207,8 @@ export async function processOrganizeFiles(payload: OrganizeFilesPayload): Promi
coverArtUrl: audiobook.coverArtUrl || undefined,
asin: audiobook.audibleAsin || undefined,
year,
series: audiobook.series || undefined,
seriesPart: audiobook.seriesPart || undefined,
series,
seriesPart,
},
template,
jobId ? { jobId, context: 'FileOrganizer' } : undefined,
@@ -545,6 +601,56 @@ async function processEbookOrganization(
}
}
// Enrich missing series data from Audnexus (safety net for records created without series)
if (book.audibleAsin && !series) {
try {
logger.info(`Missing series data for ebook, fetching from Audnexus for ASIN: ${book.audibleAsin}`);
const audibleService = getAudibleService();
const audnexusData = await audibleService.getAudiobookDetails(book.audibleAsin);
if (audnexusData) {
const updates: Record<string, any> = {};
if (audnexusData.series) {
series = audnexusData.series;
updates.series = series;
logger.info(`Got series "${series}" from Audnexus`);
}
if (audnexusData.seriesPart) {
seriesPart = audnexusData.seriesPart;
updates.seriesPart = seriesPart;
logger.info(`Got seriesPart "${seriesPart}" from Audnexus`);
}
if (audnexusData.seriesAsin) {
updates.seriesAsin = audnexusData.seriesAsin;
}
if (!year && audnexusData.releaseDate) {
const releaseYear = new Date(audnexusData.releaseDate).getFullYear();
if (!isNaN(releaseYear)) {
year = releaseYear;
updates.year = year;
logger.info(`Got year ${year} from Audnexus`);
}
}
if (!narrator && audnexusData.narrator) {
narrator = audnexusData.narrator;
updates.narrator = narrator;
logger.info(`Got narrator "${narrator}" from Audnexus`);
}
if (Object.keys(updates).length > 0) {
await prisma.audiobook.update({
where: { id: audiobookId },
data: updates,
});
logger.info(`Updated book record with Audnexus metadata`);
}
}
} catch (error) {
logger.warn(`Failed to fetch Audnexus data for ASIN ${book.audibleAsin}: ${error instanceof Error ? error.message : String(error)}`);
}
}
logger.info(`Final metadata for path organization: year=${year || 'null'}, narrator=${narrator || 'null'}, series=${series || 'null'}, seriesPart=${seriesPart || 'null'}`);
// Check if this is an indexer download (needs to keep source for seeding)
+8 -5
View File
@@ -54,10 +54,12 @@ export class LocalAuthProvider implements IAuthProvider {
return { success: false, error: 'Username and password required' };
}
const normalizedUsername = username.trim().toLowerCase();
// Find user (exclude soft-deleted users)
const user = await prisma.user.findFirst({
where: {
plexUsername: username,
plexUsername: normalizedUsername,
authProvider: 'local',
deletedAt: null, // Exclude soft-deleted users
},
@@ -144,9 +146,10 @@ export class LocalAuthProvider implements IAuthProvider {
async register(params: RegisterParams): Promise<AuthResult> {
try {
const { username, password } = params;
const normalizedUsername = username?.trim().toLowerCase();
// Validate
if (!username || username.length < 3) {
if (!normalizedUsername || normalizedUsername.length < 3) {
return { success: false, error: 'Username must be at least 3 characters' };
}
@@ -167,7 +170,7 @@ export class LocalAuthProvider implements IAuthProvider {
// Check username uniqueness (only among non-deleted users)
const existing = await prisma.user.findFirst({
where: {
plexUsername: username,
plexUsername: normalizedUsername,
authProvider: 'local',
deletedAt: null, // Allow reuse of usernames from deleted accounts
},
@@ -194,8 +197,8 @@ export class LocalAuthProvider implements IAuthProvider {
// Create user
const user = await prisma.user.create({
data: {
plexId: `local-${username}`,
plexUsername: username,
plexId: `local-${normalizedUsername}`,
plexUsername: normalizedUsername,
authToken: encryptedHash,
authProvider: 'local',
role: isFirstUser ? 'admin' : 'user',
+50
View File
@@ -27,6 +27,7 @@ export type JobType =
| 'cleanup_seeded_torrents'
| 'monitor_rss_feeds'
| 'sync_goodreads_shelves'
| 'check_watched_lists'
| 'send_notification'
// Ebook-specific job types
| 'search_ebook'
@@ -113,6 +114,16 @@ export interface SyncGoodreadsShelvesPayload extends JobPayload {
maxLookupsPerShelf?: number;
}
/**
 * Payload for `check_watched_lists` jobs.
 * With no filters set, the check covers all watched series and authors.
 */
export interface CheckWatchedListsPayload extends JobPayload {
  /** Identifier of the scheduled job that triggered this run, when there is one. */
  scheduledJobId?: string;
  /** If set, only process watched items for this user */
  userId?: string;
  /** If set, only process this specific series */
  seriesAsin?: string;
  /** If set, only process this specific author */
  authorAsin?: string;
}
// Ebook-specific payload interfaces
export interface SearchEbookPayload extends JobPayload {
requestId: string;
@@ -384,6 +395,12 @@ export class JobQueueService {
return await processSyncGoodreadsShelves(payloadWithJobId);
});
this.queue.process('check_watched_lists', 1, async (job: BullJob<CheckWatchedListsPayload>) => {
const { processCheckWatchedLists } = await import('../processors/check-watched-lists.processor');
const payloadWithJobId = await this.ensureJobRecord(job, 'check_watched_lists');
return await processCheckWatchedLists(payloadWithJobId);
});
// Send notification processor
this.queue.process('send_notification', 2, async (job: BullJob<SendNotificationPayload>) => {
const { processSendNotification } = await import('../processors/send-notification.processor');
@@ -766,6 +783,39 @@ export class JobQueueService {
);
}
/**
 * Enqueue an unfiltered `check_watched_lists` job (watched series + watched authors).
 *
 * @param scheduledJobId - Identifier of the scheduled job that triggered the run, if any.
 * @returns The value returned by `addJob` for the queued job.
 */
async addCheckWatchedListsJob(scheduledJobId?: string): Promise<string> {
  const payload = { scheduledJobId } as CheckWatchedListsPayload;
  const jobOptions = { priority: 7 };
  return await this.addJob('check_watched_lists', payload, jobOptions);
}
/**
 * Enqueue a `check_watched_lists` job narrowed to one user's watched series or author.
 * Intended for immediate processing right after a user adds a new watch.
 *
 * @param userId - User whose watch should be processed.
 * @param seriesAsin - Series to check, if the new watch is a series.
 * @param authorAsin - Author to check, if the new watch is an author.
 * @returns The value returned by `addJob` for the queued job.
 */
async addCheckWatchedItemJob(userId: string, seriesAsin?: string, authorAsin?: string): Promise<string> {
  const payload = { userId, seriesAsin, authorAsin } as CheckWatchedListsPayload;
  // Priority 8 rather than the scheduled job's 7, because this run is user-initiated.
  const jobOptions = { priority: 8 };
  return await this.addJob('check_watched_lists', payload, jobOptions);
}
// =========================================================================
// EBOOK-SPECIFIC JOB METHODS
// =========================================================================
@@ -12,6 +12,7 @@ import { getJobQueueService } from '@/lib/services/job-queue.service';
import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
import { getAudibleService } from '@/lib/integrations/audible.service';
import { RMABLogger } from '@/lib/utils/logger';
import { seedAsin } from '@/lib/services/works.service';
const logger = RMABLogger.create('RequestCreator');
@@ -147,6 +148,15 @@ export async function createRequestForUser(
}
}
// Seed works table for cross-ASIN matching (Layer 2: request-time seeding)
seedAsin(
audiobook.asin,
audiobookRecord.title,
audiobookRecord.author,
audiobookRecord.narrator || undefined,
undefined // duration not available at request time
).catch(() => {});
// Check if user already has an active request for this audiobook
const existingRequest = await prisma.request.findFirst({
where: {
+18 -1
View File
@@ -10,7 +10,7 @@ import { RMABLogger } from '../utils/logger';
const logger = RMABLogger.create('Scheduler');
export type ScheduledJobType = 'plex_library_scan' | 'plex_recently_added_check' | 'audible_refresh' | 'retry_missing_torrents' | 'retry_failed_imports' | 'cleanup_seeded_torrents' | 'monitor_rss_feeds' | 'sync_goodreads_shelves';
export type ScheduledJobType = 'plex_library_scan' | 'plex_recently_added_check' | 'audible_refresh' | 'retry_missing_torrents' | 'retry_failed_imports' | 'cleanup_seeded_torrents' | 'monitor_rss_feeds' | 'sync_goodreads_shelves' | 'check_watched_lists';
export interface ScheduledJob {
id: string;
@@ -133,6 +133,13 @@ export class SchedulerService {
enabled: true, // Enable by default
payload: {},
},
{
name: 'Check Watched Lists',
type: 'check_watched_lists' as ScheduledJobType,
schedule: '0 0 * * *', // Daily at midnight (every 24 hours)
enabled: true, // Enable by default
payload: {},
},
];
let created = 0;
@@ -353,6 +360,9 @@ export class SchedulerService {
case 'sync_goodreads_shelves':
bullJobId = await this.triggerSyncGoodreadsShelves(job);
break;
case 'check_watched_lists':
bullJobId = await this.triggerCheckWatchedLists(job);
break;
default:
throw new Error(`Unknown job type: ${job.type}`);
}
@@ -627,6 +637,13 @@ export class SchedulerService {
private async triggerSyncGoodreadsShelves(job: any): Promise<string> {
return await this.jobQueue.addSyncGoodreadsShelvesJob(job.id);
}
/**
 * Queue the watched-lists check (watched series + watched authors) for a scheduled job.
 *
 * @param job - Scheduled job record; only its `id` is used.
 * @returns The identifier returned by the job queue.
 */
private async triggerCheckWatchedLists(job: any): Promise<string> {
  const scheduledJobId = job.id;
  return await this.jobQueue.addCheckWatchedListsJob(scheduledJobId);
}
}
// Singleton instance
+414
View File
@@ -0,0 +1,414 @@
/**
* Component: Watched Lists Service
* Documentation: documentation/features/watched-lists.md
*
* Checks watched series and watched authors for new releases.
* Deduplicates results using the works table, checks against user's library,
* and auto-creates requests via the shared request-creator service.
* Follows the same pattern as goodreads-sync.service.ts.
*/
import { prisma } from '@/lib/db';
import { getAudibleService, AudibleAudiobook } from '@/lib/integrations/audible.service';
import { scrapeSeriesPage } from '@/lib/integrations/audible-series';
import { deduplicateAndCollectGroups } from '@/lib/utils/deduplicate-audiobooks';
import { persistDedupGroups } from '@/lib/services/works.service';
import { createRequestForUser } from '@/lib/services/request-creator.service';
import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
import { getSiblingAsins } from '@/lib/services/works.service';
import { RMABLogger } from '@/lib/utils/logger';
const logger = RMABLogger.create('WatchedLists');
/** Max books to process per series (avoid excessively long runs) */
const MAX_BOOKS_PER_SERIES = 200;
/** Max author book pages to scrape */
const MAX_AUTHOR_PAGES = 4;
/** Delay between scrapes to avoid rate limiting (ms) */
const SCRAPE_DELAY_MS = 2000;
/** Counters accumulated over one watched-lists run and returned to the caller. */
export interface WatchedListsSyncStats {
  /** Unique series ASINs whose processing completed (including those that yielded no books). */
  seriesChecked: number;
  /** Unique author ASINs whose processing completed (including those that yielded no books). */
  authorsChecked: number;
  /** Total books scraped, counted before deduplication. */
  booksFound: number;
  /** Requests for which `createRequestForUser` reported success. */
  requestsCreated: number;
  /** Books skipped because their ASIN was found by the library ownership lookup. */
  skippedOwned: number;
  /** Books for which `createRequestForUser` returned a non-success result. */
  skippedExisting: number;
  /** Series/authors whose processing threw. */
  errors: number;
}
/**
 * Optional filters for a watched-lists run. Each set field is added to the
 * database query for watched items; unset fields impose no restriction.
 */
export interface WatchedListsSyncOptions {
  /** Process only this specific user (for targeted sync) */
  userId?: string;
  /** Process only this specific series (for immediate sync on watch) */
  seriesAsin?: string;
  /** Process only this specific author (for immediate sync on watch) */
  authorAsin?: string;
}
/**
 * Process watched series and authors: scrape for new releases,
 * deduplicate, check library ownership, and create requests.
 * Called from the check_watched_lists processor.
 *
 * When exactly one of `options.seriesAsin` / `options.authorAsin` is set, only
 * that kind of watch is processed. Without this, a targeted single-series
 * check would also re-scrape every author the user watches (and vice versa),
 * because the other pass would be filtered by `userId` alone.
 *
 * @param jobLogger - Job-scoped logger; the module logger is used when omitted.
 * @param options - Optional user/series/author filters.
 * @returns Counters describing what the run did.
 */
export async function processWatchedLists(
  jobLogger?: ReturnType<typeof RMABLogger.forJob>,
  options: WatchedListsSyncOptions = {}
): Promise<WatchedListsSyncStats> {
  const log = jobLogger || logger;
  const stats: WatchedListsSyncStats = {
    seriesChecked: 0,
    authorsChecked: 0,
    booksFound: 0,
    requestsCreated: 0,
    skippedOwned: 0,
    skippedExisting: 0,
    errors: 0,
  };

  const seriesOnly = Boolean(options.seriesAsin) && !options.authorAsin;
  const authorOnly = Boolean(options.authorAsin) && !options.seriesAsin;

  // ---- Watched Series ----
  if (!authorOnly) {
    await processAllWatchedSeries(log, stats, options);
  }

  // ---- Watched Authors ----
  if (!seriesOnly) {
    await processAllWatchedAuthors(log, stats, options);
  }

  log.info('Watched lists sync complete', {
    seriesChecked: stats.seriesChecked,
    authorsChecked: stats.authorsChecked,
    booksFound: stats.booksFound,
    requestsCreated: stats.requestsCreated,
    skippedOwned: stats.skippedOwned,
    skippedExisting: stats.skippedExisting,
    errors: stats.errors,
  });

  return stats;
}
// ---------------------------------------------------------------------------
// Watched Series
// ---------------------------------------------------------------------------
/**
 * Load watched-series subscriptions (optionally filtered by user and/or
 * series ASIN), group them by series ASIN so each series is handled once for
 * all of its watchers, and process each group in turn.
 *
 * A failure in one series is logged and counted in `stats.errors`; the
 * remaining series are still processed. A pause of `SCRAPE_DELAY_MS`
 * follows every series.
 */
async function processAllWatchedSeries(
  log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
  stats: WatchedListsSyncStats,
  options: WatchedListsSyncOptions
): Promise<void> {
  const filter: { userId?: string; seriesAsin?: string } = {};
  if (options.userId) filter.userId = options.userId;
  if (options.seriesAsin) filter.seriesAsin = options.seriesAsin;

  const allSubscriptions = await prisma.watchedSeries.findMany({
    where: filter,
    include: { user: { select: { id: true, plexUsername: true } } },
  });

  if (allSubscriptions.length === 0) {
    log.info('No watched series to process');
    return;
  }

  // One bucket per series ASIN, so a series watched by several users is scraped once.
  const bySeries = new Map<string, typeof allSubscriptions>();
  for (const entry of allSubscriptions) {
    const bucket = bySeries.get(entry.seriesAsin);
    if (bucket) {
      bucket.push(entry);
    } else {
      bySeries.set(entry.seriesAsin, [entry]);
    }
  }

  log.info(`Processing ${bySeries.size} unique watched series (${allSubscriptions.length} total subscriptions)`);

  for (const [asin, watchers] of bySeries) {
    try {
      await processSeriesForUsers(asin, watchers, log, stats);
    } catch (error) {
      stats.errors++;
      const reason = error instanceof Error ? error.message : String(error);
      log.error(`Failed to process watched series ${asin}`, { error: reason });
    }

    // Rate limit between series
    await delay(SCRAPE_DELAY_MS);
  }
}
/**
 * Scrape one series and create requests for every user watching it.
 *
 * Pages are fetched until a page is missing/empty, the scraper reports no
 * further pages, or at least `MAX_BOOKS_PER_SERIES` books were collected.
 * The collected books are deduplicated, dedup groups are persisted in the
 * background, and each subscription then gets its requests created and its
 * `lastCheckedAt` refreshed (update failures are ignored).
 *
 * @param seriesAsin - Series to scrape.
 * @param subscriptions - All subscriptions for this series; the first one supplies the title used in logs.
 * @param log - Logger to report progress to.
 * @param stats - Run counters, mutated in place.
 */
async function processSeriesForUsers(
  seriesAsin: string,
  subscriptions: Array<{ id: string; seriesTitle: string; user: { id: string; plexUsername: string } }>,
  log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
  stats: WatchedListsSyncStats
): Promise<void> {
  const seriesTitle = subscriptions[0].seriesTitle;
  log.info(`Scraping watched series: "${seriesTitle}" (${seriesAsin})`);

  // Scrape all pages of the series (up to MAX_BOOKS_PER_SERIES)
  const scraped: AudibleAudiobook[] = [];
  for (let pageNumber = 1; scraped.length < MAX_BOOKS_PER_SERIES; pageNumber++) {
    const pageResult = await scrapeSeriesPage(seriesAsin, pageNumber);
    if (!pageResult || pageResult.books.length === 0) break;

    scraped.push(...pageResult.books);
    if (!pageResult.hasMore) break;

    // Short pause before requesting the next page.
    await delay(1000);
  }

  if (scraped.length === 0) {
    log.info(`No books found for series "${seriesTitle}"`);
    stats.seriesChecked++;
    return;
  }

  stats.booksFound += scraped.length;

  const deduped = deduplicateAndCollectGroups(scraped);

  // Persist dedup groups (fire-and-forget)
  if (deduped.groups.length > 0) {
    persistDedupGroups(deduped.groups).catch(() => {});
  }

  // For each user watching this series, create requests for new books
  for (const { id: subscriptionId, user } of subscriptions) {
    await createRequestsForUser(user.id, user.plexUsername, deduped.books, log, stats);

    // Update lastCheckedAt
    await prisma.watchedSeries
      .update({
        where: { id: subscriptionId },
        data: { lastCheckedAt: new Date() },
      })
      .catch(() => {});
  }

  stats.seriesChecked++;
}
// ---------------------------------------------------------------------------
// Watched Authors
// ---------------------------------------------------------------------------
/**
 * Load watched-author subscriptions (optionally filtered by user and/or
 * author ASIN), group them by author ASIN so each author is handled once for
 * all of their watchers, and process each group in turn.
 *
 * A failure for one author is logged and counted in `stats.errors`; the
 * remaining authors are still processed. A pause of `SCRAPE_DELAY_MS`
 * follows every author.
 */
async function processAllWatchedAuthors(
  log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
  stats: WatchedListsSyncStats,
  options: WatchedListsSyncOptions
): Promise<void> {
  const filter: { userId?: string; authorAsin?: string } = {};
  if (options.userId) filter.userId = options.userId;
  if (options.authorAsin) filter.authorAsin = options.authorAsin;

  const allSubscriptions = await prisma.watchedAuthor.findMany({
    where: filter,
    include: { user: { select: { id: true, plexUsername: true } } },
  });

  if (allSubscriptions.length === 0) {
    log.info('No watched authors to process');
    return;
  }

  // One bucket per author ASIN, so an author watched by several users is scraped once.
  const byAuthor = new Map<string, typeof allSubscriptions>();
  for (const entry of allSubscriptions) {
    const bucket = byAuthor.get(entry.authorAsin);
    if (bucket) {
      bucket.push(entry);
    } else {
      byAuthor.set(entry.authorAsin, [entry]);
    }
  }

  log.info(`Processing ${byAuthor.size} unique watched authors (${allSubscriptions.length} total subscriptions)`);

  for (const [asin, watchers] of byAuthor) {
    try {
      await processAuthorForUsers(asin, watchers, log, stats);
    } catch (error) {
      stats.errors++;
      const reason = error instanceof Error ? error.message : String(error);
      log.error(`Failed to process watched author ${asin}`, { error: reason });
    }

    // Rate limit between authors
    await delay(SCRAPE_DELAY_MS);
  }
}
/**
 * Scrape one author's books and create requests for every user watching them.
 *
 * Up to `MAX_AUTHOR_PAGES` result pages are fetched; paging stops early on an
 * empty page, when no further pages are reported, or when a page fails (the
 * failure is logged and whatever was collected so far is still used). The
 * collected books are deduplicated, dedup groups are persisted in the
 * background, and each subscription then gets its requests created and its
 * `lastCheckedAt` refreshed (update failures are ignored).
 *
 * @param authorAsin - Author to scrape.
 * @param subscriptions - All subscriptions for this author; the first one supplies the name used for the search and logs.
 * @param log - Logger to report progress to.
 * @param stats - Run counters, mutated in place.
 */
async function processAuthorForUsers(
  authorAsin: string,
  subscriptions: Array<{ id: string; authorName: string; user: { id: string; plexUsername: string } }>,
  log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
  stats: WatchedListsSyncStats
): Promise<void> {
  const authorName = subscriptions[0].authorName;
  log.info(`Scraping watched author: "${authorName}" (${authorAsin})`);

  const audible = getAudibleService();
  const scraped: AudibleAudiobook[] = [];

  for (let pageNumber = 1; pageNumber <= MAX_AUTHOR_PAGES; pageNumber++) {
    try {
      const pageResult = await audible.searchByAuthorAsin(authorName, authorAsin, pageNumber);
      if (pageResult.books.length === 0) break;

      scraped.push(...pageResult.books);
      if (!pageResult.hasMore) break;

      // Short pause before requesting the next page.
      await delay(1000);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      log.error(`Failed to scrape author page ${pageNumber} for "${authorName}"`, { error: reason });
      break;
    }
  }

  if (scraped.length === 0) {
    log.info(`No books found for author "${authorName}"`);
    stats.authorsChecked++;
    return;
  }

  stats.booksFound += scraped.length;

  const deduped = deduplicateAndCollectGroups(scraped);

  // Persist dedup groups (fire-and-forget)
  if (deduped.groups.length > 0) {
    persistDedupGroups(deduped.groups).catch(() => {});
  }

  // For each user watching this author, create requests for new books
  for (const { id: subscriptionId, user } of subscriptions) {
    await createRequestsForUser(user.id, user.plexUsername, deduped.books, log, stats);

    // Update lastCheckedAt
    await prisma.watchedAuthor
      .update({
        where: { id: subscriptionId },
        data: { lastCheckedAt: new Date() },
      })
      .catch(() => {});
  }

  stats.authorsChecked++;
}
// ---------------------------------------------------------------------------
// Shared: Create requests for a user from a list of books
// ---------------------------------------------------------------------------
/**
 * Create audiobook requests on behalf of one user for every book in `books`
 * that carries an ASIN and is not already owned.
 *
 * @param userId - ID of the user the requests are created for
 * @param username - Name used in log messages only
 * @param books - Candidate books
 * @param log - Logger for per-book outcomes
 * @param stats - Mutable counters: `skippedOwned`, `requestsCreated`,
 *   `skippedExisting`
 */
async function createRequestsForUser(
  userId: string,
  username: string,
  books: AudibleAudiobook[],
  log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
  stats: WatchedListsSyncStats
): Promise<void> {
  // Only books that carry an ASIN can be requested.
  const candidates = books.filter(candidate => candidate.asin);
  if (candidates.length === 0) return;

  // One batched ownership lookup (direct + sibling expansion) for the whole list.
  const alreadyOwned = await getOwnedAsins(candidates.map(candidate => candidate.asin));

  for (const candidate of candidates) {
    if (alreadyOwned.has(candidate.asin)) {
      // Owned directly or through a sibling ASIN.
      stats.skippedOwned++;
      continue;
    }

    try {
      const { asin, title, author, narrator, description, coverArtUrl } = candidate;
      const outcome = await createRequestForUser(userId, {
        asin,
        title,
        author,
        narrator,
        description,
        coverArtUrl,
      });

      if (!outcome.success) {
        // already_available, being_processed, duplicate — all expected
        stats.skippedExisting++;
        continue;
      }

      stats.requestsCreated++;
      log.info(`Auto-requested "${title}" by ${author} for ${username}`);
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      log.error(`Failed to create request for "${candidate.title}" for ${username}`, {
        error: message,
      });
    }
  }
}
/**
 * Resolve which of the given ASINs are already in the library, either
 * directly or because a sibling ASIN of the same work is in the library.
 *
 * @param asins - ASINs to check
 * @returns The subset of `asins` considered owned
 */
async function getOwnedAsins(asins: string[]): Promise<Set<string>> {
  // Direct library lookup
  const directHits = await prisma.plexLibrary.findMany({
    where: { asin: { in: asins } },
    select: { asin: true },
  });

  const owned = new Set<string>();
  for (const { asin } of directHits) {
    if (asin) owned.add(asin);
  }

  // Sibling expansion via works table (best-effort: failures are ignored)
  try {
    const siblingsByAsin = await getSiblingAsins(asins);

    // Reverse index: sibling ASIN -> the input ASINs it stands in for.
    const originalsBySibling = new Map<string, string[]>();
    for (const [originalAsin, siblings] of siblingsByAsin) {
      for (const sibling of siblings) {
        const originals = originalsBySibling.get(sibling);
        if (originals) {
          originals.push(originalAsin);
        } else {
          originalsBySibling.set(sibling, [originalAsin]);
        }
      }
    }

    if (originalsBySibling.size > 0) {
      const siblingHits = await prisma.plexLibrary.findMany({
        where: { asin: { in: [...originalsBySibling.keys()] } },
        select: { asin: true },
      });

      for (const { asin } of siblingHits) {
        if (!asin) continue;
        // Mark the original ASIN as owned (not the sibling)
        for (const originalAsin of originalsBySibling.get(asin) ?? []) {
          owned.add(originalAsin);
        }
      }
    }
  } catch {
    // Works table expansion is best-effort
  }

  return owned;
}
/** Return a promise that resolves after `ms` milliseconds. */
function delay(ms: number): Promise<void> {
  return new Promise<void>(done => {
    setTimeout(done, ms);
  });
}
+248
View File
@@ -0,0 +1,248 @@
/**
* Component: Works Service
* Documentation: documentation/integrations/audible.md
*
* Manages the works table — persistent cross-ASIN audiobook identity mapping.
* Layer 1: Auto-populated from dedup logic when users browse search/author/series pages.
* Layer 2: Seeded at request time to ensure requested ASINs are tracked.
*/
import { prisma } from '@/lib/db';
import { RMABLogger } from '@/lib/utils/logger';
import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
// Module-level logger, created with the 'WorksService' label.
const logger = RMABLogger.create('WorksService');
// ---------------------------------------------------------------------------
// Layer 1: Persist dedup groups (fire-and-forget from API routes)
// ---------------------------------------------------------------------------
/**
 * Persist dedup groups to the works table. For each group of 2+ ASINs that
 * were identified as the same audiobook, create or update a Work record
 * linking all ASINs together.
 *
 * Groups are persisted independently: a failure in one group does not stop
 * the remaining groups from being written. Failures are reported in a single
 * log entry after the loop (first error message plus the failure count).
 *
 * Safe to call fire-and-forget — persistence errors are logged, not thrown.
 *
 * @param groups - Dedup groups to persist
 */
export async function persistDedupGroups(groups: DedupGroup[]): Promise<void> {
  let failedCount = 0;
  let firstError: unknown;

  for (const group of groups) {
    try {
      await persistSingleGroup(group);
    } catch (error) {
      // Remember only the first failure; keep going with the other groups.
      if (failedCount === 0) firstError = error;
      failedCount++;
    }
  }

  if (failedCount > 0) {
    logger.error('Failed to persist dedup groups', {
      error: firstError instanceof Error ? firstError.message : String(firstError),
      groupCount: groups.length,
      failedCount,
    });
  }
}
/**
 * Persist a single dedup group. Handles merging when ASINs span multiple
 * existing works.
 *
 * - No ASIN known yet: create a new work and link every ASIN to it.
 * - Some ASINs known: use the first existing work as the target, fold any
 *   other matching works into it, link the ASINs that are still missing, and
 *   mark the group's canonical ASIN as canonical.
 *
 * The ASIN list is de-duplicated first so a repeated ASIN in `allAsins`
 * cannot produce two create calls for the same ASIN.
 *
 * @param group - Dedup group to persist
 */
async function persistSingleGroup(group: DedupGroup): Promise<void> {
  const { canonicalAsin, allAsins, title, author, narrator, durationMinutes } = group;

  // Guard against the same ASIN being listed more than once in the group.
  const uniqueAsins = [...new Set(allAsins)];

  // Link the given ASINs to a work. Narrator and duration are only known for
  // the canonical entry, so only that row carries them.
  const linkAsins = (workId: string, asins: string[]) =>
    Promise.all(
      asins.map(asin =>
        prisma.workAsin.create({
          data: {
            workId,
            asin,
            narrator: asin === canonicalAsin ? narrator : undefined,
            durationMinutes: asin === canonicalAsin ? durationMinutes : undefined,
            isCanonical: asin === canonicalAsin,
            source: 'dedup_auto',
          },
        })
      )
    );

  // Find which of these ASINs already exist in work_asins
  const existingEntries = await prisma.workAsin.findMany({
    where: { asin: { in: uniqueAsins } },
    select: { asin: true, workId: true },
  });

  // Collect unique work IDs that already contain any of our ASINs
  const existingWorkIds = [...new Set(existingEntries.map(e => e.workId))];
  const existingAsinSet = new Set(existingEntries.map(e => e.asin));

  if (existingWorkIds.length === 0) {
    // No existing works — create a new one with all ASINs
    const work = await prisma.work.create({
      data: { title, author },
    });
    await linkAsins(work.id, uniqueAsins);
    logger.debug('Created new work', { workId: work.id, asinCount: uniqueAsins.length });
    return;
  }

  // Use the first existing work as the target
  const targetWorkId = existingWorkIds[0];

  // If multiple existing works, merge them into the target
  if (existingWorkIds.length > 1) {
    const mergeWorkIds = existingWorkIds.slice(1);

    // Move all ASINs from other works to the target
    await prisma.workAsin.updateMany({
      where: { workId: { in: mergeWorkIds } },
      data: { workId: targetWorkId },
    });

    // Delete the now-empty works
    await prisma.work.deleteMany({
      where: { id: { in: mergeWorkIds } },
    });

    logger.debug('Merged works', {
      targetWorkId,
      mergedWorkIds: mergeWorkIds,
    });
  }

  // Add any new ASINs that don't already exist
  const newAsins = uniqueAsins.filter(a => !existingAsinSet.has(a));
  if (newAsins.length > 0) {
    await linkAsins(targetWorkId, newAsins);
    logger.debug('Added ASINs to existing work', {
      workId: targetWorkId,
      newAsinCount: newAsins.length,
    });
  }

  // Update canonical status: ensure the canonical ASIN is marked.
  // NOTE(review): flags already set on other ASINs of this work are left
  // untouched, so a work can end up with more than one canonical row —
  // confirm whether consumers expect exactly one.
  await prisma.workAsin.updateMany({
    where: { workId: targetWorkId, asin: canonicalAsin },
    data: { isCanonical: true },
  });
}
// ---------------------------------------------------------------------------
// Layer 2: Seed ASIN at request time
// ---------------------------------------------------------------------------
/**
 * Make sure an ASIN has a row in the works table, creating a single-ASIN
 * work for it when it is not tracked yet. Intended to be called when a
 * request is created.
 *
 * Errors are logged and swallowed, so this is safe to call fire-and-forget.
 *
 * @param asin - ASIN to track
 * @param title - Title stored on the new work
 * @param author - Author stored on the new work
 * @param narrator - Optional narrator stored on the ASIN row
 * @param durationMinutes - Optional duration stored on the ASIN row
 */
export async function seedAsin(
  asin: string,
  title: string,
  author: string,
  narrator?: string,
  durationMinutes?: number
): Promise<void> {
  try {
    // Nothing to do when the ASIN is already tracked.
    const alreadyTracked = await prisma.workAsin.findUnique({
      where: { asin },
    });
    if (alreadyTracked) return;

    // New work holding just this ASIN, flagged as its canonical entry.
    const { id: workId } = await prisma.work.create({
      data: { title, author },
    });
    await prisma.workAsin.create({
      data: {
        workId,
        asin,
        narrator,
        durationMinutes,
        isCanonical: true,
        source: 'dedup_auto',
      },
    });

    logger.debug('Seeded ASIN', { workId, asin });
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error('Failed to seed ASIN', {
      error: message,
      asin,
    });
  }
}
// ---------------------------------------------------------------------------
// Sibling ASIN lookup (for library matching expansion)
// ---------------------------------------------------------------------------
/**
 * Map each input ASIN to the other ASINs that share its work.
 *
 * An ASIN never appears in its own sibling list. Input ASINs that are not in
 * the works table, or that have no siblings, are left out of the result.
 *
 * @param asins - ASINs to look up
 * @returns Map of input ASIN -> sibling ASINs (non-empty lists only)
 */
export async function getSiblingAsins(
  asins: string[]
): Promise<Map<string, string[]>> {
  const siblingsByAsin = new Map<string, string[]>();
  if (asins.length === 0) return siblingsByAsin;

  // Step 1: which input ASINs are tracked, and in which works?
  const matchedRows = await prisma.workAsin.findMany({
    where: { asin: { in: asins } },
    select: { asin: true, workId: true },
  });
  if (matchedRows.length === 0) return siblingsByAsin;

  // Distinct work IDs, in first-seen order.
  const workIds = [...new Set(matchedRows.map(row => row.workId))];

  // Step 2: every ASIN belonging to those works, bucketed by work.
  const memberRows = await prisma.workAsin.findMany({
    where: { workId: { in: workIds } },
    select: { asin: true, workId: true },
  });

  const membersByWork = new Map<string, string[]>();
  for (const { workId, asin } of memberRows) {
    const members = membersByWork.get(workId);
    if (members) {
      members.push(asin);
    } else {
      membersByWork.set(workId, [asin]);
    }
  }

  // Step 3: siblings = members of the same work, minus the ASIN itself.
  for (const { asin, workId } of matchedRows) {
    const members = membersByWork.get(workId) ?? [];
    const siblings = members.filter(member => member !== asin);
    if (siblings.length > 0) {
      siblingsByAsin.set(asin, siblings);
    }
  }

  return siblingsByAsin;
}
+107
View File
@@ -8,6 +8,7 @@
import { prisma } from '@/lib/db';
import { LibraryItem } from '@/lib/services/library';
import { getSiblingAsins } from '@/lib/services/works.service';
import { RMABLogger } from './logger';
// Module-level logger
@@ -178,6 +179,61 @@ export async function enrichAudiobooksWithMatches(
}
}
// Works-table sibling expansion: check if unmatched ASINs have siblings in the library
try {
const unmatchedAsins = results.filter(r => !r.isAvailable).map(r => r.asin);
if (unmatchedAsins.length > 0) {
const siblingMap = await getSiblingAsins(unmatchedAsins);
if (siblingMap.size > 0) {
// Collect all sibling ASINs for a single batch library query
const allSiblingAsins = new Set<string>();
for (const siblings of siblingMap.values()) {
for (const s of siblings) allSiblingAsins.add(s);
}
if (allSiblingAsins.size > 0) {
const siblingLibraryMatches = await prisma.plexLibrary.findMany({
where: { asin: { in: [...allSiblingAsins] } },
select: { asin: true, plexGuid: true },
});
const libraryAsinSet = new Set(
siblingLibraryMatches.filter(m => m.asin).map(m => m.asin!.toLowerCase())
);
// Update results where a sibling ASIN is found in the library
for (const result of results) {
if (result.isAvailable) continue;
const siblings = siblingMap.get(result.asin);
if (!siblings) continue;
const matchedSiblingAsin = siblings.find(s => libraryAsinSet.has(s.toLowerCase()));
if (matchedSiblingAsin) {
const libMatch = siblingLibraryMatches.find(
m => m.asin?.toLowerCase() === matchedSiblingAsin.toLowerCase()
);
(result as any).isAvailable = true;
(result as any).plexGuid = libMatch?.plexGuid || null;
}
}
const siblingMatchCount = results.filter(r => {
if (!r.isAvailable) return false;
return siblingMap.has(r.asin);
}).length;
logger.debug('Sibling expansion', {
unmatchedCount: unmatchedAsins.length,
siblingGroupsFound: siblingMap.size,
siblingMatches: siblingMatchCount,
});
}
}
}
} catch (error) {
// Works table expansion is best-effort — direct matches still work
logger.error('Sibling ASIN expansion failed', {
error: error instanceof Error ? error.message : String(error),
});
}
// Always enrich with request status (check ANY user's requests)
const asins = audiobooks.map(book => book.asin);
@@ -272,6 +328,57 @@ export async function enrichAudiobooksWithMatches(
return results;
}
/**
 * Collect every ASIN that counts as "available": present in the library or
 * attached to a completed audiobook request, plus the works-table siblings of
 * those ASINs. Used by paginated API routes to exclude available items.
 *
 * @returns Set of available ASINs
 */
export async function getAvailableAsins(): Promise<Set<string>> {
  // ASINs present in the library (Plex or Audiobookshelf)
  const libraryQuery = prisma.plexLibrary.findMany({
    where: { asin: { not: null } },
    select: { asin: true },
    distinct: ['asin'],
  });

  // ASINs with completed audiobook requests
  const completedQuery = prisma.audiobook.findMany({
    where: {
      audibleAsin: { not: null },
      requests: {
        some: {
          status: 'completed',
          type: 'audiobook',
          deletedAt: null,
        },
      },
    },
    select: { audibleAsin: true },
  });

  const [libraryRows, completedRows] = await Promise.all([libraryQuery, completedQuery]);

  const available = new Set<string>();
  for (const { asin } of libraryRows) {
    if (asin) available.add(asin);
  }
  for (const { audibleAsin } of completedRows) {
    if (audibleAsin) available.add(audibleAsin);
  }

  // Nothing to expand when the base set is empty.
  if (available.size === 0) return available;

  // Expand with works-table sibling ASINs
  try {
    const siblingsByAsin = await getSiblingAsins([...available]);
    for (const siblings of siblingsByAsin.values()) {
      for (const sibling of siblings) available.add(sibling);
    }
  } catch {
    // Works table expansion is best-effort
  }

  return available;
}
/**
* Normalize ISBN for comparison (remove dashes and spaces)
*/
+203
View File
@@ -0,0 +1,203 @@
/**
* Component: Audiobook Deduplication Utility
* Documentation: documentation/integrations/audible.md
*
* Deduplicates audiobook listings that represent the same recording
* under different ASINs (publisher re-listings, rights transfers, etc.).
*
* Dedup key: normalized title + normalized narrator
* Duration tolerance: max(longerDuration * 0.01, 5) minutes
* Missing duration treated as compatible (graceful degradation).
*/
import type { AudibleAudiobook } from '../integrations/audible.service';
// ---------------------------------------------------------------------------
// Title / narrator normalization
// ---------------------------------------------------------------------------
/** Patterns in parentheses or brackets to strip (edition markers, format labels) */
const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;

/** Trailing subtitle after colon or long dash */
const SUBTITLE_RE = /\s*[:]\s+.+$/;
const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;

/** Trailing descriptors like "A Novel", "A Memoir" */
const TRAILING_DESCRIPTOR_RE = /\s*[-:,]?\s+a\s+(novel|memoir|thriller|mystery|romance|story|tale|novella)\s*$/i;

/**
 * Reduce a title to its dedup comparison form.
 *
 * The title is lower-cased, then stripped in this order: bracketed edition
 * markers, a trailing "a novel"-style descriptor, a subtitle after a colon,
 * and a subtitle after a spaced dash (hyphenated words are untouched).
 * Whitespace is collapsed and trimmed at the end.
 *
 * @param title - Raw listing title
 * @returns Normalized title
 */
export function normalizeTitle(title: string): string {
  // Order matters: descriptors must go before subtitle stripping.
  const strippers = [
    EDITION_PAREN_RE,
    TRAILING_DESCRIPTOR_RE,
    SUBTITLE_RE,
    LONG_DASH_SUBTITLE_RE,
  ];

  const stripped = strippers.reduce(
    (current, pattern) => current.replace(pattern, ''),
    title.toLowerCase()
  );

  return stripped.replace(/\s+/g, ' ').trim();
}
/**
 * Reduce a narrator string to its comparison form: lower-cased, split on
 * commas, each name trimmed, empty names dropped, names sorted so the listing
 * order does not matter, then re-joined with ", ".
 */
function normalizeNarrator(narrator?: string): string {
  const cleaned = (narrator || '').toLowerCase().trim();
  if (cleaned === '') return cleaned;

  const names = cleaned
    .split(',')
    .map(part => part.trim())
    .filter(part => part.length > 0);
  names.sort();
  return names.join(', ');
}
// ---------------------------------------------------------------------------
// Duration compatibility
// ---------------------------------------------------------------------------
/**
 * Decide whether two durations (in minutes) can belong to the same recording.
 *
 * The allowed gap is 1% of the longer duration, but never less than 5
 * minutes. When either duration is missing the pair is treated as compatible.
 *
 * @param a - First duration in minutes, if known
 * @param b - Second duration in minutes, if known
 * @returns `true` when the durations are within tolerance or one is missing
 */
export function areDurationsCompatible(a?: number, b?: number): boolean {
  if (a == null || b == null) return true;

  const [shorter, longer] = a <= b ? [a, b] : [b, a];
  const allowedGap = Math.max(longer * 0.01, 5);
  return longer - shorter <= allowedGap;
}
// ---------------------------------------------------------------------------
// Metadata scoring (for picking best representative)
// ---------------------------------------------------------------------------
/**
 * Count how many metadata fields a listing has filled in (0-7). Used to pick
 * the best representative of a dedup group. A rating or duration of 0 still
 * counts as present; an empty genre list does not.
 */
function metadataScore(book: AudibleAudiobook): number {
  const signals = [
    Boolean(book.coverArtUrl),
    book.rating != null,
    book.durationMinutes != null,
    Boolean(book.description),
    Boolean(book.narrator),
    Boolean(book.releaseDate),
    Boolean(book.genres && book.genres.length > 0),
  ];
  return signals.filter(Boolean).length;
}
// ---------------------------------------------------------------------------
// Dedup group types (for works-table persistence)
// ---------------------------------------------------------------------------
/**
 * Metadata about a group of ASINs that were collapsed during dedup.
 * Title, author, narrator and duration are copied from the canonical entry.
 */
export interface DedupGroup {
canonicalAsin: string; // ASIN of the "winner" (best metadata score)
allAsins: string[]; // All ASINs in this group (including canonical)
title: string; // Title from the canonical entry
author: string; // Author from the canonical entry
narrator?: string; // Narrator from the canonical entry
durationMinutes?: number; // Duration from the canonical entry
}
/** Result of deduplication with group collection, as returned by deduplicateAndCollectGroups. */
export interface DeduplicateResult {
books: AudibleAudiobook[]; // The deduped list (same as deduplicateAudiobooks returns)
groups: DedupGroup[]; // Groups where 2+ ASINs were collapsed
}
// ---------------------------------------------------------------------------
// Main dedup functions
// ---------------------------------------------------------------------------
/**
 * Collapse listings that are the same recording under different ASINs.
 *
 * Two listings are the same recording when their normalized title and
 * narrator match and their durations are within tolerance. A different
 * narrator means a different production, and a duration outside tolerance
 * means different content (e.g. abridged vs unabridged); both are kept.
 *
 * Output order follows the first appearance of each title.
 *
 * @param books - Listings to deduplicate
 * @returns The deduplicated listings
 */
export function deduplicateAudiobooks(books: AudibleAudiobook[]): AudibleAudiobook[] {
  const { books: uniqueBooks } = deduplicateAndCollectGroups(books);
  return uniqueBooks;
}
/**
 * Deduplicate audiobooks AND return grouping metadata for works-table persistence.
 *
 * Books are bucketed by normalized title + narrator, then each bucket is
 * split into duration-compatible sub-groups. The member with the highest
 * metadata score represents each sub-group in the output (ties go to the
 * earliest member).
 *
 * Duration compatibility is checked against the first member of a sub-group
 * that HAS a duration. A listing without a duration is still compatible with
 * anything, but it no longer acts as a wildcard representative that would let
 * e.g. an abridged and an unabridged edition fall into the same sub-group.
 *
 * A group is reported only when it contains 2+ DISTINCT ASINs; the same ASIN
 * appearing twice in the input does not count as a collapsed group.
 *
 * @param books - Listings to deduplicate
 * @returns The deduped list (ordered by first appearance of each title +
 *   narrator key) and the multi-ASIN groups that were collapsed
 */
export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): DeduplicateResult {
  if (books.length <= 1) return { books: [...books], groups: [] };

  // Group by normalized title + narrator. Map iteration follows insertion
  // order, which preserves the position of each key's first appearance.
  const titleNarratorGroups = new Map<string, AudibleAudiobook[]>();
  for (const book of books) {
    const key = `${normalizeTitle(book.title)}|||${normalizeNarrator(book.narrator)}`;
    const group = titleNarratorGroups.get(key);
    if (group) {
      group.push(book);
    } else {
      titleNarratorGroups.set(key, [book]);
    }
  }

  const result: AudibleAudiobook[] = [];
  const dedupGroups: DedupGroup[] = [];

  for (const group of titleNarratorGroups.values()) {
    if (group.length === 1) {
      result.push(group[0]);
      continue;
    }

    // Within a title+narrator group, further split by duration compatibility.
    // A book joins the first sub-group whose reference duration it matches.
    const subGroups: AudibleAudiobook[][] = [];
    for (const book of group) {
      const target = subGroups.find(sg => {
        // Reference = first member with a known duration; undefined when the
        // sub-group has none yet, which areDurationsCompatible accepts.
        const reference = sg.find(member => member.durationMinutes != null);
        return areDurationsCompatible(reference?.durationMinutes, book.durationMinutes);
      });
      if (target) {
        target.push(book);
      } else {
        subGroups.push([book]);
      }
    }

    // From each sub-group, pick the best representative and collect group metadata
    for (const sg of subGroups) {
      let best = sg[0];
      let bestScore = metadataScore(best);
      for (let i = 1; i < sg.length; i++) {
        const score = metadataScore(sg[i]);
        if (score > bestScore) {
          best = sg[i];
          bestScore = score;
        }
      }
      result.push(best);

      // Collect group metadata for works-table persistence (only groups with
      // 2+ distinct ASINs; repeated ASINs are collapsed first).
      const uniqueAsins = [...new Set(sg.map(b => b.asin))];
      if (uniqueAsins.length >= 2) {
        dedupGroups.push({
          canonicalAsin: best.asin,
          allAsins: uniqueAsins,
          title: best.title,
          author: best.author,
          narrator: best.narrator,
          durationMinutes: best.durationMinutes,
        });
      }
    }
  }

  return { books: result, groups: dedupGroups };
}
+44
View File
@@ -0,0 +1,44 @@
/**
* Component: Runtime Parsing Utility
* Documentation: documentation/integrations/audible.md
*
* Shared runtime/duration text parser extracted from AudibleService.
* Handles all i18n patterns (English, German, Spanish, French) via
* language-specific regex patterns in LanguageConfig.
*/
import type { LanguageConfig } from '../constants/language-config';
/**
 * Return the integer captured by the first pattern that matches `text` with a
 * parseable first capture group, or 0 when no pattern yields a number.
 */
function firstCapturedInt(text: string, patterns: readonly (string | RegExp)[]): number {
  for (const pattern of patterns) {
    const value = Number.parseInt(text.match(pattern)?.[1] ?? '', 10);
    if (!Number.isNaN(value)) return value;
  }
  return 0;
}

/**
 * Parse runtime text (e.g. "12 hrs and 30 mins", "5 Std. 20 Min.")
 * into total minutes using language-specific patterns.
 *
 * Hours and minutes are extracted independently, so an unparseable hour
 * capture no longer discards a valid minute value (and vice versa).
 *
 * @param runtimeText - Raw runtime string from Audible HTML
 * @param langConfig - Language configuration with hour/minute regex patterns
 * @returns Total minutes, or undefined if no duration could be parsed
 */
export function parseRuntime(runtimeText: string, langConfig: LanguageConfig): number | undefined {
  if (!runtimeText) return undefined;

  const { runtimeHourPatterns, runtimeMinutePatterns } = langConfig.scraping;
  const hours = firstCapturedInt(runtimeText, runtimeHourPatterns);
  const minutes = firstCapturedInt(runtimeText, runtimeMinutePatterns);

  const totalMinutes = hours * 60 + minutes;
  return totalMinutes > 0 ? totalMinutes : undefined;
}