mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-02 20:30:10 +00:00
Add works table and ASIN deduping
Add persistent cross-ASIN "works" mapping and client-side deduplication to improve library matching. Introduces a Prisma migration and models (Work, WorkAsin) plus src/lib/services/works.service for persisting dedup groups, seeding ASINs at request time, and sibling lookup. Adds a deduplication utility (deduplicate-audiobooks) that normalizes titles/narrators, compares durations, and returns grouping metadata; API routes (search, author, series) now deduplicate results before enrichment and fire-and-forget persist groups. Adds sibling-ASIN expansion into audiobook matcher and expands getAvailableAsins accordingly. Extracts runtime parsing into a shared parse-runtime util and updates audible scrapers/services to use it. Includes unit tests for dedup logic and works service and updates test Prisma mocks.
This commit is contained in:
@@ -47,6 +47,8 @@ export const createPrismaMock = () => ({
|
||||
bookDateSwipe: createModelMock(),
|
||||
goodreadsShelf: createModelMock(),
|
||||
goodreadsBookMapping: createModelMock(),
|
||||
work: createModelMock(),
|
||||
workAsin: createModelMock(),
|
||||
$queryRaw: vi.fn(),
|
||||
$disconnect: vi.fn(),
|
||||
});
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
/**
|
||||
* Component: Works Service Tests
|
||||
* Documentation: documentation/integrations/audible.md
|
||||
*/
|
||||
|
||||
import { beforeEach, describe, expect, it, vi } from 'vitest';
|
||||
import { createPrismaMock } from '../helpers/prisma';
|
||||
import type { DedupGroup } from '@/lib/utils/deduplicate-audiobooks';
|
||||
|
||||
// Single shared Prisma mock for this file; every test stubs and inspects its
// model methods (e.g. prismaMock.workAsin.findMany) directly.
const prismaMock = createPrismaMock();
|
||||
|
||||
// Replace the '@/lib/db' module so that its `prisma` export is prismaMock.
// NOTE(review): this relies on the factory being evaluated only when
// '@/lib/db' is first imported (the service is imported dynamically inside
// each test), i.e. after prismaMock has been initialised - confirm against
// the Vitest version in use.
vi.mock('@/lib/db', () => ({
|
||||
prisma: prismaMock,
|
||||
}));
|
||||
|
||||
// Replace the logger module: RMABLogger.create() returns an object whose
// debug/info/warn/error methods are fresh vi.fn() spies on every call, so
// logging performed by the code under test produces no output.
vi.mock('@/lib/utils/logger', () => ({
|
||||
RMABLogger: {
|
||||
create: () => ({
|
||||
debug: vi.fn(),
|
||||
|
||||
info: vi.fn(),
|
||||
|
||||
warn: vi.fn(),
|
||||
|
||||
error: vi.fn(),
|
||||
|
||||
}),
|
||||
|
||||
},
|
||||
|
||||
}));
|
||||
|
||||
describe('persistDedupGroups', () => {
|
||||
// Per-test reset for the persistDedupGroups suite.
beforeEach(() => {
|
||||
// Clears recorded calls/results on every mock. Per the Vitest docs this does
// NOT remove stubbed implementations (mockResolvedValue / mockRejectedValue),
// so each test must set up every stub it depends on.
vi.clearAllMocks();
|
||||
// Empties the module cache so the `await import('@/lib/services/works.service')`
// in each test evaluates the service module afresh.
vi.resetModules();
|
||||
});
|
||||
|
||||
it('creates new work + work_asins for a fresh group', async () => {
|
||||
prismaMock.workAsin.findMany.mockResolvedValue([]);
|
||||
prismaMock.work.create.mockResolvedValue({ id: 'work-1' });
|
||||
prismaMock.workAsin.create.mockResolvedValue({});
|
||||
prismaMock.workAsin.updateMany.mockResolvedValue({ count: 0 });
|
||||
|
||||
const { persistDedupGroups } = await import('@/lib/services/works.service');
|
||||
|
||||
const groups: DedupGroup[] = [{
|
||||
canonicalAsin: 'ASIN_A',
|
||||
allAsins: ['ASIN_A', 'ASIN_B'],
|
||||
title: 'Test Book',
|
||||
author: 'Test Author',
|
||||
narrator: 'Test Narrator',
|
||||
durationMinutes: 600,
|
||||
}];
|
||||
|
||||
await persistDedupGroups(groups);
|
||||
|
||||
expect(prismaMock.work.create).toHaveBeenCalledWith({
|
||||
data: { title: 'Test Book', author: 'Test Author' },
|
||||
});
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledTimes(2);
|
||||
|
||||
// Canonical ASIN should have narrator, duration, isCanonical=true
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
|
||||
data: expect.objectContaining({
|
||||
workId: 'work-1',
|
||||
asin: 'ASIN_A',
|
||||
narrator: 'Test Narrator',
|
||||
durationMinutes: 600,
|
||||
isCanonical: true,
|
||||
source: 'dedup_auto',
|
||||
}),
|
||||
});
|
||||
|
||||
// Non-canonical ASIN should have isCanonical=false
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
|
||||
data: expect.objectContaining({
|
||||
workId: 'work-1',
|
||||
asin: 'ASIN_B',
|
||||
isCanonical: false,
|
||||
source: 'dedup_auto',
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
it('adds new ASINs to existing work when canonical already exists', async () => {
|
||||
prismaMock.workAsin.findMany.mockResolvedValue([
|
||||
{ asin: 'ASIN_A', workId: 'existing-work' },
|
||||
]);
|
||||
prismaMock.workAsin.create.mockResolvedValue({});
|
||||
prismaMock.workAsin.updateMany.mockResolvedValue({ count: 1 });
|
||||
|
||||
const { persistDedupGroups } = await import('@/lib/services/works.service');
|
||||
|
||||
const groups: DedupGroup[] = [{
|
||||
canonicalAsin: 'ASIN_A',
|
||||
allAsins: ['ASIN_A', 'ASIN_B', 'ASIN_C'],
|
||||
title: 'Test Book',
|
||||
author: 'Test Author',
|
||||
narrator: 'Narrator',
|
||||
durationMinutes: 500,
|
||||
}];
|
||||
|
||||
await persistDedupGroups(groups);
|
||||
|
||||
// Should NOT create a new work
|
||||
expect(prismaMock.work.create).not.toHaveBeenCalled();
|
||||
|
||||
// Should create entries for ASIN_B and ASIN_C only (ASIN_A already exists)
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledTimes(2);
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
|
||||
data: expect.objectContaining({
|
||||
workId: 'existing-work',
|
||||
asin: 'ASIN_B',
|
||||
}),
|
||||
});
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
|
||||
data: expect.objectContaining({
|
||||
workId: 'existing-work',
|
||||
asin: 'ASIN_C',
|
||||
}),
|
||||
});
|
||||
});
|
||||
|
||||
it('merges two separate works when dedup groups them together', async () => {
|
||||
// ASIN_A is in work-1, ASIN_B is in work-2
|
||||
prismaMock.workAsin.findMany.mockResolvedValue([
|
||||
{ asin: 'ASIN_A', workId: 'work-1' },
|
||||
{ asin: 'ASIN_B', workId: 'work-2' },
|
||||
]);
|
||||
prismaMock.workAsin.updateMany.mockResolvedValue({ count: 1 });
|
||||
prismaMock.work.deleteMany.mockResolvedValue({ count: 1 });
|
||||
|
||||
const { persistDedupGroups } = await import('@/lib/services/works.service');
|
||||
|
||||
const groups: DedupGroup[] = [{
|
||||
canonicalAsin: 'ASIN_A',
|
||||
allAsins: ['ASIN_A', 'ASIN_B'],
|
||||
title: 'Merged Book',
|
||||
author: 'Author',
|
||||
}];
|
||||
|
||||
await persistDedupGroups(groups);
|
||||
|
||||
// Should move work-2 ASINs to work-1
|
||||
expect(prismaMock.workAsin.updateMany).toHaveBeenCalledWith({
|
||||
where: { workId: { in: ['work-2'] } },
|
||||
data: { workId: 'work-1' },
|
||||
});
|
||||
|
||||
// Should delete work-2
|
||||
expect(prismaMock.work.deleteMany).toHaveBeenCalledWith({
|
||||
where: { id: { in: ['work-2'] } },
|
||||
});
|
||||
});
|
||||
|
||||
it('silently catches and logs errors without throwing', async () => {
|
||||
prismaMock.workAsin.findMany.mockRejectedValue(new Error('DB connection failed'));
|
||||
|
||||
const { persistDedupGroups } = await import('@/lib/services/works.service');
|
||||
|
||||
const groups: DedupGroup[] = [{
|
||||
canonicalAsin: 'ASIN_A',
|
||||
allAsins: ['ASIN_A', 'ASIN_B'],
|
||||
title: 'Test',
|
||||
author: 'Auth',
|
||||
}];
|
||||
|
||||
// Should not throw
|
||||
await expect(persistDedupGroups(groups)).resolves.toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('seedAsin', () => {
|
||||
// Per-test reset for the seedAsin suite.
beforeEach(() => {
|
||||
// Clears call history only; stubbed return values from earlier tests remain
// in place (Vitest mockClear semantics), so tests set their own stubs.
vi.clearAllMocks();
|
||||
// Drop cached modules so the dynamic import in each test re-evaluates the service.
vi.resetModules();
|
||||
});
|
||||
|
||||
it('creates single-ASIN work for new ASIN', async () => {
|
||||
prismaMock.workAsin.findUnique.mockResolvedValue(null);
|
||||
prismaMock.work.create.mockResolvedValue({ id: 'new-work' });
|
||||
prismaMock.workAsin.create.mockResolvedValue({});
|
||||
|
||||
const { seedAsin } = await import('@/lib/services/works.service');
|
||||
|
||||
await seedAsin('NEW_ASIN', 'New Book', 'Author', 'Narrator', 300);
|
||||
|
||||
expect(prismaMock.work.create).toHaveBeenCalledWith({
|
||||
data: { title: 'New Book', author: 'Author' },
|
||||
});
|
||||
expect(prismaMock.workAsin.create).toHaveBeenCalledWith({
|
||||
data: {
|
||||
workId: 'new-work',
|
||||
asin: 'NEW_ASIN',
|
||||
narrator: 'Narrator',
|
||||
durationMinutes: 300,
|
||||
isCanonical: true,
|
||||
source: 'dedup_auto',
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('does nothing for already-tracked ASIN', async () => {
|
||||
prismaMock.workAsin.findUnique.mockResolvedValue({
|
||||
id: 'existing',
|
||||
asin: 'EXISTING_ASIN',
|
||||
workId: 'work-1',
|
||||
});
|
||||
|
||||
const { seedAsin } = await import('@/lib/services/works.service');
|
||||
|
||||
await seedAsin('EXISTING_ASIN', 'Book', 'Author');
|
||||
|
||||
expect(prismaMock.work.create).not.toHaveBeenCalled();
|
||||
expect(prismaMock.workAsin.create).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('silently catches and logs errors without throwing', async () => {
|
||||
prismaMock.workAsin.findUnique.mockRejectedValue(new Error('DB error'));
|
||||
|
||||
const { seedAsin } = await import('@/lib/services/works.service');
|
||||
|
||||
await expect(seedAsin('ASIN', 'Book', 'Auth')).resolves.toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('getSiblingAsins', () => {
|
||||
// Per-test reset for the getSiblingAsins suite.
beforeEach(() => {
|
||||
// Clears call history only; stubbed return values from earlier tests remain
// in place (Vitest mockClear semantics). The "empty input" test below relies
// on the cleared call history for its not.toHaveBeenCalled() assertion.
vi.clearAllMocks();
|
||||
// Drop cached modules so the dynamic import in each test re-evaluates the service.
vi.resetModules();
|
||||
});
|
||||
|
||||
it('returns sibling ASINs correctly', async () => {
|
||||
// First query: find input ASINs and their work IDs
|
||||
prismaMock.workAsin.findMany
|
||||
.mockResolvedValueOnce([
|
||||
{ asin: 'ASIN_A', workId: 'work-1' },
|
||||
{ asin: 'ASIN_C', workId: 'work-2' },
|
||||
])
|
||||
// Second query: all ASINs in those works
|
||||
.mockResolvedValueOnce([
|
||||
{ asin: 'ASIN_A', workId: 'work-1' },
|
||||
{ asin: 'ASIN_B', workId: 'work-1' },
|
||||
{ asin: 'ASIN_C', workId: 'work-2' },
|
||||
{ asin: 'ASIN_D', workId: 'work-2' },
|
||||
{ asin: 'ASIN_E', workId: 'work-2' },
|
||||
]);
|
||||
|
||||
const { getSiblingAsins } = await import('@/lib/services/works.service');
|
||||
|
||||
const result = await getSiblingAsins(['ASIN_A', 'ASIN_C']);
|
||||
|
||||
expect(result.get('ASIN_A')).toEqual(['ASIN_B']);
|
||||
expect(result.get('ASIN_C')).toEqual(['ASIN_D', 'ASIN_E']);
|
||||
});
|
||||
|
||||
it('returns empty map for unknown ASINs', async () => {
|
||||
prismaMock.workAsin.findMany.mockResolvedValue([]);
|
||||
|
||||
const { getSiblingAsins } = await import('@/lib/services/works.service');
|
||||
|
||||
const result = await getSiblingAsins(['UNKNOWN']);
|
||||
|
||||
expect(result.size).toBe(0);
|
||||
});
|
||||
|
||||
it('returns empty map for empty input', async () => {
|
||||
const { getSiblingAsins } = await import('@/lib/services/works.service');
|
||||
|
||||
const result = await getSiblingAsins([]);
|
||||
|
||||
expect(result.size).toBe(0);
|
||||
// Should not query DB
|
||||
expect(prismaMock.workAsin.findMany).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('excludes the input ASIN itself from siblings', async () => {
|
||||
prismaMock.workAsin.findMany
|
||||
.mockResolvedValueOnce([
|
||||
{ asin: 'ASIN_A', workId: 'work-1' },
|
||||
])
|
||||
.mockResolvedValueOnce([
|
||||
{ asin: 'ASIN_A', workId: 'work-1' },
|
||||
{ asin: 'ASIN_B', workId: 'work-1' },
|
||||
]);
|
||||
|
||||
const { getSiblingAsins } = await import('@/lib/services/works.service');
|
||||
|
||||
const result = await getSiblingAsins(['ASIN_A']);
|
||||
|
||||
expect(result.get('ASIN_A')).toEqual(['ASIN_B']);
|
||||
expect(result.get('ASIN_A')).not.toContain('ASIN_A');
|
||||
});
|
||||
|
||||
it('omits ASINs with no siblings (single-ASIN works)', async () => {
|
||||
prismaMock.workAsin.findMany
|
||||
.mockResolvedValueOnce([
|
||||
{ asin: 'ASIN_LONELY', workId: 'work-solo' },
|
||||
])
|
||||
.mockResolvedValueOnce([
|
||||
{ asin: 'ASIN_LONELY', workId: 'work-solo' },
|
||||
]);
|
||||
|
||||
const { getSiblingAsins } = await import('@/lib/services/works.service');
|
||||
|
||||
const result = await getSiblingAsins(['ASIN_LONELY']);
|
||||
|
||||
// No siblings means it shouldn't be in the map at all
|
||||
expect(result.has('ASIN_LONELY')).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,434 @@
|
||||
/**
|
||||
* Component: Audiobook Deduplication Tests
|
||||
* Documentation: documentation/integrations/audible.md
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
deduplicateAudiobooks,
|
||||
deduplicateAndCollectGroups,
|
||||
normalizeTitle,
|
||||
areDurationsCompatible,
|
||||
} from '@/lib/utils/deduplicate-audiobooks';
|
||||
import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Helper: minimal AudibleAudiobook factory
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Builds an AudibleAudiobook fixture for the tests in this file.
 *
 * Callers must supply `asin`, `title` and `author`; every other field listed
 * below starts as `undefined`. `overrides` is spread last, so any field it
 * contains replaces the default.
 *
 * @param overrides - Required identity fields plus any optional fields to set.
 * @returns A new object combining the undefined defaults with `overrides`.
 */
function makeBook(overrides: Partial<AudibleAudiobook> & { asin: string; title: string; author: string }): AudibleAudiobook {
|
||||
return {
|
||||
narrator: undefined,
|
||||
coverArtUrl: undefined,
|
||||
durationMinutes: undefined,
|
||||
rating: undefined,
|
||||
description: undefined,
|
||||
releaseDate: undefined,
|
||||
genres: undefined,
|
||||
series: undefined,
|
||||
seriesPart: undefined,
|
||||
seriesAsin: undefined,
|
||||
authorAsin: undefined,
|
||||
// Spread last so caller-provided values win over the defaults above.
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// normalizeTitle
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('normalizeTitle', () => {
|
||||
it('lowercases', () => {
|
||||
expect(normalizeTitle('The Black Prism')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips (Unabridged)', () => {
|
||||
expect(normalizeTitle('The Black Prism (Unabridged)')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips [Abridged Edition]', () => {
|
||||
expect(normalizeTitle('The Black Prism [Abridged Edition]')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips (2024 Remastered Edition)', () => {
|
||||
expect(normalizeTitle('The Hobbit (2024 Remastered Edition)')).toBe('the hobbit');
|
||||
});
|
||||
|
||||
it('strips subtitle after colon', () => {
|
||||
expect(normalizeTitle('The Black Prism: Lightbringer, Book 1')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips subtitle after long dash', () => {
|
||||
expect(normalizeTitle('The Black Prism \u2014 A Lightbringer Novel')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips trailing "A Novel"', () => {
|
||||
expect(normalizeTitle('The Black Prism: A Novel')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips (Audiobook)', () => {
|
||||
expect(normalizeTitle('The Hobbit (Audiobook)')).toBe('the hobbit');
|
||||
});
|
||||
|
||||
it('strips (Dramatized Adaptation)', () => {
|
||||
expect(normalizeTitle('The Black Prism (Dramatized Adaptation)')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('strips (Full Cast Narration)', () => {
|
||||
expect(normalizeTitle('The Black Prism (Full Cast Narration)')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('collapses whitespace', () => {
|
||||
expect(normalizeTitle(' The Black Prism ')).toBe('the black prism');
|
||||
});
|
||||
|
||||
it('handles empty string', () => {
|
||||
expect(normalizeTitle('')).toBe('');
|
||||
});
|
||||
|
||||
it('preserves hyphenated words (not subtitles)', () => {
|
||||
// "well-known" has a short dash, not a subtitle separator
|
||||
expect(normalizeTitle('A Well-Known Book')).toBe('a well-known book');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// areDurationsCompatible
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('areDurationsCompatible', () => {
|
||||
it('returns true when both undefined', () => {
|
||||
expect(areDurationsCompatible(undefined, undefined)).toBe(true);
|
||||
});
|
||||
|
||||
it('returns true when one undefined', () => {
|
||||
expect(areDurationsCompatible(600, undefined)).toBe(true);
|
||||
expect(areDurationsCompatible(undefined, 600)).toBe(true);
|
||||
});
|
||||
|
||||
it('returns true for identical durations', () => {
|
||||
expect(areDurationsCompatible(600, 600)).toBe(true);
|
||||
});
|
||||
|
||||
it('uses 1% of longer duration as tolerance for long books', () => {
|
||||
// ~40-hour books (about 2400 min): per the test name the tolerance is 1% of
// the longer duration with a 5-minute floor, i.e. roughly 24 min here
// (24.24 for the first pair, 24.25 for the second). Both expectations also
// hold if the 1% is taken from 2400 instead.
expect(areDurationsCompatible(2400, 2424)).toBe(true); // 24 min apart: within tolerance
|
||||
expect(areDurationsCompatible(2400, 2425)).toBe(false); // 25 min apart: just over
|
||||
});
|
||||
|
||||
it('uses 5-minute minimum tolerance for short books', () => {
|
||||
// Two 2-hour books (120 min): tolerance = max(120*0.01, 5) = max(1.2, 5) = 5 min
|
||||
expect(areDurationsCompatible(120, 125)).toBe(true); // exactly at 5-min minimum
|
||||
expect(areDurationsCompatible(120, 126)).toBe(false); // just over
|
||||
});
|
||||
|
||||
it('keeps abridged vs unabridged separate (large duration gap)', () => {
|
||||
// Unabridged: 720 min (12 hrs), Abridged: 360 min (6 hrs)
|
||||
expect(areDurationsCompatible(720, 360)).toBe(false);
|
||||
});
|
||||
|
||||
it('symmetry: order does not matter', () => {
|
||||
expect(areDurationsCompatible(2400, 2424)).toBe(true);
|
||||
expect(areDurationsCompatible(2424, 2400)).toBe(true);
|
||||
expect(areDurationsCompatible(120, 126)).toBe(false);
|
||||
expect(areDurationsCompatible(126, 120)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// deduplicateAudiobooks
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('deduplicateAudiobooks', () => {
|
||||
it('returns empty array for empty input', () => {
|
||||
expect(deduplicateAudiobooks([])).toEqual([]);
|
||||
});
|
||||
|
||||
it('returns single book unchanged', () => {
|
||||
const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Author' });
|
||||
expect(deduplicateAudiobooks([book])).toEqual([book]);
|
||||
});
|
||||
|
||||
it('passes through all-unique books unchanged', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }),
|
||||
makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }),
|
||||
makeBook({ asin: 'A3', title: 'Book Three', author: 'Auth', narrator: 'Nar B', durationMinutes: 700 }),
|
||||
];
|
||||
expect(deduplicateAudiobooks(books)).toHaveLength(3);
|
||||
});
|
||||
|
||||
it('collapses simple duplicates (same title + narrator + similar duration)', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('keeps books with different narrators (different production)', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Full Cast', durationMinutes: 480 }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('keeps abridged vs unabridged (same narrator, very different duration)', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
|
||||
makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('collapses when one book has missing duration', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: undefined }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('collapses when both books have missing duration', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance' }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('collapses title variants with edition markers', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism (Unabridged)', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1258 }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('collapses title variants with subtitles', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism: Lightbringer, Book 1', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('picks the representative with most metadata', () => {
|
||||
const sparse = makeBook({
|
||||
asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks',
|
||||
narrator: 'Simon Vance', durationMinutes: 1260,
|
||||
});
|
||||
const rich = makeBook({
|
||||
asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks',
|
||||
narrator: 'Simon Vance', durationMinutes: 1262,
|
||||
coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book',
|
||||
});
|
||||
const result = deduplicateAudiobooks([sparse, rich]);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].asin).toBe('A2'); // rich entry wins
|
||||
});
|
||||
|
||||
it('preserves original order (first-seen position)', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }),
|
||||
makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 400 }),
|
||||
makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
|
||||
makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
|
||||
];
|
||||
const result = deduplicateAudiobooks(books);
|
||||
expect(result).toHaveLength(3);
|
||||
expect(result.map(b => b.title)).toEqual(['Alpha', 'Beta', 'Charlie']);
|
||||
});
|
||||
|
||||
it('handles Lightbringer-style scenario: unabridged + dramatized', () => {
|
||||
// Simon Vance full narration (long)
|
||||
const vance1 = makeBook({
|
||||
asin: 'SV1', title: 'The Black Prism', author: 'Brent Weeks',
|
||||
narrator: 'Simon Vance', durationMinutes: 1260,
|
||||
coverArtUrl: 'cover1.jpg', rating: 4.7,
|
||||
});
|
||||
// Re-listed Simon Vance (same duration, different ASIN)
|
||||
const vance2 = makeBook({
|
||||
asin: 'SV2', title: 'The Black Prism: Lightbringer Book 1', author: 'Brent Weeks',
|
||||
narrator: 'Simon Vance', durationMinutes: 1262,
|
||||
});
|
||||
// Dramatized with full cast (shorter, different narrator)
|
||||
const drama = makeBook({
|
||||
asin: 'DR1', title: 'The Black Prism (Dramatized Adaptation)', author: 'Brent Weeks',
|
||||
narrator: 'Full Cast', durationMinutes: 480,
|
||||
coverArtUrl: 'cover-drama.jpg',
|
||||
});
|
||||
|
||||
const result = deduplicateAudiobooks([vance1, vance2, drama]);
|
||||
expect(result).toHaveLength(2);
|
||||
// Simon Vance should collapse to 1, Full Cast stays
|
||||
expect(result.find(b => b.narrator === 'Simon Vance')).toBeTruthy();
|
||||
expect(result.find(b => b.narrator === 'Full Cast')).toBeTruthy();
|
||||
// Should pick the richer entry for Simon Vance
|
||||
const svResult = result.find(b => b.narrator === 'Simon Vance')!;
|
||||
expect(svResult.asin).toBe('SV1'); // has cover + rating
|
||||
});
|
||||
|
||||
it('uses percentage tolerance for very long audiobooks', () => {
|
||||
// Two 40-hour books: tolerance = max(2400*0.01, 5) = 24 min
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2400 }),
|
||||
makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2420 }),
|
||||
];
|
||||
expect(deduplicateAudiobooks(books)).toHaveLength(1);
|
||||
|
||||
// Beyond tolerance
|
||||
const booksFar = [
|
||||
makeBook({ asin: 'A1', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2400 }),
|
||||
makeBook({ asin: 'A2', title: 'Long Book', author: 'Auth', narrator: 'Nar', durationMinutes: 2430 }),
|
||||
];
|
||||
expect(deduplicateAudiobooks(booksFar)).toHaveLength(2);
|
||||
});
|
||||
|
||||
it('treats missing narrator as its own group', () => {
|
||||
// Two entries with same title but no narrator - should collapse
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),
|
||||
makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 302 }),
|
||||
];
|
||||
expect(deduplicateAudiobooks(books)).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('does not collapse empty-narrator with named narrator', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),
|
||||
makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'John Smith', durationMinutes: 302 }),
|
||||
];
|
||||
expect(deduplicateAudiobooks(books)).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// deduplicateAndCollectGroups
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('deduplicateAndCollectGroups', () => {
|
||||
it('returns empty groups array when no duplicates', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Book One', author: 'Auth', narrator: 'Nar A', durationMinutes: 600 }),
|
||||
makeBook({ asin: 'A2', title: 'Book Two', author: 'Auth', narrator: 'Nar A', durationMinutes: 500 }),
|
||||
];
|
||||
const { books: result, groups } = deduplicateAndCollectGroups(books);
|
||||
expect(result).toHaveLength(2);
|
||||
expect(groups).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('returns empty groups for empty input', () => {
|
||||
const { books: result, groups } = deduplicateAndCollectGroups([]);
|
||||
expect(result).toHaveLength(0);
|
||||
expect(groups).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('returns empty groups for single book', () => {
|
||||
const book = makeBook({ asin: 'A1', title: 'Book One', author: 'Auth' });
|
||||
const { books: result, groups } = deduplicateAndCollectGroups([book]);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(groups).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('returns group with 2 ASINs when 2 books match', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
|
||||
makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
|
||||
];
|
||||
const { books: result, groups } = deduplicateAndCollectGroups(books);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(groups).toHaveLength(1);
|
||||
expect(groups[0].allAsins).toHaveLength(2);
|
||||
expect(groups[0].allAsins).toContain('A1');
|
||||
expect(groups[0].allAsins).toContain('A2');
|
||||
});
|
||||
|
||||
it('returns group with 3+ ASINs for multi-duplicate scenario', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
|
||||
makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 662 }),
|
||||
makeBook({ asin: 'A3', title: 'The Hobbit (Unabridged)', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 658 }),
|
||||
];
|
||||
const { books: result, groups } = deduplicateAndCollectGroups(books);
|
||||
expect(result).toHaveLength(1);
|
||||
expect(groups).toHaveLength(1);
|
||||
expect(groups[0].allAsins).toHaveLength(3);
|
||||
expect(groups[0].allAsins).toContain('A1');
|
||||
expect(groups[0].allAsins).toContain('A2');
|
||||
expect(groups[0].allAsins).toContain('A3');
|
||||
});
|
||||
|
||||
it('canonicalAsin is the one with highest metadata score', () => {
|
||||
const sparse = makeBook({
|
||||
asin: 'SPARSE', title: 'The Black Prism', author: 'Brent Weeks',
|
||||
narrator: 'Simon Vance', durationMinutes: 1260,
|
||||
});
|
||||
const rich = makeBook({
|
||||
asin: 'RICH', title: 'The Black Prism', author: 'Brent Weeks',
|
||||
narrator: 'Simon Vance', durationMinutes: 1262,
|
||||
coverArtUrl: 'https://img.jpg', rating: 4.5, description: 'Great book',
|
||||
});
|
||||
const { groups } = deduplicateAndCollectGroups([sparse, rich]);
|
||||
expect(groups).toHaveLength(1);
|
||||
expect(groups[0].canonicalAsin).toBe('RICH');
|
||||
});
|
||||
|
||||
it('groups only include entries with 2+ ASINs', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300 }),
|
||||
makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
|
||||
makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
|
||||
];
|
||||
const { groups } = deduplicateAndCollectGroups(books);
|
||||
// Only Alpha group should appear (Beta is a singleton)
|
||||
expect(groups).toHaveLength(1);
|
||||
expect(groups[0].allAsins).toContain('A1');
|
||||
expect(groups[0].allAsins).toContain('A2');
|
||||
});
|
||||
|
||||
it('duration-incompatible books produce separate entries (no group for singletons)', () => {
|
||||
// Same title/narrator but very different durations (abridged vs unabridged)
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 660 }),
|
||||
makeBook({ asin: 'A2', title: 'The Hobbit', author: 'Tolkien', narrator: 'Andy Serkis', durationMinutes: 330 }),
|
||||
];
|
||||
const { books: result, groups } = deduplicateAndCollectGroups(books);
|
||||
expect(result).toHaveLength(2); // Not collapsed
|
||||
expect(groups).toHaveLength(0); // No multi-ASIN groups
|
||||
});
|
||||
|
||||
it('books field matches what deduplicateAudiobooks returns', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 300, coverArtUrl: 'img.jpg', rating: 4.5 }),
|
||||
makeBook({ asin: 'A2', title: 'Alpha', author: 'Auth', narrator: 'Nar', durationMinutes: 302 }),
|
||||
makeBook({ asin: 'B1', title: 'Beta', author: 'Auth', narrator: 'Nar', durationMinutes: 500 }),
|
||||
makeBook({ asin: 'C1', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 600 }),
|
||||
makeBook({ asin: 'C2', title: 'Charlie', author: 'Auth', narrator: 'Nar', durationMinutes: 601 }),
|
||||
];
|
||||
const dedupOnly = deduplicateAudiobooks(books);
|
||||
const { books: withGroups } = deduplicateAndCollectGroups(books);
|
||||
expect(withGroups.map(b => b.asin)).toEqual(dedupOnly.map(b => b.asin));
|
||||
});
|
||||
|
||||
it('includes narrator and durationMinutes from canonical entry in group', () => {
|
||||
const books = [
|
||||
makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 480 }),
|
||||
makeBook({ asin: 'A2', title: 'Test Book', author: 'Auth', narrator: 'Jane Doe', durationMinutes: 482, coverArtUrl: 'img.jpg', rating: 4.0 }),
|
||||
];
|
||||
const { groups } = deduplicateAndCollectGroups(books);
|
||||
expect(groups).toHaveLength(1);
|
||||
expect(groups[0].canonicalAsin).toBe('A2'); // richer metadata
|
||||
expect(groups[0].narrator).toBe('Jane Doe');
|
||||
expect(groups[0].durationMinutes).toBe(482);
|
||||
expect(groups[0].author).toBe('Auth');
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user