Add data-migration tracking; prevent subtitle dedup

Track and execute run-once SQL data migrations: the entrypoint now checks _data_migrations before executing each Prisma data-migration file, records successful runs, and skips already-applied scripts. Adds a Prisma DataMigration model mapped to _data_migrations and a new reset-works-table.sql migration that clears the work tables for a dedup rebuild. Also improves the dedup logic: extractSubtitle and subtitle-compatibility checks are added so that series entries such as "Series: Book A" and "Series: Book B" are not collapsed, with accompanying unit tests for the extraction and the dedup behavior.
This commit is contained in:
kikootwo
2026-03-05 16:45:56 -05:00
parent 3e2221ad5b
commit 5b4aa3fa15
5 changed files with 132 additions and 9 deletions
@@ -8,6 +8,7 @@ import {
deduplicateAudiobooks,
deduplicateAndCollectGroups,
normalizeTitle,
extractSubtitle,
areDurationsCompatible,
} from '@/lib/utils/deduplicate-audiobooks';
import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
@@ -92,6 +93,32 @@ describe('normalizeTitle', () => {
});
});
// ---------------------------------------------------------------------------
// extractSubtitle
// ---------------------------------------------------------------------------
describe('extractSubtitle', () => {
  // Each row is [test name, raw title handed to extractSubtitle, expected return value].
  const cases = [
    ['extracts subtitle after colon', "Eden's Gate: The Reborn", 'the reborn'],
    // \u2014 is the em dash ("long dash") used as the title/subtitle separator.
    ['extracts subtitle after long dash', "Eden's Gate \u2014 The Reborn", 'the reborn'],
    ['returns empty for title without subtitle', 'The Black Prism', ''],
    ['strips edition markers before extracting', 'The Hobbit (Unabridged): Extended', 'extended'],
    ['returns empty string for empty input', '', ''],
  ];

  // Register one test per row, in table order.
  for (const [testName, rawTitle, expectedSubtitle] of cases) {
    it(testName, () => {
      expect(extractSubtitle(rawTitle)).toBe(expectedSubtitle);
    });
  }
});
// ---------------------------------------------------------------------------
// areDurationsCompatible
// ---------------------------------------------------------------------------
@@ -302,6 +329,27 @@ describe('deduplicateAudiobooks', () => {
expect(deduplicateAudiobooks(books)).toHaveLength(1);
});
it("does NOT collapse series entries with different subtitles (Eden's Gate bug)", () => {
  // Titles follow the "Series Name: Book Title" pattern. Only the part after the
  // colon differs, so these are three distinct books rather than duplicates.
  const sharedCredits = { author: 'Edward Brody', narrator: 'Pavi Proczko' };
  const seriesEntries = [
    makeBook({ asin: 'A1', title: "Eden's Gate: The Reborn", ...sharedCredits, durationMinutes: 510 }),
    makeBook({ asin: 'A2', title: "Eden's Gate: The Spartan", ...sharedCredits, durationMinutes: 540 }),
    makeBook({ asin: 'A3', title: "Eden's Gate: The Sapper", ...sharedCredits, durationMinutes: 600 }),
  ];

  // Every entry must survive deduplication.
  expect(deduplicateAudiobooks(seriesEntries)).toHaveLength(3);
});
it('still collapses when one has subtitle and other does not', () => {
  // The same book listed twice: once as "The Black Prism: Lightbringer, Book 1"
  // and once as plain "The Black Prism". The durations differ by two minutes.
  const withSubtitle = makeBook({
    asin: 'A1',
    title: 'The Black Prism: Lightbringer, Book 1',
    author: 'Brent Weeks',
    narrator: 'Simon Vance',
    durationMinutes: 1260,
  });
  const withoutSubtitle = makeBook({
    asin: 'A2',
    title: 'The Black Prism',
    author: 'Brent Weeks',
    narrator: 'Simon Vance',
    durationMinutes: 1262,
  });

  const deduped = deduplicateAudiobooks([withSubtitle, withoutSubtitle]);

  // Only one of the two listings should remain.
  expect(deduped).toHaveLength(1);
});
it('does not collapse empty-narrator with named narrator', () => {
const books = [
makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),