diff --git a/docker/unified/entrypoint.sh b/docker/unified/entrypoint.sh
index e0a65d7..76fb8c2 100644
--- a/docker/unified/entrypoint.sh
+++ b/docker/unified/entrypoint.sh
@@ -403,12 +403,36 @@ echo "🔄 Running Prisma migrations..."
 cd /app
 su - node -c "cd /app && DATABASE_URL='$DATABASE_URL' npx prisma db push --skip-generate --accept-data-loss" || echo "⚠️ Migrations may have failed, continuing..."
 
-# Run data migrations (idempotent SQL scripts that prisma db push doesn't handle)
+# Run data migrations (run-once SQL scripts tracked in _data_migrations table)
 echo "🔄 Running data migrations..."
+
+# psql/libpq rejects Prisma-only URI parameters (e.g. ?schema=public), so strip "schema" before calling psql
+PSQL_URL=$(printf '%s' "$DATABASE_URL" | sed -E 's/([?&])schema=[^&]*&?/\1/; s/[?&]$//')
+
 for sql_file in /app/prisma/data-migrations/*.sql; do
     if [ -f "$sql_file" ]; then
-        echo " Running $(basename "$sql_file")..."
-        su - node -c "cd /app && DATABASE_URL='$DATABASE_URL' npx prisma db execute --schema prisma/schema.prisma --file '$sql_file'" || echo "⚠️ Data migration $(basename "$sql_file") may have failed, continuing..."
+        migration_name=$(basename "$sql_file")
+
+        # Fail safe: never run a run-once (possibly destructive) script when its status cannot be read
+        if ! already_run=$(psql "$PSQL_URL" -tA -v ON_ERROR_STOP=1 -c "SELECT 1 FROM _data_migrations WHERE name = '$migration_name' LIMIT 1;"); then
+            echo "⚠️ Could not read _data_migrations, skipping $migration_name until next start"
+            continue
+        fi
+        if [ "$already_run" = "1" ]; then
+            echo " Skipping $migration_name (already executed)"
+            continue
+        fi
+
+        echo " Running $migration_name..."
+        if su - node -c "cd /app && DATABASE_URL='$DATABASE_URL' npx prisma db execute --schema prisma/schema.prisma --file '$sql_file'"; then
+            if psql "$PSQL_URL" -v ON_ERROR_STOP=1 -c "INSERT INTO _data_migrations (name) VALUES ('$migration_name') ON CONFLICT (name) DO NOTHING;"; then
+                echo " ✅ $migration_name completed"
+            else
+                echo "⚠️ Data migration $migration_name ran but could not be recorded, it will run again on next start"
+            fi
+        else
+            echo "⚠️ Data migration $migration_name failed, will retry on next start"
+        fi
     fi
 done
 
diff --git a/prisma/data-migrations/reset-works-table.sql b/prisma/data-migrations/reset-works-table.sql
new file mode 100644
index 0000000..1bbd29f
--- /dev/null
+++ b/prisma/data-migrations/reset-works-table.sql
@@ -0,0 +1,7 @@
+-- Reset works table to fix incorrect dedup groupings (v1.1.2)
+-- Books with "Series: Title" naming (e.g. "Eden's Gate: The Reborn" vs
+-- "Eden's Gate: The Spartan") were incorrectly merged into the same work
+-- because subtitle stripping collapsed them to the same base title.
+-- The works table auto-rebuilds from dedup logic as users browse.
+DELETE FROM work_asins;
+DELETE FROM works;
diff --git a/prisma/schema.prisma b/prisma/schema.prisma
index 04ec930..7234838 100644
--- a/prisma/schema.prisma
+++ b/prisma/schema.prisma
@@ -718,3 +718,15 @@ model AudibleCacheCategory {
   @@index([categoryId, rank])
   @@map("audible_cache_categories")
 }
+
+// ============================================================================
+// DATA MIGRATION TRACKING
+// Tracks which data migration SQL scripts have been executed (run-once).
+// ============================================================================
+
+model DataMigration {
+  name String @id
+  executedAt DateTime @default(now()) @map("executed_at")
+
+  @@map("_data_migrations")
+}
diff --git a/src/lib/utils/deduplicate-audiobooks.ts b/src/lib/utils/deduplicate-audiobooks.ts
index 1cfe6f2..cbaf773 100644
--- a/src/lib/utils/deduplicate-audiobooks.ts
+++ b/src/lib/utils/deduplicate-audiobooks.ts
@@ -19,7 +19,7 @@ import type { AudibleAudiobook } from '../integrations/audible.service';
 /** Patterns in parentheses or brackets to strip (edition markers, format labels) */
 const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;
 
-/** Trailing subtitle after colon or long dash */
+/** Trailing subtitle after colon or long dash (used for extraction, not blind stripping) */
 const SUBTITLE_RE = /\s*[:]\s+.+$/;
 const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;
 
@@ -44,6 +44,44 @@ export function normalizeTitle(title: string): string {
   return t.replace(/\s+/g, ' ').trim();
 }
 
+/**
+ * Extract the subtitle portion from a title (part after colon or long dash).
+ * Returns empty string if no subtitle found.
+ * Used to prevent false dedup of series books like "Series: Book A" vs "Series: Book B".
+ */
+export function extractSubtitle(title: string): string {
+  let t = title.toLowerCase();
+  // Remove parenthesized/bracketed edition markers first (same as normalizeTitle)
+  t = t.replace(EDITION_PAREN_RE, '');
+  // Remove trailing descriptors
+  t = t.replace(TRAILING_DESCRIPTOR_RE, '');
+  t = t.replace(/\s+/g, ' ').trim();
+
+  // Try colon subtitle
+  const colonMatch = t.match(/\s*[:]\s+(.+)$/);
+  if (colonMatch) return colonMatch[1].trim();
+
+  // Try long dash subtitle
+  const dashMatch = t.match(/\s+[-\u2013\u2014]\s+(.+)$/);
+  if (dashMatch) return dashMatch[1].trim();
+
+  return '';
+}
+
+/**
+ * Check if two titles' subtitles are compatible for dedup purposes.
+ * - Both have no subtitle → compatible
+ * - One has a subtitle, other doesn't → compatible (re-listing with/without subtitle)
+ * - Both have the SAME subtitle → compatible
+ * - Both have DIFFERENT subtitles → NOT compatible (different books, e.g. series entries)
+ */
+function areSubtitlesCompatible(titleA: string, titleB: string): boolean {
+  const subA = extractSubtitle(titleA);
+  const subB = extractSubtitle(titleB);
+  if (!subA || !subB) return true; // one or both missing → compatible
+  return subA === subB;
+}
+
 /** Normalize narrator for comparison. Sorts individual names so order doesn't matter. */
 function normalizeNarrator(narrator?: string): string {
   const raw = (narrator || '').toLowerCase().trim();
@@ -152,16 +190,20 @@ export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): Deduplic
       continue;
     }
 
-    // Within a title+narrator group, further split by duration compatibility.
-    // Build sub-groups where all members are duration-compatible with the
-    // representative (first member). A book joins the first compatible sub-group.
+    // Within a title+narrator group, further split by duration AND subtitle
+    // compatibility. Durations are compared with the representative (first member);
+    // subtitles with EVERY member, because a subtitle-less member is compatible with
+    // anything and would otherwise bridge "Series: Book A" and "Series: Book B".
     const subGroups: AudibleAudiobook[][] = [];
 
     for (const book of group) {
       let placed = false;
       for (const sg of subGroups) {
-        // Check compatibility against the representative (first member)
-        if (areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes)) {
+        // Duration vs. the representative; subtitle vs. all members (the relation is not transitive)
+        if (
+          areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes) &&
+          sg.every((member) => areSubtitlesCompatible(member.title, book.title))
+        ) {
           sg.push(book);
           placed = true;
           break;
diff --git a/tests/utils/deduplicate-audiobooks.test.ts b/tests/utils/deduplicate-audiobooks.test.ts
index a535e15..a84d5ed 100644
--- a/tests/utils/deduplicate-audiobooks.test.ts
+++ b/tests/utils/deduplicate-audiobooks.test.ts
@@ -8,6 +8,7 @@ import {
   deduplicateAudiobooks,
   deduplicateAndCollectGroups,
   normalizeTitle,
+  extractSubtitle,
   areDurationsCompatible,
 } from '@/lib/utils/deduplicate-audiobooks';
 import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
@@ -92,6 +93,32 @@ describe('normalizeTitle', () => {
   });
 });
 
+// ---------------------------------------------------------------------------
+// extractSubtitle
+// ---------------------------------------------------------------------------
+
+describe('extractSubtitle', () => {
+  it('extracts subtitle after colon', () => {
+    expect(extractSubtitle('Eden\'s Gate: The Reborn')).toBe('the reborn');
+  });
+
+  it('extracts subtitle after long dash', () => {
+    expect(extractSubtitle('Eden\'s Gate \u2014 The Reborn')).toBe('the reborn');
+  });
+
+  it('returns empty for title without subtitle', () => {
+    expect(extractSubtitle('The Black Prism')).toBe('');
+  });
+
+  it('strips edition markers before extracting', () => {
+    expect(extractSubtitle('The Hobbit (Unabridged): Extended')).toBe('extended');
+  });
+
+  it('returns empty string for empty input', () => {
+    expect(extractSubtitle('')).toBe('');
+  });
+});
+
 // ---------------------------------------------------------------------------
 // areDurationsCompatible
 // ---------------------------------------------------------------------------
@@ -302,6 +329,38 @@ describe('deduplicateAudiobooks', () => {
     expect(deduplicateAudiobooks(books)).toHaveLength(1);
   });
 
+  it('does NOT collapse series entries with different subtitles (Eden\'s Gate bug)', () => {
+    // Series format: "Series Name: Book Title" — different books, NOT duplicates
+    const books = [
+      makeBook({ asin: 'A1', title: 'Eden\'s Gate: The Reborn', author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes: 510 }),
+      makeBook({ asin: 'A2', title: 'Eden\'s Gate: The Spartan', author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes: 540 }),
+      makeBook({ asin: 'A3', title: 'Eden\'s Gate: The Sapper', author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes: 600 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(3); // All 3 are different books!
+  });
+
+  it('still collapses when one has subtitle and other does not', () => {
+    // Same book re-listed: "The Black Prism: Lightbringer, Book 1" vs "The Black Prism"
+    const books = [
+      makeBook({ asin: 'A1', title: 'The Black Prism: Lightbringer, Book 1', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1260 }),
+      makeBook({ asin: 'A2', title: 'The Black Prism', author: 'Brent Weeks', narrator: 'Simon Vance', durationMinutes: 1262 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(1);
+  });
+
+  it('does NOT let a subtitle-less listing bridge different series entries', () => {
+    // The bare listing is subtitle-compatible with both entries, but the entries are not compatible with each other
+    const books = [
+      makeBook({ asin: 'A0', title: 'Eden\'s Gate', author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes: 510 }),
+      makeBook({ asin: 'A1', title: 'Eden\'s Gate: The Reborn', author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes: 510 }),
+      makeBook({ asin: 'A2', title: 'Eden\'s Gate: The Spartan', author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes: 510 }),
+    ];
+    const result = deduplicateAudiobooks(books);
+    expect(result).toHaveLength(2); // bare listing merges with one entry; the two entries stay apart
+  });
+
   it('does not collapse empty-narrator with named narrator', () => {
     const books = [
       makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),