mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-02 20:30:10 +00:00
Add data-migration tracking; prevent subtitle dedup
Track and execute run-once SQL data migrations: the entrypoint now checks the _data_migrations table before executing each Prisma data-migration file, records successful runs, and skips scripts that have already been applied. Adds a Prisma DataMigration model mapped to _data_migrations and a new reset-works-table.sql migration that clears the works tables so the dedup groupings can be rebuilt. Also improves the dedup logic: extractSubtitle and a subtitle-compatibility check are added so that series entries such as "Series: Book A" and "Series: Book B" are no longer collapsed, with accompanying unit tests for the extraction and for the dedup behavior.
This commit is contained in:
@@ -403,12 +403,26 @@ echo "🔄 Running Prisma migrations..."
|
|||||||
cd /app
|
cd /app
|
||||||
su - node -c "cd /app && DATABASE_URL='$DATABASE_URL' npx prisma db push --skip-generate --accept-data-loss" || echo "⚠️ Migrations may have failed, continuing..."
|
su - node -c "cd /app && DATABASE_URL='$DATABASE_URL' npx prisma db push --skip-generate --accept-data-loss" || echo "⚠️ Migrations may have failed, continuing..."
|
||||||
|
|
||||||
# Run data migrations (idempotent SQL scripts that prisma db push doesn't handle)
|
# Run data migrations (run-once SQL scripts tracked in _data_migrations table)
|
||||||
echo "🔄 Running data migrations..."
|
echo "🔄 Running data migrations..."
|
||||||
|
|
||||||
# Apply each run-once SQL data migration exactly once.
#
# For every *.sql file in /app/prisma/data-migrations:
#   1. look the file name up in _data_migrations and skip it if already recorded;
#   2. otherwise execute it through `prisma db execute` as the node user;
#   3. record the file name in _data_migrations only if the script succeeded.
# A failure never aborts start-up; the script is simply retried on the next start.
for sql_file in /app/prisma/data-migrations/*.sql; do
  # An unmatched glob stays a literal pattern, so make sure this is a real file.
  [ -f "$sql_file" ] || continue

  migration_name=$(basename "$sql_file")
  # Double any single quote so the file name is safe inside a SQL string literal.
  migration_name_sql=$(printf '%s' "$migration_name" | sed "s/'/''/g")

  # NOTE(review): psql receives DATABASE_URL unchanged — confirm the URL carries no
  # Prisma-only query parameters (e.g. ?schema=public) that libpq would reject.
  #
  # If the tracking table cannot be read we cannot tell whether the script already
  # ran, so skip it rather than risk re-running a destructive migration.
  if ! already_run=$(psql "$DATABASE_URL" -tA -c "SELECT 1 FROM _data_migrations WHERE name = '$migration_name_sql' LIMIT 1;"); then
    echo "⚠️ Could not read _data_migrations, skipping $migration_name until next start"
    continue
  fi

  if [ "$already_run" = "1" ]; then
    echo " Skipping $migration_name (already executed)"
    continue
  fi

  echo " Running $migration_name..."
  if su - node -c "cd /app && DATABASE_URL='$DATABASE_URL' npx prisma db execute --schema prisma/schema.prisma --file '$sql_file'"; then
    # Only report success once the run is actually recorded; otherwise the script
    # would silently run again on every start.
    if psql "$DATABASE_URL" -c "INSERT INTO _data_migrations (name) VALUES ('$migration_name_sql') ON CONFLICT (name) DO NOTHING;"; then
      echo " ✅ $migration_name completed"
    else
      echo "⚠️ Data migration $migration_name ran but could not be recorded, it will run again on next start"
    fi
  else
    echo "⚠️ Data migration $migration_name failed, will retry on next start"
  fi
done
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,7 @@
|
|||||||
|
-- Reset works table to fix incorrect dedup groupings (v1.1.2)
|
||||||
|
-- Books with "Series: Title" naming (e.g. "Eden's Gate: The Reborn" vs
|
||||||
|
-- "Eden's Gate: The Spartan") were incorrectly merged into the same work
|
||||||
|
-- because subtitle stripping collapsed them to the same base title.
|
||||||
|
-- The works table auto-rebuilds from dedup logic as users browse.
-- Destructive: each statement below removes every row from its table.
-- NOTE(review): work_asins is cleared first, presumably because its rows
-- reference works — confirm against the schema.
|
||||||
|
DELETE FROM work_asins;
|
||||||
|
DELETE FROM works;
|
||||||
@@ -718,3 +718,15 @@ model AudibleCacheCategory {
|
|||||||
@@index([categoryId, rank])
|
@@index([categoryId, rank])
|
||||||
@@map("audible_cache_categories")
|
@@map("audible_cache_categories")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================================
|
||||||
|
// DATA MIGRATION TRACKING
|
||||||
|
// Tracks which data migration SQL scripts have been executed (run-once).
|
||||||
|
// ============================================================================
|
||||||
|
|
||||||
|
// One row per data-migration SQL script that has been executed.
model DataMigration {
|
||||||
|
// Identifier of the executed script; primary key, so each name is stored at most once.
name String @id
|
||||||
|
// Defaults to the insertion time; stored in the executed_at column.
executedAt DateTime @default(now()) @map("executed_at")
|
||||||
|
|
||||||
|
// Backing table name, as queried directly by the container entrypoint.
@@map("_data_migrations")
|
||||||
|
}
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ import type { AudibleAudiobook } from '../integrations/audible.service';
|
|||||||
/** Patterns in parentheses or brackets to strip (edition markers, format labels) */
|
/** Patterns in parentheses or brackets to strip (edition markers, format labels) */
|
||||||
const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;
|
const EDITION_PAREN_RE = /[([][^)\]]*?(?:unabridged|abridged|edition|remaster(?:ed)?|anniversary|complete|original|version|narrat(?:ed|or)?|audio(?:book)?|full cast|dramatiz(?:ed|ation))[^)\]]*[)\]]/gi;
|
||||||
|
|
||||||
/** Trailing subtitle after colon or long dash */
|
/** Trailing subtitle after colon or long dash (used for extraction, not blind stripping) */
|
||||||
const SUBTITLE_RE = /\s*[:]\s+.+$/;
|
const SUBTITLE_RE = /\s*[:]\s+.+$/;
|
||||||
const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;
|
const LONG_DASH_SUBTITLE_RE = /\s+[-\u2013\u2014]\s+.+$/;
|
||||||
|
|
||||||
@@ -44,6 +44,44 @@ export function normalizeTitle(title: string): string {
|
|||||||
return t.replace(/\s+/g, ' ').trim();
|
return t.replace(/\s+/g, ' ').trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Pull the subtitle out of a title: the text that follows a colon or a
 * space-delimited dash (hyphen, en dash or em dash).
 *
 * The title is lowercased, stripped of parenthesized/bracketed edition markers
 * and trailing descriptors, and whitespace-collapsed before matching. This lets
 * callers tell series entries such as "Series: Book A" and "Series: Book B" apart
 * instead of treating them as the same work.
 *
 * @param title Raw title as listed.
 * @returns The lowercased, trimmed subtitle, or `''` when the title has none.
 */
export function extractSubtitle(title: string): string {
  const cleaned = title
    .toLowerCase()
    .replace(EDITION_PAREN_RE, '')
    .replace(TRAILING_DESCRIPTOR_RE, '')
    .replace(/\s+/g, ' ')
    .trim();

  // Order matters: a colon separator takes precedence over a dash separator.
  const separators = [/\s*[:]\s+(.+)$/, /\s+[-\u2013\u2014]\s+(.+)$/];
  for (const separator of separators) {
    const found = separator.exec(cleaned);
    if (found) return found[1].trim();
  }

  return '';
}
|
||||||
|
|
||||||
|
/**
 * Decide whether two titles may belong to the same work as far as their
 * subtitles are concerned.
 *
 * Two titles only conflict when BOTH carry a subtitle and those subtitles
 * differ (e.g. separate entries of one series). A title without a subtitle is
 * compatible with anything, which covers re-listings published with and
 * without the subtitle.
 *
 * @param titleA First raw title.
 * @param titleB Second raw title.
 * @returns `false` only when both subtitles are present and different.
 */
function areSubtitlesCompatible(titleA: string, titleB: string): boolean {
  const first = extractSubtitle(titleA);
  const second = extractSubtitle(titleB);

  const bothPresent = first !== '' && second !== '';
  return !bothPresent || first === second;
}
|
||||||
|
|
||||||
/** Normalize narrator for comparison. Sorts individual names so order doesn't matter. */
|
/** Normalize narrator for comparison. Sorts individual names so order doesn't matter. */
|
||||||
function normalizeNarrator(narrator?: string): string {
|
function normalizeNarrator(narrator?: string): string {
|
||||||
const raw = (narrator || '').toLowerCase().trim();
|
const raw = (narrator || '').toLowerCase().trim();
|
||||||
@@ -152,16 +190,20 @@ export function deduplicateAndCollectGroups(books: AudibleAudiobook[]): Deduplic
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Within a title+narrator group, further split by duration compatibility.
|
// Within a title+narrator group, further split by duration AND subtitle
|
||||||
// Build sub-groups where all members are duration-compatible with the
|
// compatibility. Build sub-groups where all members are compatible with
|
||||||
// representative (first member). A book joins the first compatible sub-group.
|
// the representative (first member). A book joins the first compatible sub-group.
|
||||||
|
// This prevents false dedup of series entries like "Series: Book A" vs "Series: Book B".
|
||||||
const subGroups: AudibleAudiobook[][] = [];
|
const subGroups: AudibleAudiobook[][] = [];
|
||||||
|
|
||||||
for (const book of group) {
|
for (const book of group) {
|
||||||
let placed = false;
|
let placed = false;
|
||||||
for (const sg of subGroups) {
|
for (const sg of subGroups) {
|
||||||
// Check compatibility against the representative (first member)
|
// Check both duration and subtitle compatibility against the representative
|
||||||
if (areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes)) {
|
if (
|
||||||
|
areDurationsCompatible(sg[0].durationMinutes, book.durationMinutes) &&
|
||||||
|
areSubtitlesCompatible(sg[0].title, book.title)
|
||||||
|
) {
|
||||||
sg.push(book);
|
sg.push(book);
|
||||||
placed = true;
|
placed = true;
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import {
|
|||||||
deduplicateAudiobooks,
|
deduplicateAudiobooks,
|
||||||
deduplicateAndCollectGroups,
|
deduplicateAndCollectGroups,
|
||||||
normalizeTitle,
|
normalizeTitle,
|
||||||
|
extractSubtitle,
|
||||||
areDurationsCompatible,
|
areDurationsCompatible,
|
||||||
} from '@/lib/utils/deduplicate-audiobooks';
|
} from '@/lib/utils/deduplicate-audiobooks';
|
||||||
import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
|
import type { AudibleAudiobook } from '@/lib/integrations/audible.service';
|
||||||
@@ -92,6 +93,32 @@ describe('normalizeTitle', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// extractSubtitle
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
describe('extractSubtitle', () => {
  // Each row: [test name, raw title, expected subtitle].
  const cases: Array<[string, string, string]> = [
    ['extracts subtitle after colon', 'Eden\'s Gate: The Reborn', 'the reborn'],
    ['extracts subtitle after long dash', 'Eden\'s Gate \u2014 The Reborn', 'the reborn'],
    ['returns empty for title without subtitle', 'The Black Prism', ''],
    ['strips edition markers before extracting', 'The Hobbit (Unabridged): Extended', 'extended'],
    ['returns empty string for empty input', '', ''],
  ];

  for (const [name, rawTitle, expected] of cases) {
    it(name, () => {
      expect(extractSubtitle(rawTitle)).toBe(expected);
    });
  }
});
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// areDurationsCompatible
|
// areDurationsCompatible
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -302,6 +329,27 @@ describe('deduplicateAudiobooks', () => {
|
|||||||
expect(deduplicateAudiobooks(books)).toHaveLength(1);
|
expect(deduplicateAudiobooks(books)).toHaveLength(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it('does NOT collapse series entries with different subtitles (Eden\'s Gate bug)', () => {
  // "Series Name: Book Title" listings are separate books of one series, so
  // every entry must survive deduplication.
  const listings: Array<[string, string, number]> = [
    ['A1', 'Eden\'s Gate: The Reborn', 510],
    ['A2', 'Eden\'s Gate: The Spartan', 540],
    ['A3', 'Eden\'s Gate: The Sapper', 600],
  ];
  const seriesEntries = listings.map(([asin, title, durationMinutes]) =>
    makeBook({ asin, title, author: 'Edward Brody', narrator: 'Pavi Proczko', durationMinutes }),
  );

  expect(deduplicateAudiobooks(seriesEntries)).toHaveLength(3);
});
|
||||||
|
|
||||||
|
it('still collapses when one has subtitle and other does not', () => {
  // The same book listed twice: once with its series subtitle, once without.
  const withSubtitle = makeBook({
    asin: 'A1',
    title: 'The Black Prism: Lightbringer, Book 1',
    author: 'Brent Weeks',
    narrator: 'Simon Vance',
    durationMinutes: 1260,
  });
  const withoutSubtitle = makeBook({
    asin: 'A2',
    title: 'The Black Prism',
    author: 'Brent Weeks',
    narrator: 'Simon Vance',
    durationMinutes: 1262,
  });

  expect(deduplicateAudiobooks([withSubtitle, withoutSubtitle])).toHaveLength(1);
});
|
||||||
|
|
||||||
it('does not collapse empty-narrator with named narrator', () => {
|
it('does not collapse empty-narrator with named narrator', () => {
|
||||||
const books = [
|
const books = [
|
||||||
makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),
|
makeBook({ asin: 'A1', title: 'Test Book', author: 'Auth', narrator: undefined, durationMinutes: 300 }),
|
||||||
|
|||||||
Reference in New Issue
Block a user