Add Hardcover shelf sync & unify book mappings

Introduce Hardcover provider support and consolidate per-provider book mapping tables into a unified BookMapping model. Adds two Prisma migrations (add_hardcover_shelves, unify_book_mappings), new backend services (hardcover-api, shelf-sync-core), and provider-specific sync logic and API routes for hardcover shelves with token/list validation. Frontend: new HardcoverForm component, refactor AddShelfModal to support Hardcover, hook updates, and small UI/accessibility tweaks. Also add documentation for Goodreads and Hardcover sync flows and update tests to cover scheduler/prisma helpers.
This commit is contained in:
kikootwo
2026-03-04 10:11:19 -05:00
parent 6ca2e964e8
commit 338331d006
23 changed files with 1613 additions and 1391 deletions
+86 -284
View File
@@ -2,36 +2,29 @@
* Component: Goodreads Shelf Sync Service
* Documentation: documentation/backend/services/goodreads-sync.md
*
* Fetches Goodreads shelf RSS feeds, resolves books to Audible ASINs,
* and creates requests via the shared request-creator service.
* Fetches Goodreads shelf RSS feeds and delegates book processing
* to the shared shelf-sync-core service.
*/
import axios from 'axios';
import { XMLParser } from 'fast-xml-parser';
import { prisma } from '@/lib/db';
import { getAudibleService } from '@/lib/integrations/audible.service';
import { createRequestForUser } from '@/lib/services/request-creator.service';
import { RMABLogger } from '@/lib/utils/logger';
import {
ShelfBook,
ShelfSyncStats,
ShelfSyncOptions,
createEmptyStats,
resolveMaxLookups,
processShelfBooks,
} from '@/lib/services/shelf-sync-core.service';
const logger = RMABLogger.create('GoodreadsSync');
/** Default max Audible lookups per shelf per scheduled sync cycle */
const DEFAULT_MAX_LOOKUPS_PER_SHELF = 10;
/** Days before retrying a noMatch book */
const NO_MATCH_RETRY_DAYS = 7;
interface GoodreadsRssBook {
bookId: string;
title: string;
author: string;
coverUrl?: string;
}
/**
* Parse a Goodreads RSS feed XML into structured book data.
*/
function parseGoodreadsRss(xml: string): { shelfName: string; books: GoodreadsRssBook[] } {
function parseGoodreadsRss(xml: string): { shelfName: string; books: ShelfBook[] } {
const parser = new XMLParser({
ignoreAttributes: false,
attributeNamePrefix: '@_',
@@ -46,65 +39,84 @@ function parseGoodreadsRss(xml: string): { shelfName: string; books: GoodreadsRs
const shelfName = typeof channel.title === 'string' ? channel.title : 'Goodreads Shelf';
// Normalize items to array
let items = channel.item;
if (!items) return { shelfName, books: [] };
if (!Array.isArray(items)) items = [items];
const books: GoodreadsRssBook[] = [];
const books: ShelfBook[] = [];
for (const item of items) {
const bookId = item.book_id?.toString();
if (!bookId) continue;
const title = (item.title || '').toString().trim();
const authorName = (item.author_name || '').toString().trim();
// Goodreads RSS has book_image_url or book_medium_image_url
const author = (item.author_name || '').toString().trim();
const coverUrl = (item.book_large_image_url || item.book_medium_image_url || item.book_image_url || '').toString().trim() || undefined;
if (title && authorName) {
books.push({ bookId, title, author: authorName, coverUrl });
if (title && author) {
books.push({ bookId, title, author, coverUrl });
}
}
return { shelfName, books };
}
/** Max items Goodreads returns per RSS page */
const GOODREADS_PAGE_SIZE = 100;
/** Safety cap to avoid infinite loops */
const MAX_PAGES = 50;
/**
* Fetch and validate a Goodreads RSS URL.
* Returns the parsed shelf name and books if valid.
* Automatically paginates (sort=title, page=1,2,...) when a page returns 100 items.
* Deduplicates by bookId across pages.
*/
export async function fetchAndValidateRss(rssUrl: string): Promise<{ shelfName: string; books: GoodreadsRssBook[] }> {
const response = await axios.get(rssUrl, { timeout: 15000 });
return parseGoodreadsRss(response.data);
export async function fetchAndValidateRss(rssUrl: string): Promise<{ shelfName: string; books: ShelfBook[] }> {
const url = new URL(rssUrl);
url.searchParams.set('sort', 'title');
let shelfName = 'Goodreads Shelf';
const seenIds = new Set<string>();
const allBooks: ShelfBook[] = [];
for (let page = 1; page <= MAX_PAGES; page++) {
url.searchParams.set('page', page.toString());
const response = await axios.get(url.toString(), { timeout: 15000 });
const parsed = parseGoodreadsRss(response.data);
if (page === 1) {
shelfName = parsed.shelfName;
}
for (const book of parsed.books) {
if (!seenIds.has(book.bookId)) {
seenIds.add(book.bookId);
allBooks.push(book);
}
}
if (parsed.books.length < GOODREADS_PAGE_SIZE) break;
}
return { shelfName, books: allBooks };
}
export interface GoodreadsSyncStats {
shelvesProcessed: number;
booksFound: number;
lookupsPerformed: number;
requestsCreated: number;
errors: number;
}
export interface GoodreadsSyncOptions {
/** Process only this shelf ID (for immediate single-shelf sync) */
shelfId?: string;
/** Max Audible lookups per shelf. 0 = unlimited. Default: 10 for scheduled, unlimited for immediate. */
maxLookupsPerShelf?: number;
}
// Re-export types that downstream consumers expect
export type { ShelfSyncStats as GoodreadsSyncStats };
export type { ShelfSyncOptions as GoodreadsSyncOptions };
/**
* Process Goodreads shelves: fetch RSS, resolve ASINs, create requests.
* Called from the dedicated sync_goodreads_shelves processor.
* Called from the unified sync_reading_shelves processor.
*/
export async function processGoodreadsShelves(
jobLogger?: ReturnType<typeof RMABLogger.forJob>,
options: GoodreadsSyncOptions = {}
): Promise<GoodreadsSyncStats> {
options: ShelfSyncOptions = {}
): Promise<ShelfSyncStats> {
const log = jobLogger || logger;
const stats: GoodreadsSyncStats = { shelvesProcessed: 0, booksFound: 0, lookupsPerformed: 0, requestsCreated: 0, errors: 0 };
const maxLookups = options.maxLookupsPerShelf ?? DEFAULT_MAX_LOOKUPS_PER_SHELF;
const stats = createEmptyStats();
const maxLookups = resolveMaxLookups(options);
const whereClause = options.shelfId ? { id: options.shelfId } : {};
const shelves = await prisma.goodreadsShelf.findMany({
@@ -121,7 +133,32 @@ export async function processGoodreadsShelves(
for (const shelf of shelves) {
try {
await processShelf(shelf, stats, log, maxLookups);
log.info(`Fetching RSS for shelf "${shelf.name}" (user: ${shelf.user.plexUsername})`);
let rssData: { shelfName: string; books: ShelfBook[] };
try {
rssData = await fetchAndValidateRss(shelf.rssUrl);
} catch (error) {
log.error(`Failed to fetch RSS for shelf "${shelf.name}": ${error instanceof Error ? error.message : 'Unknown error'}`);
stats.errors++;
continue;
}
log.info(`Found ${rssData.books.length} books in shelf "${shelf.name}"`);
const bookData = await processShelfBooks(
'goodreads', rssData.books, shelf.user.id, shelf.id, stats, log, maxLookups,
);
await prisma.goodreadsShelf.update({
where: { id: shelf.id },
data: {
lastSyncAt: new Date(),
bookCount: rssData.books.length,
coverUrls: bookData.length > 0 ? JSON.stringify(bookData) : null,
},
});
stats.shelvesProcessed++;
} catch (error) {
stats.errors++;
@@ -132,238 +169,3 @@ export async function processGoodreadsShelves(
log.info(`Goodreads sync complete: ${stats.shelvesProcessed} shelves, ${stats.booksFound} books, ${stats.lookupsPerformed} lookups, ${stats.requestsCreated} requests created, ${stats.errors} errors`);
return stats;
}
async function processShelf(
shelf: { id: string; rssUrl: string; name: string; user: { id: string; plexUsername: string } },
stats: GoodreadsSyncStats,
log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
maxLookups: number
) {
log.info(`Fetching RSS for shelf "${shelf.name}" (user: ${shelf.user.plexUsername})`);
let rssData: { shelfName: string; books: GoodreadsRssBook[] };
try {
rssData = await fetchAndValidateRss(shelf.rssUrl);
} catch (error) {
log.error(`Failed to fetch RSS for shelf "${shelf.name}": ${error instanceof Error ? error.message : 'Unknown error'}`);
return;
}
const books = rssData.books;
stats.booksFound += books.length;
log.info(`Found ${books.length} books in shelf "${shelf.name}"`);
let lookupsThisCycle = 0;
const unlimitedLookups = maxLookups === 0;
for (const book of books) {
// Look up existing mapping
let mapping = await prisma.goodreadsBookMapping.findUnique({
where: { goodreadsBookId: book.bookId },
});
if (!mapping) {
// No mapping exists — perform Audible lookup if under cap
if (!unlimitedLookups && lookupsThisCycle >= maxLookups) {
continue; // Will be resolved in a future cycle
}
mapping = await performAudibleLookup(book, log);
lookupsThisCycle++;
stats.lookupsPerformed++;
// If lookup found an ASIN, fall through to create request immediately
if (!mapping?.audibleAsin) {
continue;
}
}
// Mapping exists with noMatch — check if we should retry
if (mapping.noMatch) {
if (mapping.lastSearchAt) {
const daysSinceSearch = (Date.now() - mapping.lastSearchAt.getTime()) / (1000 * 60 * 60 * 24);
if (daysSinceSearch >= NO_MATCH_RETRY_DAYS && (unlimitedLookups || lookupsThisCycle < maxLookups)) {
log.info(`Retrying Audible lookup for "${book.title}" (${NO_MATCH_RETRY_DAYS}+ days since last search)`);
mapping = await performAudibleLookup(book, log, mapping.id);
lookupsThisCycle++;
stats.lookupsPerformed++;
// If retry found an ASIN, fall through to create request
if (!mapping?.audibleAsin) {
continue;
}
} else {
continue; // Still no match, skip
}
} else {
continue;
}
}
// Mapping has ASIN — try to create request
if (mapping.audibleAsin) {
try {
const result = await createRequestForUser(shelf.user.id, {
asin: mapping.audibleAsin,
title: mapping.title,
author: mapping.author,
coverArtUrl: mapping.coverUrl || undefined,
});
if (result.success) {
stats.requestsCreated++;
log.info(`Created request for "${mapping.title}" by ${mapping.author} (ASIN: ${mapping.audibleAsin})`);
}
// If not success, it's already available/requested/duplicate — silently skip
} catch (error) {
log.error(`Failed to create request for "${mapping.title}": ${error instanceof Error ? error.message : 'Unknown error'}`);
}
}
}
// Collect enriched book data (coverUrl + ASIN) for display
const bookIds = books.map(b => b.bookId);
const mappings = bookIds.length > 0
? await prisma.goodreadsBookMapping.findMany({
where: { goodreadsBookId: { in: bookIds } },
select: { goodreadsBookId: true, audibleAsin: true, title: true, author: true, coverUrl: true },
})
: [];
const mappingsByBookId = new Map(mappings.map(m => [m.goodreadsBookId, m]));
// Look up AudibleCache records for high-quality cached cover URLs
const matchedAsins = mappings
.map(m => m.audibleAsin)
.filter((asin): asin is string => !!asin);
const cachedCovers = matchedAsins.length > 0
? await prisma.audibleCache.findMany({
where: { asin: { in: matchedAsins } },
select: { asin: true, coverArtUrl: true, cachedCoverPath: true },
})
: [];
const coverByAsin = new Map(
cachedCovers
.filter(c => c.cachedCoverPath || c.coverArtUrl)
.map(c => {
let coverUrl = c.coverArtUrl || '';
if (c.cachedCoverPath) {
const filename = c.cachedCoverPath.split('/').pop();
coverUrl = `/api/cache/thumbnails/${filename}`;
}
return [c.asin, coverUrl] as const;
})
);
const bookData = books
.map(b => {
const mapping = mappingsByBookId.get(b.bookId);
// Prefer cached cover (local proxy) > mapping cover > Goodreads RSS cover
const coverUrl = coverByAsin.get(mapping?.audibleAsin || '') || mapping?.coverUrl || b.coverUrl;
if (!coverUrl) return null;
return {
coverUrl,
asin: mapping?.audibleAsin || null,
title: mapping?.title || b.title,
author: mapping?.author || b.author,
};
})
.filter((b): b is NonNullable<typeof b> => b !== null)
.slice(0, 8);
// Update shelf metadata
await prisma.goodreadsShelf.update({
where: { id: shelf.id },
data: {
lastSyncAt: new Date(),
bookCount: books.length,
coverUrls: bookData.length > 0 ? JSON.stringify(bookData) : null,
},
});
}
async function performAudibleLookup(
book: GoodreadsRssBook,
log: ReturnType<typeof RMABLogger.forJob> | ReturnType<typeof RMABLogger.create>,
existingMappingId?: string
): Promise<any> {
const audibleService = getAudibleService();
try {
// Try full Goodreads title first, then fall back to stripped title
// (Goodreads titles often include series info like "(Demonica, #2)" that return 0 Audible results)
const fullQuery = `${book.title} ${book.author}`;
log.info(`Searching Audible for: "${fullQuery}"`);
let searchResult = await audibleService.search(fullQuery);
let firstResult = searchResult.results[0];
if (!firstResult?.asin) {
const cleanTitle = book.title.replace(/\s*\(.*\)\s*$/, '').trim();
if (cleanTitle !== book.title) {
const cleanQuery = `${cleanTitle} ${book.author}`;
log.info(`No results with full title, retrying without series info: "${cleanQuery}"`);
searchResult = await audibleService.search(cleanQuery);
firstResult = searchResult.results[0];
}
}
if (firstResult?.asin) {
log.info(`Audible match: "${book.title}" → ASIN ${firstResult.asin} ("${firstResult.title}" by ${firstResult.author})`);
// Use clean Audible/Audnexus metadata instead of Goodreads data
// (Goodreads titles contain series info like "(The Empyrean, #1)" that pollute indexer searches)
const data = {
title: firstResult.title,
author: firstResult.author,
audibleAsin: firstResult.asin,
coverUrl: firstResult.coverArtUrl || book.coverUrl || null,
noMatch: false,
lastSearchAt: new Date(),
};
if (existingMappingId) {
return prisma.goodreadsBookMapping.update({ where: { id: existingMappingId }, data });
}
return prisma.goodreadsBookMapping.create({
data: { goodreadsBookId: book.bookId, ...data },
});
}
// No match found
log.info(`No Audible match for "${book.title}" by ${book.author}`);
const noMatchData = {
title: book.title,
author: book.author,
coverUrl: book.coverUrl || null,
noMatch: true,
lastSearchAt: new Date(),
audibleAsin: null,
};
if (existingMappingId) {
return prisma.goodreadsBookMapping.update({ where: { id: existingMappingId }, data: noMatchData });
}
return prisma.goodreadsBookMapping.create({
data: { goodreadsBookId: book.bookId, ...noMatchData },
});
} catch (error) {
log.error(`Audible lookup failed for "${book.title}": ${error instanceof Error ? error.message : 'Unknown error'}`);
// Still create/update mapping so we don't retry every cycle
const errorData = {
title: book.title,
author: book.author,
coverUrl: book.coverUrl || null,
noMatch: true,
lastSearchAt: new Date(),
};
if (existingMappingId) {
return prisma.goodreadsBookMapping.update({ where: { id: existingMappingId }, data: errorData });
}
return prisma.goodreadsBookMapping.create({
data: { goodreadsBookId: book.bookId, ...errorData },
});
}
}