Add custom search terms & retry download (admin)

Add support for per-request custom search terms and an admin retry-download flow.

- DB/schema: add custom_search_terms column via Prisma migration and schema update.
- Admin UI: new AdjustSearchTermsModal component and UI badges to show custom search status; RequestActionsDropdown and RecentRequestsTable updated to surface adjust/retry actions.
- API: new PATCH /api/admin/requests/[id]/search-terms to set/clear custom terms (optionally triggering a new search) and new POST /api/admin/requests/[id]/retry-download to resume monitoring or re-add downloads using DownloadHistory metadata.
- Behavior: interactive search now prefers customSearchTerms when present; manual import exposes a cleanupSource option to the organize job; the admin requests listing returns downloadAttempts and customSearchTerms.
- UX: add SectionToolbar, LoadMoreBar and HideAvailableToggle components and wire the hide-available preference across the home, search, author and series pages; the author and series endpoints and page handlers gain pagination metadata.
- Misc: add connection-errors util and update related processors/services and tests to cover the new flows.

These changes enable admins to override search terms per request, trigger searches from the admin UI, and retry failed downloads more robustly.
This commit is contained in:
kikootwo
2026-03-02 17:05:21 -05:00
parent 3ee67c8763
commit d25a6ebf79
39 changed files with 2034 additions and 311 deletions
+11 -4
View File
@@ -288,17 +288,17 @@ function parseSeriesPageSummary(
* Scrape a series page for full detail data including books and similar series.
* Used by the detail API endpoint.
*/
export async function scrapeSeriesPage(asin: string): Promise<SeriesDetail | null> {
export async function scrapeSeriesPage(asin: string, page: number = 1): Promise<(SeriesDetail & { hasMore: boolean; page: number }) | null> {
const service = getAudibleService();
const region = service.getRegion();
const baseUrl = service.getBaseUrl();
const langConfig = getLanguageForRegion(region);
logger.info(`Scraping series detail page: ${asin}`);
logger.info(`Scraping series detail page: ${asin}, page ${page}`);
try {
const { data: response } = await service.fetch(`/series/${asin}`, {
params: { ipRedirectOverride: 'true', pageSize: AUDIBLE_PAGE_SIZE },
params: { ipRedirectOverride: 'true', pageSize: AUDIBLE_PAGE_SIZE, page },
});
const $ = cheerio.load(response.data);
@@ -316,10 +316,15 @@ export async function scrapeSeriesPage(asin: string): Promise<SeriesDetail | nul
// Use actual book count if we got more from scraping
const bookCount = Math.max(summary.bookCount, books.length);
// Calculate hasMore: use header bookCount if available, otherwise check if full page
const hasMore = bookCount > 0
? page * AUDIBLE_PAGE_SIZE < bookCount
: books.length >= AUDIBLE_PAGE_SIZE;
// Parse similar series ("Listeners also enjoyed" or similar section)
const similarSeries = parseSimilarSeries($);
logger.info(`Series detail complete: "${summary.title}" (${books.length} books, ${similarSeries.length} similar)`);
logger.info(`Series detail complete: "${summary.title}" (${books.length} books, page ${page}, hasMore: ${hasMore})`);
return {
asin,
@@ -332,6 +337,8 @@ export async function scrapeSeriesPage(asin: string): Promise<SeriesDetail | nul
books,
similarSeries,
audibleUrl: `${baseUrl}/series/${asin}`,
hasMore,
page,
};
} catch (error) {
logger.error(`Failed to scrape series detail ${asin}`, {
+93 -96
View File
@@ -59,6 +59,13 @@ export interface AudibleSearchResult {
hasMore: boolean;
}
/**
 * One page of author-filtered book results, as returned by
 * `AudibleService.searchByAuthorAsin`.
 */
export interface AuthorBooksResult {
/** Books from the requested page that passed the language, author-ASIN and duplicate-ASIN filters. */
books: AudibleAudiobook[];
/** Whether more pages are expected after this one; always `false` when no books were collected or the search failed. */
hasMore: boolean;
/** Page number that was requested (defaults to 1 in `searchByAuthorAsin`). */
page: number;
/** Total result count parsed from the page's `.resultsInfo` text; `0` when it cannot be parsed or the search failed. */
totalResults: number;
}
export class AudibleService {
private client!: AxiosInstance;
private baseUrl: string = 'https://www.audible.com';
@@ -564,7 +571,9 @@ export class AudibleService {
results: audiobooks,
totalResults,
page,
hasMore: audiobooks.length > 0 && totalResults > page * AUDIBLE_PAGE_SIZE,
hasMore: audiobooks.length > 0 && (totalResults > 0
? totalResults > page * AUDIBLE_PAGE_SIZE
: audiobooks.length >= AUDIBLE_PAGE_SIZE),
};
} catch (error) {
logger.error('Search failed', { error: error instanceof Error ? error.message : String(error) });
@@ -583,123 +592,111 @@ export class AudibleService {
* Uses Audible's searchAuthor parameter and fetches a single page of results per call (the requested `page`).
* Filters: (1) author link must contain the target ASIN, (2) language must be one accepted for the configured region.
*/
async searchByAuthorAsin(authorName: string, authorAsin: string): Promise<AudibleAudiobook[]> {
async searchByAuthorAsin(authorName: string, authorAsin: string, page: number = 1): Promise<AuthorBooksResult> {
await this.initialize();
const MAX_PAGES = 10;
const allBooks: AudibleAudiobook[] = [];
const books: AudibleAudiobook[] = [];
const seenAsins = new Set<string>();
try {
logger.info(`Searching books by author "${authorName}" (ASIN: ${authorAsin})...`);
logger.info(`Searching books by author "${authorName}" (ASIN: ${authorAsin}), page ${page}...`);
for (let page = 1; page <= MAX_PAGES; page++) {
const { data: response, meta } = await this.fetchWithRetry('/search', {
params: {
ipRedirectOverride: 'true',
searchAuthor: authorName,
pageSize: AUDIBLE_PAGE_SIZE,
page,
},
const { data: response } = await this.fetchWithRetry('/search', {
params: {
ipRedirectOverride: 'true',
searchAuthor: authorName,
pageSize: AUDIBLE_PAGE_SIZE,
page,
},
});
const $ = cheerio.load(response.data);
// Count raw items on page before filtering (for hasMore fallback)
const pageItemCount = $('.s-result-item, .productListItem').length;
$('.s-result-item, .productListItem').each((_index, element) => {
const $el = $(element);
// --- Language filter: require matching language for region ---
const langConfig = this.getLangConfig();
const langText = $el.find(buildContainsSelector('span', langConfig.scraping.languageLabels)).text().trim() ||
$el.find('.languageLabel').text().trim();
const langLabelPattern = new RegExp(`(?:${langConfig.scraping.languageLabels.map(l => l.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|')})\\s*(.+)`, 'i');
const langMatch = langText.match(langLabelPattern);
const language = langMatch?.[1]?.trim();
if (!language || !isAcceptedLanguage(language, langConfig)) return;
// --- Author ASIN filter: verify target ASIN in author links ---
const authorLinks = $el.find('a[href*="/author/"]');
let hasMatchingAuthor = false;
authorLinks.each((_i, link) => {
const href = $(link).attr('href') || '';
const asinMatch = href.match(/\/author\/[^\/]+\/([A-Z0-9]{10})/);
if (asinMatch && asinMatch[1] === authorAsin) {
hasMatchingAuthor = true;
return false; // break .each()
}
});
if (!hasMatchingAuthor) return;
const $ = cheerio.load(response.data);
let pageResults = 0;
// --- Extract book ASIN ---
const bookAsin = $el.find('li').attr('data-asin') ||
$el.find('a[href*="/pd/"]').attr('href')?.match(/\/pd\/[^\/]+\/([A-Z0-9]{10})/)?.[1] ||
$el.find('a[href*="/ac/"]').attr('href')?.match(/\/ac\/[^\/]+\/([A-Z0-9]{10})/)?.[1] ||
$el.find('a').attr('href')?.match(/\/(?:pd|ac)\/[^\/]+\/([A-Z0-9]{10})/)?.[1] || '';
if (!bookAsin || seenAsins.has(bookAsin)) return;
seenAsins.add(bookAsin);
$('.s-result-item, .productListItem').each((_index, element) => {
const $el = $(element);
// --- Parse book details ---
const title = $el.find('h2').first().text().trim() ||
$el.find('h3 a').text().trim() ||
$el.find('.bc-heading a').text().trim();
// --- Language filter: require matching language for region ---
const langConfig = this.getLangConfig();
const langText = $el.find(buildContainsSelector('span', langConfig.scraping.languageLabels)).text().trim() ||
$el.find('.languageLabel').text().trim();
// Extract language value (e.g. "Language: English" -> "English", "Sprache: Deutsch" -> "Deutsch")
const langLabelPattern = new RegExp(`(?:${langConfig.scraping.languageLabels.map(l => l.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|')})\\s*(.+)`, 'i');
const langMatch = langText.match(langLabelPattern);
const language = langMatch?.[1]?.trim();
if (!language || !isAcceptedLanguage(language, langConfig)) return;
const authorText = $el.find('a[href*="/author/"]').first().text().trim() ||
$el.find('.authorLabel').text().trim() ||
$el.find('.bc-size-small .bc-text-bold').first().text().trim();
// --- Author ASIN filter: verify target ASIN in author links ---
const authorLinks = $el.find('a[href*="/author/"]');
let hasMatchingAuthor = false;
authorLinks.each((_i, link) => {
const href = $(link).attr('href') || '';
const asinMatch = href.match(/\/author\/[^\/]+\/([A-Z0-9]{10})/);
if (asinMatch && asinMatch[1] === authorAsin) {
hasMatchingAuthor = true;
return false; // break .each()
}
});
if (!hasMatchingAuthor) return;
const narratorText = $el.find('a[href*="searchNarrator="]').first().text().trim() ||
$el.find('.narratorLabel').text().trim();
// --- Extract book ASIN ---
const bookAsin = $el.find('li').attr('data-asin') ||
$el.find('a[href*="/pd/"]').attr('href')?.match(/\/pd\/[^\/]+\/([A-Z0-9]{10})/)?.[1] ||
$el.find('a[href*="/ac/"]').attr('href')?.match(/\/ac\/[^\/]+\/([A-Z0-9]{10})/)?.[1] ||
$el.find('a').attr('href')?.match(/\/(?:pd|ac)\/[^\/]+\/([A-Z0-9]{10})/)?.[1] || '';
if (!bookAsin || seenAsins.has(bookAsin)) return;
seenAsins.add(bookAsin);
const coverArtUrl = $el.find('img').attr('src') || '';
// --- Parse book details ---
const title = $el.find('h2').first().text().trim() ||
$el.find('h3 a').text().trim() ||
$el.find('.bc-heading a').text().trim();
const runtimeText = $el.find('.runtimeLabel').text().trim() ||
$el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
const durationMinutes = this.parseRuntime(runtimeText);
const authorText = $el.find('a[href*="/author/"]').first().text().trim() ||
$el.find('.authorLabel').text().trim() ||
$el.find('.bc-size-small .bc-text-bold').first().text().trim();
const ratingText = $el.find('.ratingsLabel').text().trim() ||
$el.find('.a-icon-star span').first().text().trim();
const rating = ratingText ? parseFloat(ratingText.split(' ')[0]) : undefined;
const narratorText = $el.find('a[href*="searchNarrator="]').first().text().trim() ||
$el.find('.narratorLabel').text().trim();
const coverArtUrl = $el.find('img').attr('src') || '';
const runtimeText = $el.find('.runtimeLabel').text().trim() ||
$el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
const durationMinutes = this.parseRuntime(runtimeText);
const ratingText = $el.find('.ratingsLabel').text().trim() ||
$el.find('.a-icon-star span').first().text().trim();
const rating = ratingText ? parseFloat(ratingText.split(' ')[0]) : undefined;
allBooks.push({
asin: bookAsin,
title,
author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
authorAsin,
narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
durationMinutes,
rating,
});
pageResults++;
books.push({
asin: bookAsin,
title,
author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
authorAsin,
narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
durationMinutes,
rating,
});
});
// Check if there are more pages
const resultsText = $('.resultsInfo').text().trim();
const totalResults = parseInt(resultsText.match(/of ([\d,]+)/)?.[1]?.replace(/,/g, '') || '0');
const hasMore = totalResults > page * AUDIBLE_PAGE_SIZE;
// Check total results for pagination
const resultsText = $('.resultsInfo').text().trim();
const totalResults = parseInt(resultsText.match(/of ([\d,]+)/)?.[1]?.replace(/,/g, '') || '0');
// Use totalResults if available; otherwise fall back to whether Audible returned a full page
const hasMore = books.length > 0 && (totalResults > 0
? totalResults > page * AUDIBLE_PAGE_SIZE
: pageItemCount >= AUDIBLE_PAGE_SIZE);
logger.info(`Author books page ${page}: ${pageResults} valid results (${allBooks.length} total, ${totalResults} Audible total)`);
if (!hasMore || pageResults === 0) break;
// Pace between pages
if (page < MAX_PAGES) {
await this.delay(this.pacer.reportPageResult(meta));
}
}
logger.info(`Author books search complete: "${authorName}" → ${allBooks.length} books`);
return allBooks;
logger.info(`Author books page ${page}: ${books.length} valid results (${totalResults} Audible total)`);
return { books, hasMore, page, totalResults };
} catch (error) {
logger.error(`Author books search failed for "${authorName}"`, {
error: error instanceof Error ? error.message : String(error),
collectedSoFar: allBooks.length,
});
// Return what we collected before the error
return allBooks;
return { books, hasMore: false, page, totalResults: 0 };
}
}