Files
ReadMeABook/src/lib/integrations/audible.service.ts
T
kikootwo fb0445d95f Centralize and standardize User-Agent string
Introduce a centralized RMAB_USER_AGENT constant (ReadMeABook/<version>) and update audible service calls to use it instead of hardcoded values. This avoids the default axios UA (which some indexers reject) and replaces the previous `rmab/` identifier. Adds unit tests to verify the User-Agent format and ensure it doesn't resemble generic bot signatures.
2026-05-18 09:45:57 -04:00

1039 lines
31 KiB
TypeScript

/**
* Component: Audible Integration Service
* Documentation: documentation/integrations/audible.md
*/
import axios, { AxiosInstance } from 'axios';
import { RMAB_USER_AGENT } from '../utils/user-agent';
import * as cheerio from 'cheerio';
import { RMABLogger } from '../utils/logger';
import { getConfigService } from '../services/config.service';
import { AudibleRegion, AUDIBLE_REGIONS, DEFAULT_AUDIBLE_REGION } from '../types/audible';
import {
getLanguageForRegion,
isAcceptedLanguage,
stripPrefixes,
buildContainsSelector,
type LanguageConfig,
} from '../constants/language-config';
import {
pickUserAgent,
getBrowserHeaders,
jitteredBackoff,
AdaptivePacer,
FetchResultMeta,
} from '../utils/scrape-resilience';
import { parseRuntime as parseRuntimeUtil } from '../utils/parse-runtime';
import { extractAllNarrators } from '../utils/extract-narrator';
// Module-level logger, created with the 'Audible' label.
const logger = RMABLogger.create('Audible');

// Page size requested from the HTML listing pages (`pageSize`) and from the
// catalog API (`num_results`).
const AUDIBLE_PAGE_SIZE = 50;

// Value of the catalog API `response_groups` parameter: the group names below,
// joined into a single comma-separated string.
const CATALOG_RESPONSE_GROUPS = [
  'contributors',
  'product_desc',
  'product_attrs',
  'product_extended_attrs',
  'media',
  'rating',
  'series',
  'category_ladders',
  'product_details',
].join(',');

// Retry/backoff settings for HTML scraping (nightly refresh job only).
// A healthy run succeeds on the first attempt of every page and only pays the
// short inter-page delay. A run hitting 503 storms keeps retrying patiently:
// up to 12 retries per request, each individual backoff capped at 3 minutes.
const HTML_MAX_RETRIES = 12;
const HTML_MAX_BACKOFF_MS = 3 * 60 * 1_000;
/**
 * Normalized audiobook record returned by this module.
 *
 * Instances are produced from three sources in this file: the Audible catalog
 * API (`mapCatalogProduct`), the Audnexus API (`fetchFromAudnexus`), and the
 * HTML listing scrapers. The scrapers only fill a subset of the optional
 * fields (no description, genres, series, or release date).
 */
export interface AudibleAudiobook {
// Audible product identifier (ASIN).
asin: string;
title: string;
// Author names; multiple authors are joined with ', ' by the API mappers.
author: string;
// ASIN of the first listed author (API sources) or of the first author link (scrapers).
authorAsin?: string;
// Narrator names; multiple narrators are joined with ', ' by the API mappers.
narrator?: string;
// Book description; the catalog mapper strips HTML markup before storing it.
description?: string;
coverArtUrl?: string;
// Runtime in minutes.
durationMinutes?: number;
releaseDate?: string;
rating?: number;
// Genre/category names; the API mappers keep at most five entries.
genres?: string[];
// Series title.
series?: string;
// Position within the series, kept as a string (e.g. a value such as "2.5").
seriesPart?: string;
seriesAsin?: string;
language?: string;
formatType?: string;
publisherName?: string;
}
/**
 * Result of a keyword search (`AudibleService.search`).
 */
export interface AudibleSearchResult {
// The query string exactly as passed by the caller.
query: string;
// Books found on the requested page.
results: AudibleAudiobook[];
// Total match count reported by the catalog API; 0 when it is absent or the search failed.
totalResults: number;
// The page number the caller requested (the service sends `page - 1` to the API).
page: number;
// Whether a further page is expected to contain results.
hasMore: boolean;
}
/**
 * Result of an author lookup (`AudibleService.searchByAuthorAsin`).
 */
export interface AuthorBooksResult {
// Books on this page that matched both the author ASIN and the language filter.
books: AudibleAudiobook[];
// Whether a further page is expected to contain results.
hasMore: boolean;
// The page number the caller requested.
page: number;
// Total count reported by the catalog API before client-side filtering; 0 on failure.
totalResults: number;
}
// ---------------------------------------------------------------------------
// Shapes of the Audible catalog API JSON as read by this file. Only the
// fields consumed by `mapCatalogProduct` and the search methods are declared.
// ---------------------------------------------------------------------------

/** One entry of a product's `authors` array. */
interface CatalogProductAuthor {
asin?: string;
name: string;
}
/** One entry of a product's `narrators` array. */
interface CatalogProductNarrator {
name: string;
}
/** One entry of a product's `series` array. */
interface CatalogProductSeries {
asin?: string;
title?: string;
// Position within the series as free text; may be missing or blank.
sequence?: string;
}
/** A single named node inside a category ladder. */
interface CatalogProductLadderNode {
name: string;
}
/** One category ladder: a list of category nodes. */
interface CatalogProductLadder {
ladder: CatalogProductLadderNode[];
}
/** A catalog product as returned inside the API response envelopes. */
interface CatalogProduct {
asin: string;
// Absent on the stub object the API returns for unrecognised ASINs.
title?: string;
authors?: CatalogProductAuthor[];
narrators?: CatalogProductNarrator[];
// Preferred description source; may contain HTML markup.
publisher_summary?: string;
// Fallback description source used when `publisher_summary` is missing.
merchandising_summary?: string;
// Cover image URLs keyed by a string; this file reads the '500' key.
product_images?: Record<string, string>;
runtime_length_min?: number;
release_date?: string;
language?: string;
format_type?: string;
publisher_name?: string;
rating?: {
overall_distribution?: {
display_stars?: number;
};
};
category_ladders?: CatalogProductLadder[];
series?: CatalogProductSeries[];
}
/** Envelope of `GET /1.0/catalog/products` (keyword and author searches). */
interface CatalogProductsResponse {
products: CatalogProduct[];
// Total number of matches; callers in this file treat a missing value as 0.
total_results?: number;
}
/** Envelope of `GET /1.0/catalog/products/{asin}` (single product lookup). */
interface CatalogProductResponse {
product: CatalogProduct;
}
/** Envelope of `GET /1.0/catalog/categories`. */
interface CatalogCategoriesResponse {
categories?: Array<{ id: string; name: string }>;
}
/**
 * Convert an HTML fragment to plain text.
 *
 * Steps, in order: remove every `<...>` tag, decode character references,
 * collapse runs of whitespace to a single space, and trim both ends.
 *
 * Character references are decoded in a single pass so that an escaped
 * entity such as `&amp;lt;` yields the literal text `&lt;` rather than being
 * decoded twice into `<`. Supported references are `&amp;`, `&quot;`,
 * `&apos;`, `&lt;`, `&gt;`, `&nbsp;` (decoded to a regular space) and numeric
 * references in decimal (`&#39;`) or hexadecimal (`&#x27;`) form. Unknown or
 * out-of-range references are left untouched.
 *
 * @param html HTML fragment to convert.
 * @returns The plain-text rendering of `html`.
 */
function stripHtml(html: string): string {
  // A Map (not an object literal) so names like "constructor" never resolve
  // to inherited properties.
  const namedEntities = new Map<string, string>([
    ['amp', '&'],
    ['quot', '"'],
    ['apos', "'"],
    ['lt', '<'],
    ['gt', '>'],
    ['nbsp', ' '],
  ]);

  const decodeEntity = (entity: string, body: string): string => {
    if (body.charAt(0) !== '#') {
      return namedEntities.get(body) ?? entity;
    }
    const isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
    const codePoint = isHex
      ? Number.parseInt(body.slice(2), 16)
      : Number.parseInt(body.slice(1), 10);
    // String.fromCodePoint throws a RangeError outside this range.
    if (!Number.isInteger(codePoint) || codePoint <= 0 || codePoint > 0x10ffff) {
      return entity;
    }
    return String.fromCodePoint(codePoint);
  };

  return html
    .replace(/<[^>]+>/g, '')
    .replace(/&(#[xX][0-9a-fA-F]+|#\d+|[A-Za-z][A-Za-z0-9]*);/g, decodeEntity)
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Translate a raw catalog API product into the module's `AudibleAudiobook`.
 *
 * - Author and narrator names are joined with ', '; `authorAsin` is taken
 *   from the first author.
 * - The description comes from `publisher_summary`, falling back to
 *   `merchandising_summary`, and is passed through `stripHtml`.
 * - Genres are the de-duplicated category-ladder node names, first five only.
 * - For series, the first entry with a non-blank `sequence` wins, otherwise
 *   the first entry. `seriesPart` is the first number found in the sequence
 *   text, or the raw sequence text when it contains no digits.
 *
 * @param product Product object from a catalog API response.
 * @returns The mapped audiobook; fields without data are `undefined`.
 */
function mapCatalogProduct(product: CatalogProduct): AudibleAudiobook {
  const authorList = product.authors ?? [];
  const narratorNames = (product.narrators ?? []).map(({ name }) => name);
  const summaryHtml = product.publisher_summary ?? product.merchandising_summary;

  // A Set keeps first-seen order, so this matches "dedupe, then take five".
  const uniqueGenres = new Set<string>();
  for (const { ladder } of product.category_ladders ?? []) {
    for (const { name } of ladder) {
      uniqueGenres.add(name);
    }
  }

  const hasSequence = (entry: { sequence?: string }): boolean =>
    Boolean(entry.sequence && entry.sequence.trim() !== '');
  const seriesList = product.series ?? [];
  const chosenSeries = seriesList.find(hasSequence) ?? seriesList[0];

  let seriesPart: string | undefined;
  const sequenceText = chosenSeries?.sequence;
  if (sequenceText && sequenceText.trim() !== '') {
    seriesPart = /\d+(?:\.\d+)?/.exec(sequenceText)?.[0] ?? sequenceText;
  }

  return {
    asin: product.asin,
    title: product.title ?? '',
    author: authorList.map(({ name }) => name).join(', '),
    authorAsin: authorList[0]?.asin ?? undefined,
    narrator: narratorNames.length > 0 ? narratorNames.join(', ') : undefined,
    description: summaryHtml ? stripHtml(summaryHtml) : undefined,
    coverArtUrl: product.product_images?.['500'] ?? undefined,
    durationMinutes: product.runtime_length_min ?? undefined,
    releaseDate: product.release_date ?? undefined,
    rating: product.rating?.overall_distribution?.display_stars ?? undefined,
    genres: uniqueGenres.size > 0 ? Array.from(uniqueGenres).slice(0, 5) : undefined,
    series: chosenSeries?.title ?? undefined,
    seriesPart,
    seriesAsin: chosenSeries?.asin ?? undefined,
    language: product.language ?? undefined,
    formatType: product.format_type ?? undefined,
    publisherName: product.publisher_name ?? undefined,
  };
}
/**
 * Client for Audible data.
 *
 * Three data paths are used:
 * - HTML scraping of storefront listing pages through `htmlClient`
 *   (bestsellers, new releases, category listings);
 * - the Audible catalog JSON API through `apiClient` (search, author search,
 *   product details, categories);
 * - the Audnexus API (`https://api.audnex.us`) through plain `axios.get`
 *   (per-book details and runtime).
 *
 * Both axios clients are created lazily by `initialize()`, which every public
 * data method awaits first. `initialize()` re-reads the configured region on
 * each call and rebuilds the clients when the region changed.
 */
export class AudibleService {
  // Assigned in configureForRegion(); `!` because they are not set in the constructor.
  private htmlClient!: AxiosInstance;
  private apiClient!: AxiosInstance;
  private baseUrl: string = 'https://www.audible.com';
  private region: AudibleRegion = 'us';
  private initialized: boolean = false;
  // User-Agent chosen once per (re-)initialization and used for all HTML requests.
  private sessionUserAgent: string = '';
  // Supplies the delay between consecutive listing-page fetches.
  private pacer: AdaptivePacer = new AdaptivePacer();

  /** Base URL of the storefront for the currently configured region. */
  public getBaseUrl(): string {
    return this.baseUrl;
  }

  /** Region the service is currently configured for. */
  public getRegion(): AudibleRegion {
    return this.region;
  }

  /**
   * Fetch `url` through the HTML client with the default retry policy of
   * `fetchWithRetry` (5 retries, no backoff cap).
   *
   * @param url Path or URL passed to the HTML client.
   * @param config Axios request config forwarded unchanged.
   * @returns `data` is the full axios response object (the body is
   *   `data.data`); `meta` describes the retries that were needed.
   */
  public async fetch(url: string, config: any = {}): Promise<{ data: any; meta: FetchResultMeta }> {
    await this.initialize();
    return this.fetchWithRetry(url, config);
  }

  /** Mark the service as uninitialized so the next call rebuilds both clients. */
  public forceReinitialize(): void {
    logger.info('Force re-initializing AudibleService');
    this.initialized = false;
  }

  /**
   * Ensure both HTTP clients exist and match the configured region.
   *
   * When already initialized, the configured region is read again and the
   * method returns early if it is unchanged. Otherwise the clients are
   * (re)built for the configured region. If that fails for any reason, the
   * failure is logged and the clients are built for `DEFAULT_AUDIBLE_REGION`.
   */
  private async initialize(): Promise<void> {
    if (this.initialized) {
      const currentRegion = await getConfigService().getAudibleRegion();
      if (currentRegion === this.region) {
        return;
      }
      logger.info(`Region changed from ${this.region} to ${currentRegion}, re-initializing`);
      this.initialized = false;
    }

    try {
      const configuredRegion = await getConfigService().getAudibleRegion();
      this.configureForRegion(configuredRegion, true);
    } catch (error) {
      logger.error('Failed to initialize AudibleService', {
        error: error instanceof Error ? error.message : String(error),
      });
      this.configureForRegion(DEFAULT_AUDIBLE_REGION, false);
    }
  }

  /**
   * Point the service at `region`: store the region and base URL, pick a new
   * session User-Agent, reset the pacer, and build both axios clients.
   *
   * @param region Region whose entry in `AUDIBLE_REGIONS` is used.
   * @param announce Whether to log the "Initializing ..." line (the fallback
   *   path does not log it).
   */
  private configureForRegion(region: AudibleRegion, announce: boolean): void {
    this.region = region;
    const regionConfig = AUDIBLE_REGIONS[region];
    this.baseUrl = regionConfig.baseUrl;
    this.sessionUserAgent = pickUserAgent();
    this.pacer.reset();
    if (announce) {
      logger.info(`Initializing Audible service with region: ${this.region} (${this.baseUrl})`);
    }

    const langConfig = getLanguageForRegion(region);
    // HTML client: browser-like headers plus default query parameters that
    // are sent with every scrape request.
    this.htmlClient = axios.create({
      baseURL: regionConfig.baseUrl,
      timeout: 15000,
      headers: getBrowserHeaders(this.sessionUserAgent),
      params: {
        ipRedirectOverride: 'true',
        language: langConfig.scraping.audibleLocaleParam,
      },
    });
    // API client: JSON requests identified by the application's own User-Agent.
    this.apiClient = axios.create({
      baseURL: regionConfig.apiBaseUrl,
      timeout: 10000,
      headers: {
        Accept: 'application/json',
        'User-Agent': RMAB_USER_AGENT,
      },
    });
    this.initialized = true;
  }

  /**
   * GET `url` with `client`, retrying transient failures.
   *
   * A failure is retried when it has no HTTP status (network error) or the
   * status is 429 or >= 500; any other status is rethrown immediately. The
   * wait between attempts comes from `jitteredBackoff(attempt, 1000, maxBackoffMs)`.
   *
   * @param url Path or URL to request.
   * @param config Axios request config forwarded unchanged.
   * @param maxRetries Number of retries after the first attempt.
   * @param client Axios instance to use; defaults to the HTML client.
   * @param maxBackoffMs Upper bound passed to `jitteredBackoff`.
   * @returns `data` is the full axios response object; `meta` reports how many
   *   retries were used and whether any attempt returned 503.
   * @throws The last error once the retries are exhausted, or the first
   *   non-retryable error.
   */
  private async fetchWithRetry(
    url: string,
    config: any = {},
    maxRetries: number = 5,
    client: AxiosInstance = this.htmlClient,
    maxBackoffMs: number = Number.POSITIVE_INFINITY,
  ): Promise<{ data: any; meta: FetchResultMeta }> {
    let lastError: Error | null = null;
    let retriesUsed = 0;
    let encountered503 = false;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        const response = await client.get(url, config);
        return { data: response, meta: { retriesUsed, encountered503 } };
      } catch (error: any) {
        lastError = error;
        const status = error.response?.status;
        const isRetryable = !status || status === 503 || status === 429 || status >= 500;
        if (status === 503) encountered503 = true;
        if (!isRetryable) {
          throw error;
        }
        if (attempt === maxRetries) {
          break;
        }
        retriesUsed++;
        const backoffMs = jitteredBackoff(attempt, 1000, maxBackoffMs);
        logger.info(
          ` Request failed (${status || 'network error'}), retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})...`,
        );
        await this.delay(backoffMs);
      }
    }
    throw lastError || new Error('Request failed after retries');
  }

  /**
   * GET an absolute `url` with the global axios instance, retrying transient
   * failures with plain exponential backoff (1s, 2s, 4s, ...).
   *
   * Retry rules match `fetchWithRetry`, with one exception: a 500 response
   * whose body message contains "Release date is in the future" is treated
   * as permanent and rethrown immediately.
   *
   * @param url Absolute URL to request.
   * @param config Axios request config forwarded unchanged.
   * @param maxRetries Number of retries after the first attempt.
   * @returns The axios response object.
   * @throws The last error once the retries are exhausted, or the first
   *   non-retryable error.
   */
  private async externalFetchWithRetry(
    url: string,
    config: any = {},
    maxRetries: number = 3,
  ): Promise<any> {
    let lastError: Error | null = null;

    for (let attempt = 0; attempt <= maxRetries; attempt++) {
      try {
        return await axios.get(url, config);
      } catch (error: any) {
        lastError = error;
        const status = error.response?.status;
        const isRetryable = !status || status === 503 || status === 429 || status >= 500;
        if (!isRetryable) {
          throw error;
        }
        if (status === 500) {
          // The body is untyped; only inspect the message when it is a string.
          const rawMessage: unknown = error.response?.data?.message;
          const message = typeof rawMessage === 'string' ? rawMessage : '';
          if (message.includes('Release date is in the future')) {
            logger.info(` External API returned non-retryable error: ${message}`);
            throw error;
          }
        }
        if (attempt === maxRetries) {
          break;
        }
        const backoffMs = Math.pow(2, attempt) * 1000;
        logger.info(
          ` External API request failed (${status || 'network error'}), retrying in ${backoffMs}ms (attempt ${attempt + 1}/${maxRetries})...`,
        );
        await this.delay(backoffMs);
      }
    }
    throw lastError || new Error('External API request failed after retries');
  }

  /**
   * Shared pagination loop for the HTML listing scrapers.
   *
   * Fetches `path` page by page (page 1 is requested without a `page`
   * parameter) until `limit` books are collected, `ceil(limit / page size)`
   * pages were requested, a page yields fewer than half a page of new books,
   * or a request fails. A failure is logged and ends the loop; books
   * collected so far are still returned.
   *
   * @returns The collected books and the number of pages successfully fetched.
   */
  private async scrapePagedListing(options: {
    /** Listing path requested through the HTML client. */
    path: string;
    /** Query parameters sent with every page (the page number is added here). */
    params: Record<string, unknown>;
    /** Maximum number of books to collect. */
    limit: number;
    /** Parses one page, appends to the accumulator, returns the count added. */
    parsePage: (html: string, audiobooks: AudibleAudiobook[], limit: number) => number;
    /** Optional hook invoked before each page request. */
    onPageStart?: (page: number, maxPages: number) => void;
    /** Hook invoked after each page was parsed. */
    onPageParsed: (page: number, foundOnPage: number) => void;
    /** Optional hook invoked when a short page ends the listing. */
    onEndOfPages?: () => void;
    /** Builds the error log message for a failed page. */
    describeFailure: (page: number) => string;
  }): Promise<{ audiobooks: AudibleAudiobook[]; pagesFetched: number }> {
    const { path, params, limit, parsePage } = options;
    const audiobooks: AudibleAudiobook[] = [];
    const maxPages = Math.ceil(limit / AUDIBLE_PAGE_SIZE);
    let page = 1;
    let pagesFetched = 0;

    this.pacer.reset();
    while (audiobooks.length < limit && page <= maxPages) {
      try {
        options.onPageStart?.(page, maxPages);
        const { data: response, meta } = await this.fetchWithRetry(
          path,
          { params: { ...params, ...(page > 1 ? { page } : {}) } },
          HTML_MAX_RETRIES,
          this.htmlClient,
          HTML_MAX_BACKOFF_MS,
        );
        pagesFetched++;

        const foundOnPage = parsePage(response.data, audiobooks, limit);
        options.onPageParsed(page, foundOnPage);

        // The parser stops early once `limit` is reached, so a short count
        // here does not mean the listing ran out of pages.
        if (audiobooks.length >= limit) {
          break;
        }
        if (foundOnPage < AUDIBLE_PAGE_SIZE / 2) {
          options.onEndOfPages?.();
          break;
        }

        page++;
        if (page <= maxPages && audiobooks.length < limit) {
          // The pacer turns the retry metadata of this page into the wait
          // before the next page.
          await this.delay(this.pacer.reportPageResult(meta));
        }
      } catch (error) {
        logger.error(options.describeFailure(page), {
          error: error instanceof Error ? error.message : String(error),
          collectedSoFar: audiobooks.length,
        });
        break;
      }
    }
    return { audiobooks, pagesFetched };
  }

  /**
   * Popular audiobooks from Audible's curated /adblbestsellers HTML page.
   * Uses HTML scraping (not the catalog API) because the API's BestSellers sort
   * is a right-now velocity rank that surfaces launch-day shovelware and preorders;
   * the HTML page reflects Audible's editorial curation.
   *
   * @param limit Maximum number of books to return.
   * @returns Up to `limit` books; fewer (possibly none) when pages run out or
   *   a request fails.
   */
  async getPopularAudiobooks(limit: number = 20): Promise<AudibleAudiobook[]> {
    await this.initialize();
    logger.info(` Fetching popular audiobooks (limit: ${limit})...`);

    const { audiobooks, pagesFetched } = await this.scrapePagedListing({
      path: '/adblbestsellers',
      params: { ipRedirectOverride: 'true', pageSize: AUDIBLE_PAGE_SIZE },
      limit,
      parsePage: (html, collected, max) => this.parseProductListItems(html, collected, max),
      onPageStart: (page, maxPages) => {
        logger.info(` Fetching page ${page}/${maxPages}...`);
      },
      onPageParsed: (page, foundOnPage) => {
        logger.info(` Found ${foundOnPage} audiobooks on page ${page}`);
      },
      onEndOfPages: () => {
        logger.info(` Reached end of available pages`);
      },
      describeFailure: (page) => `Failed to fetch page ${page} of popular audiobooks`,
    });

    logger.info(` Found ${audiobooks.length} popular audiobooks across ${pagesFetched} pages`);
    return audiobooks;
  }

  /**
   * New release audiobooks from Audible's curated /newreleases HTML page.
   * Uses HTML scraping (not the catalog API) because the API's -ReleaseDate sort
   * returns 100% future preorders with no released-only filter available.
   *
   * @param limit Maximum number of books to return.
   * @returns Up to `limit` books; fewer (possibly none) when pages run out or
   *   a request fails.
   */
  async getNewReleases(limit: number = 20): Promise<AudibleAudiobook[]> {
    await this.initialize();
    logger.info(` Fetching new releases (limit: ${limit})...`);

    const { audiobooks, pagesFetched } = await this.scrapePagedListing({
      path: '/newreleases',
      params: { ipRedirectOverride: 'true', pageSize: AUDIBLE_PAGE_SIZE },
      limit,
      parsePage: (html, collected, max) => this.parseProductListItems(html, collected, max),
      onPageStart: (page, maxPages) => {
        logger.info(` Fetching page ${page}/${maxPages}...`);
      },
      onPageParsed: (page, foundOnPage) => {
        logger.info(` Found ${foundOnPage} audiobooks on page ${page}`);
      },
      onEndOfPages: () => {
        logger.info(` Reached end of available pages`);
      },
      describeFailure: (page) => `Failed to fetch page ${page} of new releases`,
    });

    logger.info(` Found ${audiobooks.length} new releases across ${pagesFetched} pages`);
    return audiobooks;
  }

  /**
   * Keyword search against the catalog API.
   *
   * @param query Search keywords.
   * @param page 1-based page number (sent to the API as `page - 1`).
   * @returns The mapped results. On any failure the error is logged and an
   *   empty result with `hasMore: false` is returned instead of throwing.
   */
  async search(query: string, page: number = 1): Promise<AudibleSearchResult> {
    await this.initialize();
    try {
      logger.info(` Searching for "${query}"...`);
      const { data: response } = await this.fetchWithRetry(
        '/1.0/catalog/products',
        {
          params: {
            keywords: query,
            num_results: AUDIBLE_PAGE_SIZE,
            page: page - 1,
            response_groups: CATALOG_RESPONSE_GROUPS,
          },
        },
        5,
        this.apiClient,
      );
      const envelope: CatalogProductsResponse = response.data;
      const products = envelope.products ?? [];
      const totalResults = envelope.total_results ?? 0;
      const results = products.map(mapCatalogProduct);
      logger.info(` Found ${results.length} results for "${query}"`);
      return {
        query,
        results,
        totalResults,
        page,
        // Prefer the reported total; without one, assume a full page means
        // more results follow.
        hasMore:
          results.length > 0 &&
          (totalResults > 0
            ? totalResults > page * AUDIBLE_PAGE_SIZE
            : results.length >= AUDIBLE_PAGE_SIZE),
      };
    } catch (error) {
      logger.error('Search failed', {
        error: error instanceof Error ? error.message : String(error),
      });
      return { query, results: [], totalResults: 0, page, hasMore: false };
    }
  }

  /**
   * The catalog API `author=` param takes an author name (not ASIN), so we filter
   * client-side by checking that at least one author entry matches the target ASIN.
   * Products without a language, or with a language not accepted for the
   * current region, are dropped as well.
   *
   * @param authorName Author name sent to the API.
   * @param authorAsin Author ASIN every returned book must list.
   * @param page 1-based page number (sent to the API as `page - 1`).
   * @returns The filtered books. On failure the error is logged and the books
   *   collected so far are returned with `hasMore: false`.
   */
  async searchByAuthorAsin(
    authorName: string,
    authorAsin: string,
    page: number = 1,
  ): Promise<AuthorBooksResult> {
    await this.initialize();
    const langConfig = getLanguageForRegion(this.region);
    const books: AudibleAudiobook[] = [];
    try {
      logger.info(`Searching books by author "${authorName}" (ASIN: ${authorAsin}), page ${page}...`);
      const { data: response } = await this.fetchWithRetry(
        '/1.0/catalog/products',
        {
          params: {
            author: authorName,
            num_results: AUDIBLE_PAGE_SIZE,
            page: page - 1,
            response_groups: CATALOG_RESPONSE_GROUPS,
          },
        },
        5,
        this.apiClient,
      );
      const envelope: CatalogProductsResponse = response.data;
      const products = envelope.products ?? [];
      const totalResults = envelope.total_results ?? 0;

      for (const product of products) {
        const authorMatch = product.authors?.some((a) => a.asin === authorAsin) ?? false;
        if (!authorMatch) continue;
        const langMatch = product.language
          ? isAcceptedLanguage(product.language, langConfig)
          : false;
        if (!langMatch) continue;
        books.push(mapCatalogProduct(product));
      }

      const hasMore =
        books.length > 0 &&
        (totalResults > 0
          ? totalResults > page * AUDIBLE_PAGE_SIZE
          : products.length >= AUDIBLE_PAGE_SIZE);
      logger.info(
        `Author books page ${page}: ${books.length} valid results (${totalResults} Audible total)`,
      );
      return { books, hasMore, page, totalResults };
    } catch (error) {
      logger.error(`Author books search failed for "${authorName}"`, {
        error: error instanceof Error ? error.message : String(error),
      });
      return { books, hasMore: false, page, totalResults: 0 };
    }
  }

  /**
   * Details for a single book: Audnexus first, Audible catalog API as fallback.
   *
   * @param asin ASIN of the book.
   * @returns The book, or `null` when neither source yields it.
   */
  async getAudiobookDetails(asin: string): Promise<AudibleAudiobook | null> {
    await this.initialize();
    try {
      logger.info(` Fetching details for ASIN ${asin}...`);
      const audnexusData = await this.fetchFromAudnexus(asin);
      if (audnexusData) {
        logger.info(` Successfully fetched from Audnexus for "${audnexusData.title}"`);
        return audnexusData;
      }
      logger.info(` Audnexus failed, falling back to Audible catalog API...`);
      return await this.fetchAudibleDetailsFromApi(asin);
    } catch (error) {
      logger.error(`Failed to fetch details for ${asin}`, {
        error: error instanceof Error ? error.message : String(error),
      });
      return null;
    }
  }

  /**
   * Fetch a book from Audnexus for the current region and map it to
   * `AudibleAudiobook`.
   *
   * @param asin ASIN of the book.
   * @returns The mapped book, or `null` on any error (a 404 is logged at
   *   debug level, anything else as a warning).
   */
  private async fetchFromAudnexus(asin: string): Promise<AudibleAudiobook | null> {
    try {
      const audnexusRegion = AUDIBLE_REGIONS[this.region].audnexusParam;
      logger.debug(`Fetching ASIN from Audnexus: ${asin} (region: ${audnexusRegion})`);
      const response = await this.externalFetchWithRetry(
        `https://api.audnex.us/books/${asin}`,
        {
          params: { region: audnexusRegion },
          timeout: 10000,
          headers: { 'User-Agent': RMAB_USER_AGENT },
        },
      );
      const data = response.data;
      const result: AudibleAudiobook = {
        asin,
        title: data.title || '',
        author: data.authors?.map((a: any) => a.name).join(', ') || '',
        authorAsin: data.authors?.[0]?.asin || undefined,
        narrator: data.narrators?.map((n: any) => n.name).join(', ') || '',
        description: data.description || data.summary || '',
        coverArtUrl: data.image || '',
        durationMinutes: data.runtimeLengthMin ? parseInt(data.runtimeLengthMin) : undefined,
        releaseDate: data.releaseDate || undefined,
        rating: data.rating ? parseFloat(data.rating) : undefined,
        // Genre entries are accepted either as plain strings or as objects with a `name`.
        genres: data.genres?.map((g: any) => (typeof g === 'string' ? g : g.name)).slice(0, 5) || undefined,
        series: data.seriesPrimary?.name || undefined,
        seriesPart: data.seriesPrimary?.position || undefined,
        seriesAsin: data.seriesPrimary?.asin || undefined,
        language: data.language || undefined,
        formatType: data.formatType || undefined,
        publisherName: data.publisherName || undefined,
      };
      // Rewrite the size modifier segment of the image URL to `_SL500_`.
      if (result.coverArtUrl && !result.coverArtUrl.includes('_SL500_')) {
        result.coverArtUrl = result.coverArtUrl.replace(/\._.*_\./, '._SL500_.');
      }
      logger.debug('Audnexus success', {
        title: result.title,
        author: result.author,
        narrator: result.narrator,
        descLength: result.description?.length || 0,
        duration: result.durationMinutes,
        rating: result.rating,
        genreCount: result.genres?.length || 0,
        series: result.series,
        seriesPart: result.seriesPart,
        seriesAsin: result.seriesAsin,
      });
      return result;
    } catch (error: any) {
      if (error.response?.status === 404) {
        logger.debug(`Book not found (404) on Audnexus for ASIN ${asin}`);
      } else {
        logger.warn(`Error fetching from Audnexus for ASIN ${asin}`, { error: error.message });
      }
      return null;
    }
  }

  /**
   * Fetch a single product from the Audible catalog API.
   *
   * @param asin ASIN of the book.
   * @returns The mapped book, or `null` when the API returns a title-less
   *   stub or the request fails.
   */
  private async fetchAudibleDetailsFromApi(asin: string): Promise<AudibleAudiobook | null> {
    try {
      const { data: response } = await this.fetchWithRetry(
        `/1.0/catalog/products/${asin}`,
        { params: { response_groups: CATALOG_RESPONSE_GROUPS } },
        5,
        this.apiClient,
      );
      const envelope: CatalogProductResponse = response.data;
      const product = envelope.product;
      // The API returns HTTP 200 with a stub object for invalid ASINs;
      // a missing title is the reliable signal that the ASIN is unrecognised.
      if (!product?.title) {
        logger.debug(`Catalog API returned stub for ASIN ${asin} (no title)`);
        return null;
      }
      return mapCatalogProduct(product);
    } catch (error) {
      logger.error(`Catalog API details fetch failed for ${asin}`, {
        error: error instanceof Error ? error.message : String(error),
      });
      return null;
    }
  }

  /**
   * Runtime of a book in minutes, read from Audnexus.
   *
   * @param asin ASIN of the book.
   * @returns The runtime, or `null` when it is missing or the lookup fails
   *   (failures other than 404 are logged at debug level).
   */
  async getRuntime(asin: string): Promise<number | null> {
    try {
      // Make sure `this.region` reflects the configured region before it is
      // used to pick the Audnexus region parameter.
      await this.initialize();
      const audnexusRegion = AUDIBLE_REGIONS[this.region].audnexusParam;
      const response = await this.externalFetchWithRetry(
        `https://api.audnex.us/books/${asin}`,
        {
          params: { region: audnexusRegion },
          timeout: 5000,
          headers: { 'User-Agent': RMAB_USER_AGENT },
        },
      );
      const runtimeMin = response.data?.runtimeLengthMin;
      if (runtimeMin) {
        return parseInt(runtimeMin);
      }
      return null;
    } catch (error: any) {
      if (error.response?.status !== 404) {
        logger.debug(`Runtime fetch failed for ASIN ${asin}: ${error.message}`);
      }
      return null;
    }
  }

  /**
   * Top-level categories from the catalog API.
   *
   * @returns `{ id, name }` pairs; an empty array when the request fails.
   */
  async getCategories(): Promise<{ id: string; name: string }[]> {
    await this.initialize();
    logger.info('Fetching Audible categories...');
    try {
      const { data: response } = await this.fetchWithRetry(
        '/1.0/catalog/categories',
        {},
        5,
        this.apiClient,
      );
      const envelope: CatalogCategoriesResponse = response.data;
      const categories = (envelope.categories ?? []).map((c) => ({
        id: c.id,
        name: c.name,
      }));
      logger.info(`Found ${categories.length} top-level categories`);
      return categories;
    } catch (error) {
      logger.error('Failed to fetch categories', {
        error: error instanceof Error ? error.message : String(error),
      });
      return [];
    }
  }

  /**
   * Category audiobooks from Audible's HTML /search?node=<categoryId> page,
   * sorted by popularity-rank. Uses HTML scraping (not the catalog API) so
   * results match Audible's curated category-storefront ordering.
   *
   * @param categoryId Category node id sent as the `node` parameter.
   * @param limit Maximum number of books to return.
   * @returns Up to `limit` books; fewer (possibly none) when pages run out or
   *   a request fails.
   */
  async getCategoryBooks(categoryId: string, limit: number = 200): Promise<AudibleAudiobook[]> {
    await this.initialize();
    logger.info(`Fetching category books for node ${categoryId} (limit: ${limit})...`);

    const { audiobooks, pagesFetched } = await this.scrapePagedListing({
      path: '/search',
      params: {
        ipRedirectOverride: 'true',
        node: categoryId,
        pageSize: AUDIBLE_PAGE_SIZE,
        sort: 'popularity-rank',
      },
      limit,
      parsePage: (html, collected, max) => this.parseSearchResultItems(html, collected, max),
      onPageParsed: (page, foundOnPage) => {
        logger.info(`Category ${categoryId}: found ${foundOnPage} books on page ${page}`);
      },
      describeFailure: (page) => `Failed to fetch category ${categoryId} page ${page}`,
    });

    logger.info(
      `Category ${categoryId}: collected ${audiobooks.length} books across ${pagesFetched} pages`,
    );
    return audiobooks;
  }

  /** Language configuration for the current region. */
  private getLangConfig(): LanguageConfig {
    return getLanguageForRegion(this.region);
  }

  /** Parse a scraped runtime label using the current region's language config. */
  private parseRuntime(runtimeText: string): number | undefined {
    return parseRuntimeUtil(runtimeText, this.getLangConfig());
  }

  /**
   * Parse a scraped rating label: the text before the first space is read as
   * a number.
   *
   * @returns The rating, or `undefined` when the label is empty or does not
   *   start with a number.
   */
  private parseRatingText(ratingText: string): number | undefined {
    if (!ratingText) return undefined;
    const value = parseFloat(ratingText.split(' ')[0]);
    return Number.isNaN(value) ? undefined : value;
  }

  /**
   * Parse the `.productListItem` blocks used by /adblbestsellers and /newreleases.
   * Pushes matched books into `audiobooks` (skipping duplicates and respecting `limit`)
   * and returns the count parsed from this page.
   *
   * @param html Page HTML.
   * @param audiobooks Accumulator that new books are appended to.
   * @param limit Maximum size of `audiobooks`.
   * @returns Number of books appended from this page.
   */
  private parseProductListItems(
    html: string,
    audiobooks: AudibleAudiobook[],
    limit: number,
  ): number {
    const $ = cheerio.load(html);
    const langConfig = this.getLangConfig();
    let foundOnPage = 0;

    $('.productListItem').each((_index, element) => {
      // Returning false stops the cheerio iteration once the limit is reached.
      if (audiobooks.length >= limit) return false;
      const $el = $(element);

      // ASIN: data attribute first, then the product link path.
      const asin =
        $el.find('li').attr('data-asin') ||
        $el.find('a').attr('href')?.match(/\/(?:pd|ac)\/[^\/]+\/([A-Z0-9]{10})/)?.[1] ||
        '';
      if (!asin) return;
      if (audiobooks.some((book) => book.asin === asin)) return;

      const title =
        $el.find('h3 a').text().trim() ||
        $el.find('.bc-heading a').text().trim();
      const authorText =
        $el.find('.authorLabel').text().trim() ||
        $el.find('.bc-size-small .bc-text-bold').first().text().trim();
      const authorHref = $el.find('a[href*="/author/"]').first().attr('href') || '';
      const authorAsinMatch = authorHref.match(/\/author\/[^\/]+\/([A-Z0-9]{10})/);
      // Narrator — capture all narrator links (multi-narrator productions are common);
      // fall back to .narratorLabel text, then to the bc-text-bold sibling for layouts
      // that omit both anchor links and the .narratorLabel span.
      const narratorText =
        extractAllNarrators($, $el) ||
        $el.find('.bc-size-small .bc-text-bold').eq(1).text().trim();
      const coverArtUrl = $el.find('img').attr('src') || '';
      const rating = this.parseRatingText($el.find('.ratingsLabel').text().trim());

      audiobooks.push({
        asin,
        title,
        author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
        authorAsin: authorAsinMatch?.[1] || undefined,
        narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
        coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
        rating,
      });
      foundOnPage++;
    });
    return foundOnPage;
  }

  /**
   * Parse the `.s-result-item` / `.productListItem` blocks used by
   * /search?node=<categoryId>. Pushes matched books into `audiobooks`
   * (skipping duplicates and respecting `limit`) and returns the count parsed
   * from this page.
   *
   * @param html Page HTML.
   * @param audiobooks Accumulator that new books are appended to.
   * @param limit Maximum size of `audiobooks`.
   * @returns Number of books appended from this page.
   */
  private parseSearchResultItems(
    html: string,
    audiobooks: AudibleAudiobook[],
    limit: number,
  ): number {
    const $ = cheerio.load(html);
    const langConfig = this.getLangConfig();
    let foundOnPage = 0;

    $('.s-result-item, .productListItem').each((_index, element) => {
      // Returning false stops the cheerio iteration once the limit is reached.
      if (audiobooks.length >= limit) return false;
      const $el = $(element);

      // ASIN: data attribute first, then the product link path.
      const asin =
        $el.find('li').attr('data-asin') ||
        $el.find('a').attr('href')?.match(/\/(?:pd|ac)\/[^\/]+\/([A-Z0-9]{10})/)?.[1] ||
        '';
      if (!asin) return;
      if (audiobooks.some((b) => b.asin === asin)) return;

      const title =
        $el.find('h2').first().text().trim() ||
        $el.find('h3 a').text().trim() ||
        $el.find('.bc-heading a').text().trim();
      const authorLink = $el.find('a[href*="/author/"]').first();
      const authorText =
        authorLink.text().trim() ||
        $el.find('.authorLabel').text().trim();
      const authorHref = authorLink.attr('href') || '';
      const authorAsinMatch = authorHref.match(/\/author\/[^\/]+\/([A-Z0-9]{10})/);
      // Narrator — capture all narrator links (multi-narrator productions are common)
      const narratorText = extractAllNarrators($, $el);
      const coverArtUrl = $el.find('img').attr('src') || '';
      const runtimeText =
        $el.find('.runtimeLabel').text().trim() ||
        $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
      const durationMinutes = this.parseRuntime(runtimeText);
      const rating = this.parseRatingText(
        $el.find('.ratingsLabel').text().trim() ||
          $el.find('.a-icon-star span').first().text().trim(),
      );

      audiobooks.push({
        asin,
        title,
        author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
        authorAsin: authorAsinMatch?.[1] || undefined,
        narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
        coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
        durationMinutes,
        rating,
      });
      foundOnPage++;
    });
    return foundOnPage;
  }

  /** Resolve after `ms` milliseconds. */
  private async delay(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
/** Module-wide service instance; stays `null` until it is first requested. */
let audibleService: AudibleService | null = null;

/**
 * Return the shared `AudibleService`, constructing it on the first call.
 * Every later call returns that same instance.
 */
export function getAudibleService(): AudibleService {
  if (audibleService !== null) {
    return audibleService;
  }
  const created = new AudibleService();
  audibleService = created;
  return created;
}