mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 04:40:09 +00:00
Add series browsing, search, and detail UI
Introduce full support for Audible series exploration: API routes, frontend pages, components, hooks, and integrations.

Key changes:
- Prisma: add Audiobook.seriesAsin for linking audiobooks to series detail pages.
- Backend: add /api/series/search and /api/series/[asin] routes that require auth; scrape Audible series data and enrich books with library availability.
- Integrations/services: add audible-series integration and update request/HTTP services to support the workflow.
- Frontend: add /series and /series/[asin] pages, new components (SeriesCard, SeriesGrid, SeriesDetailCard, SimilarSeriesRow) and wire them to a new useSeries hook; update AudiobookDetailsModal to show/link series; add Series link to Header.
- Misc: extend audiobook types with series fields and add seriesLabels to language-config for scraping.

These changes enable users to search for series, view series metadata and books, and navigate between audiobook and series detail pages.
This commit is contained in:
@@ -31,6 +31,8 @@ export interface ScrapingConfig {
|
||||
languageLabels: string[];
|
||||
/** Release date field labels */
|
||||
releaseDateLabels: string[];
|
||||
/** Series label prefixes used to find series links in search results */
|
||||
seriesLabels: string[];
|
||||
/** Accepted language values for filtering (lowercase) */
|
||||
acceptedLanguageValues: string[];
|
||||
/** Regex patterns that match hour portions in runtime strings */
|
||||
@@ -80,6 +82,7 @@ const ENGLISH_CONFIG: LanguageConfig = {
|
||||
lengthLabels: ['Length:'],
|
||||
languageLabels: ['Language:'],
|
||||
releaseDateLabels: ['Release date:'],
|
||||
seriesLabels: ['Series:'],
|
||||
acceptedLanguageValues: ['english'],
|
||||
runtimeHourPatterns: [/(\d+)\s*hrs?/i, /(\d+)\s*hours?/i],
|
||||
runtimeMinutePatterns: [/(\d+)\s*mins?/i, /(\d+)\s*minutes?/i],
|
||||
@@ -112,6 +115,7 @@ const GERMAN_CONFIG: LanguageConfig = {
|
||||
lengthLabels: ['Spieldauer:', 'Dauer:', 'L\u00e4nge:'],
|
||||
languageLabels: ['Sprache:'],
|
||||
releaseDateLabels: ['Erscheinungsdatum:'],
|
||||
seriesLabels: ['Serie:', 'Reihe:'],
|
||||
acceptedLanguageValues: ['deutsch', 'german'],
|
||||
runtimeHourPatterns: [/(\d+)\s*Std\.?/i, /(\d+)\s*Stunden?/i],
|
||||
runtimeMinutePatterns: [/(\d+)\s*Min\.?/i, /(\d+)\s*Minuten?/i],
|
||||
@@ -145,6 +149,7 @@ const SPANISH_CONFIG: LanguageConfig = {
|
||||
lengthLabels: ['Duraci\u00f3n:'],
|
||||
languageLabels: ['Idioma:'],
|
||||
releaseDateLabels: ['Fecha de lanzamiento:'],
|
||||
seriesLabels: ['Serie:'],
|
||||
acceptedLanguageValues: ['espa\u00f1ol', 'spanish'],
|
||||
runtimeHourPatterns: [/(\d+)\s*h\b/i, /(\d+)\s*horas?/i],
|
||||
runtimeMinutePatterns: [/(\d+)\s*min/i, /(\d+)\s*minutos?/i],
|
||||
|
||||
@@ -20,6 +20,9 @@ export interface Audiobook {
|
||||
releaseDate?: string;
|
||||
rating?: number;
|
||||
genres?: string[];
|
||||
series?: string; // Series name (e.g., "A Song of Ice and Fire")
|
||||
seriesPart?: string; // Position in series (e.g., "1", "1.5")
|
||||
seriesAsin?: string; // Audible ASIN for the series (links to /series/{asin})
|
||||
isAvailable?: boolean; // Set by real-time matching against plex_library
|
||||
plexGuid?: string | null;
|
||||
dbId?: string | null;
|
||||
|
||||
@@ -0,0 +1,75 @@
|
||||
/**
|
||||
* Component: Series Fetching Hooks
|
||||
* Documentation: documentation/frontend/components.md
|
||||
*/
|
||||
|
||||
'use client';
|
||||
|
||||
import useSWR from 'swr';
|
||||
import { authenticatedFetcher } from '@/lib/utils/api';
|
||||
import { Audiobook } from './useAudiobooks';
|
||||
|
||||
/**
 * One series entry as consumed by `useSeriesSearch`.
 * NOTE(review): presumably mirrors the payload of /api/series/search — confirm against the route.
 */
export interface SeriesSummary {
  /** Audible ASIN identifying the series. */
  asin: string;
  /** Display title of the series. */
  title: string;
  /** Number of books reported for the series. */
  bookCount: number;
  /** Average star rating, when one is available. */
  rating?: number;
  /** Number of ratings behind `rating`, when available. */
  ratingCount?: number;
  /** Genre/tag labels attached to the series. */
  tags: string[];
  /** Cover image URL, when available. */
  coverArtUrl?: string;
  /** Link to the series page on Audible. */
  audibleUrl: string;
}
|
||||
|
||||
/** A related series shown alongside a series detail view. */
export interface SimilarSeries {
  /** Audible ASIN identifying the related series. */
  asin: string;
  /** Display title of the related series. */
  title: string;
  /** Number of books in the related series, when known. */
  bookCount?: number;
  /** Cover image URL, when available. */
  coverArtUrl?: string;
}
|
||||
|
||||
/**
 * Full series record as consumed by `useSeriesDetail`.
 * NOTE(review): presumably mirrors the payload of /api/series/[asin] — confirm against the route.
 */
export interface SeriesDetail {
  /** Audible ASIN identifying the series. */
  asin: string;
  /** Display title of the series. */
  title: string;
  /** Number of books reported for the series. */
  bookCount: number;
  /** Average star rating, when one is available. */
  rating?: number;
  /** Number of ratings behind `rating`, when available. */
  ratingCount?: number;
  /** Series description text, when available. */
  description?: string;
  /** Genre/tag labels attached to the series. */
  tags: string[];
  /** Books belonging to the series. */
  books: Audiobook[];
  /** Related series to suggest next to this one. */
  similarSeries: SimilarSeries[];
  /** Link to the series page on Audible. */
  audibleUrl: string;
}
|
||||
|
||||
/**
 * SWR hook that searches for series by free-text query.
 *
 * No request is issued while `query` is empty: the SWR key is `null` in that
 * case, which disables fetching.
 *
 * @param query - Search text; sent URL-encoded as the `q` query parameter.
 * @returns `series` (empty array until data arrives), the `query` echoed by
 *   the response (empty string until data arrives), a strictly boolean
 *   `isLoading`, and the SWR `error`.
 */
export function useSeriesSearch(query: string) {
  // Coerce to a real boolean. `query && query.length > 0` evaluates to '' for
  // an empty query, which previously leaked a string into `isLoading`.
  const shouldFetch = Boolean(query);
  const endpoint = shouldFetch
    ? `/api/series/search?q=${encodeURIComponent(query)}`
    : null;

  const { data, error, isLoading } = useSWR(endpoint, authenticatedFetcher, {
    revalidateOnFocus: false,
    dedupingInterval: 30000,
  });

  return {
    series: (data?.series || []) as SeriesSummary[],
    query: data?.query || '',
    isLoading: shouldFetch && isLoading,
    error,
  };
}
|
||||
|
||||
/**
 * SWR hook that loads the detail record for a single series.
 *
 * No request is issued while `asin` is null or empty: the SWR key is `null`
 * in that case, which disables fetching.
 *
 * @param asin - Series ASIN, or `null` when no series is selected.
 * @returns `series` (null until data arrives), plus SWR's `isLoading` and `error`.
 */
export function useSeriesDetail(asin: string | null) {
  // Encode the path segment so an unexpected value cannot alter the request
  // path; a well-formed alphanumeric ASIN is left unchanged by the encoding.
  const endpoint = asin ? `/api/series/${encodeURIComponent(asin)}` : null;

  const { data, error, isLoading } = useSWR(endpoint, authenticatedFetcher, {
    revalidateOnFocus: false,
    dedupingInterval: 300000, // Dedupe identical requests for 5 minutes
  });

  return {
    series: (data?.series || null) as SeriesDetail | null,
    isLoading,
    error,
  };
}
|
||||
@@ -0,0 +1,515 @@
|
||||
/**
|
||||
* Component: Audible Series Scraping
|
||||
* Documentation: documentation/integrations/audible.md
|
||||
*
|
||||
* Standalone series scraping module. Uses the AudibleService fetch wrapper
|
||||
* for HTTP requests and Cheerio for HTML parsing.
|
||||
* Kept separate from audible.service.ts to avoid bloating the main service.
|
||||
*/
|
||||
|
||||
import * as cheerio from 'cheerio';
|
||||
import { getAudibleService, AudibleAudiobook } from './audible.service';
|
||||
import { AUDIBLE_REGIONS } from '../types/audible';
|
||||
import {
|
||||
getLanguageForRegion,
|
||||
buildContainsSelector,
|
||||
stripPrefixes,
|
||||
} from '../constants/language-config';
|
||||
import { RMABLogger } from '../utils/logger';
|
||||
import { randomDelay } from '../utils/scrape-resilience';
|
||||
|
||||
// Logger used by every function in this module.
const logger = RMABLogger.create('Audible.Series');

// `pageSize` value sent with Audible search and series-detail requests.
const AUDIBLE_PAGE_SIZE = 50;

// Upper bound on distinct series collected from one search results page.
const MAX_SERIES_RESULTS = 15;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Summary of one Audible series, as returned by `searchForSeries`. */
export interface SeriesSummary {
  /** Audible ASIN identifying the series. */
  asin: string;
  /** Series title. */
  title: string;
  /** Number of books in the series; 0 when it could not be determined. */
  bookCount: number;
  /** Star rating parsed from the series page, when present. */
  rating?: number;
  /** Number of ratings parsed from the series page, when present. */
  ratingCount?: number;
  /** Tag/genre labels scraped from the series page. */
  tags: string[];
  /** Cover image URL, when one was found. */
  coverArtUrl?: string;
  /** URL of the series page on Audible. */
  audibleUrl: string;
}
|
||||
|
||||
/** A related series scraped from a series page's similar-series carousel. */
export interface SimilarSeries {
  /** Audible ASIN identifying the related series. */
  asin: string;
  /** Title of the related series. */
  title: string;
  /** Book count parsed from the carousel item, when present. */
  bookCount?: number;
  /** Cover image URL, when one was found. */
  coverArtUrl?: string;
}
|
||||
|
||||
/** Full detail of one Audible series, as returned by `scrapeSeriesPage`. */
export interface SeriesDetail {
  /** Audible ASIN identifying the series. */
  asin: string;
  /** Series title. */
  title: string;
  /** Number of books in the series. */
  bookCount: number;
  /** Star rating parsed from the series page, when present. */
  rating?: number;
  /** Number of ratings parsed from the series page, when present. */
  ratingCount?: number;
  /** Description text scraped from the series page, when present. */
  description?: string;
  /** Tag/genre labels scraped from the series page. */
  tags: string[];
  /** Books parsed from the series page's product list. */
  books: AudibleAudiobook[];
  /** Related series parsed from the similar-series carousel. */
  similarSeries: SimilarSeries[];
  /** URL of the series page on Audible. */
  audibleUrl: string;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Search: extract series links from Audible search results
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Search Audible for series matching a free-text query.
 *
 * Fetches one search results page, collects the distinct series links found
 * next to the locale-specific series label of each result (at most
 * MAX_SERIES_RESULTS), then scrapes each series page in batches of five with
 * a random pause between batches.
 *
 * A series whose page cannot be scraped is still returned, built only from
 * the title and cover seen on the search page (bookCount 0, no tags).
 *
 * @param query - Search keywords, forwarded to Audible as `keywords`.
 * @returns Series summaries in search-result order; an empty array when the
 *   search request fails or no series links are found.
 */
export async function searchForSeries(query: string): Promise<SeriesSummary[]> {
  const service = getAudibleService();
  const region = service.getRegion();
  const baseUrl = service.getBaseUrl();
  const langConfig = getLanguageForRegion(region);
  const seriesLabels = langConfig.scraping.seriesLabels;

  logger.info(`Searching series for "${query}" (region: ${region})`);

  // Step 1: Fetch search results page
  let $: cheerio.CheerioAPI;
  try {
    const { data: response } = await service.fetch('/search', {
      params: {
        ipRedirectOverride: 'true',
        keywords: query,
        pageSize: AUDIBLE_PAGE_SIZE,
      },
    });
    $ = cheerio.load(response.data);
  } catch (error) {
    logger.error('Series search fetch failed', {
      error: error instanceof Error ? error.message : String(error),
    });
    return [];
  }

  // Step 2: Extract unique series ASINs from search results.
  // Series links appear next to spans containing the locale-specific label.
  type SearchMeta = { title: string; coverArtUrl?: string };
  const seriesMap = new Map<string, SearchMeta>();

  // The selector depends only on the locale, so build it once rather than per item.
  const seriesSelector = buildContainsSelector('span', seriesLabels);

  $('.s-result-item, .productListItem').each((_index, element) => {
    if (seriesMap.size >= MAX_SERIES_RESULTS) return false;

    const $el = $(element);

    // Find the span containing a series label (e.g. "Series:")
    const seriesContainer = $el.find(seriesSelector).first();
    if (seriesContainer.length === 0) return;

    // The series link is a child or sibling of the label: href="/series/Name/B006K1QER6"
    const seriesLink = seriesContainer.parent().find('a[href*="/series/"]').first();
    if (seriesLink.length === 0) return;

    const href = seriesLink.attr('href') || '';
    const asinMatch = href.match(/\/series\/[^/]*\/([A-Z0-9]{10})/);
    if (!asinMatch) return;

    const asin = asinMatch[1];
    if (seriesMap.has(asin)) return;

    const title = seriesLink.text().trim();
    if (!title) return;

    // Use the first book's cover as representative image
    const coverArtUrl = $el.find('img').first().attr('src')?.replace(/\._.*_\./, '._SL500_.') || undefined;

    seriesMap.set(asin, { title, coverArtUrl });
  });

  if (seriesMap.size === 0) {
    logger.info(`No series found for "${query}"`);
    return [];
  }

  logger.info(`Found ${seriesMap.size} unique series, scraping detail pages...`);

  // Minimal result built purely from search-page data, used whenever the
  // series page itself could not be scraped.
  const minimalSummary = (asin: string, meta: SearchMeta): SeriesSummary => ({
    asin,
    title: meta.title,
    bookCount: 0,
    tags: [],
    coverArtUrl: meta.coverArtUrl,
    audibleUrl: `${baseUrl}/series/${asin}`,
  });

  // Step 3: Scrape each series page, in parallel within a batch
  const entries = Array.from(seriesMap.entries());
  const BATCH_SIZE = 5;
  const results: SeriesSummary[] = [];

  for (let i = 0; i < entries.length; i += BATCH_SIZE) {
    const batch = entries.slice(i, i + BATCH_SIZE);
    const batchResults = await Promise.all(
      batch.map(async ([asin, meta]): Promise<SeriesSummary> => {
        try {
          const detail = await scrapeSeriesPageSummary(asin);
          // The scraper reports fetch failures as null instead of throwing,
          // so the fallback has to be applied here as well as in the catch.
          if (!detail) return minimalSummary(asin, meta);
          return {
            ...detail,
            // The series page may lack an <h1>; keep the search-page title then.
            title: detail.title || meta.title,
            coverArtUrl: detail.coverArtUrl || meta.coverArtUrl,
            audibleUrl: `${baseUrl}/series/${asin}`,
          };
        } catch (error) {
          logger.warn(`Failed to scrape series ${asin}`, {
            error: error instanceof Error ? error.message : String(error),
          });
          return minimalSummary(asin, meta);
        }
      })
    );

    results.push(...batchResults);

    // Rate limit between batches
    if (i + BATCH_SIZE < entries.length) {
      await new Promise(resolve => setTimeout(resolve, randomDelay(1500, 3000)));
    }
  }

  logger.info(`Series search complete: "${query}" -> ${results.length} results`);
  return results;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Series page scraping (summary - for search results)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fetch a single series page and parse its summary fields.
 *
 * @param asin - Series ASIN, used as the `/series/{asin}` path segment.
 * @returns The parsed summary (without `audibleUrl`), or `null` when the
 *   request or parsing throws; the failure is logged as a warning.
 */
async function scrapeSeriesPageSummary(asin: string): Promise<Omit<SeriesSummary, 'audibleUrl'> | null> {
  const audible = getAudibleService();

  try {
    const requestConfig = { params: { ipRedirectOverride: 'true' } };
    const { data: page } = await audible.fetch(`/series/${asin}`, requestConfig);
    const document = cheerio.load(page.data);
    return parseSeriesPageSummary(document, asin);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger.warn(`Failed to fetch series page ${asin}`, { error: message });
    return null;
  }
}
|
||||
|
||||
/**
 * Extract the summary fields of a series from its loaded page.
 *
 * @param $ - Cheerio document of the series page.
 * @param asin - Series ASIN, copied into the result unchanged.
 * @returns Title, book count, rating data, at most five tags and a cover URL.
 */
function parseSeriesPageSummary(
  $: cheerio.CheerioAPI,
  asin: string
): Omit<SeriesSummary, 'audibleUrl'> {
  // Title comes from the first <h1>; empty string when there is none.
  const title = $('h1').first().text().trim();

  // Book count, strategy 1: the header's child-count metadata element.
  // Elements inside a product carousel are skipped (similar-series counts).
  let bookCount = 0;
  for (const node of $('adbl-metadata[slot="child-count"]').toArray()) {
    const $node = $(node);
    if ($node.closest('adbl-product-carousel').length > 0) continue;

    const digits = $node.text().trim().match(/(\d+)/);
    if (!digits) continue;

    bookCount = parseInt(digits[1], 10);
    if (bookCount > 0) break;
  }

  // Strategy 2: "<n> books/titles/Titel/libros/Buch/Bücher" text in any span.
  if (bookCount === 0) {
    const countSelector = ['book', 'title', 'Titel', 'libro', 'Buch', 'B\u00fccher']
      .map(word => `span:contains("${word}")`)
      .join(', ');
    const countMatch = $(countSelector)
      .text()
      .trim()
      .match(/(\d+)\s*(books?|titles?|Titel|libros?|B(?:uch|\u00fccher))/i);
    if (countMatch) {
      bookCount = parseInt(countMatch[1], 10);
    }
  }

  // Strategy 3: count the product entries present on the page.
  if (bookCount === 0) {
    bookCount = $('.productListItem, .bc-list-item[data-asin]').length;
  }

  const { rating, ratingCount } = parseSeriesRating($);

  // Tags: unique labels of 2 to 50 characters, in first-seen order.
  const uniqueTags = new Set<string>();
  const collectTagsFrom = (selector: string): void => {
    $(selector).each((_i, el) => {
      const label = $(el).text().trim();
      if (label && label.length >= 2 && label.length <= 50) {
        uniqueTags.add(label);
      }
    });
  };

  // Modern layout first; legacy category/tag links only if that found nothing.
  collectTagsFrom('adbl-chip.related-tag');
  if (uniqueTags.size === 0) {
    collectTagsFrom('a[href*="/cat/"], a[href*="/tag/"]');
  }

  // Cover art: first book image, with the size token rewritten to _SL500_.
  const firstCoverSrc = $('.productListItem img, .bc-list-item img').first().attr('src');
  const coverArtUrl = firstCoverSrc?.replace(/\._.*_\./, '._SL500_.') || undefined;

  return {
    asin,
    title,
    bookCount,
    rating,
    ratingCount,
    tags: Array.from(uniqueTags).slice(0, 5),
    coverArtUrl,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Series page scraping (full detail)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Fetch a series page and assemble its full detail record: summary fields,
 * description, the books listed on the page and the similar-series carousel.
 *
 * @param asin - Series ASIN, used as the `/series/{asin}` path segment.
 * @returns The series detail, or `null` when fetching or parsing throws;
 *   the failure is logged as an error.
 */
export async function scrapeSeriesPage(asin: string): Promise<SeriesDetail | null> {
  const audible = getAudibleService();
  const region = audible.getRegion();
  const baseUrl = audible.getBaseUrl();
  const { scraping } = getLanguageForRegion(region);

  logger.info(`Scraping series detail page: ${asin}`);

  try {
    const { data: page } = await audible.fetch(`/series/${asin}`, {
      params: { ipRedirectOverride: 'true', pageSize: AUDIBLE_PAGE_SIZE },
    });
    const $ = cheerio.load(page.data);

    const summary = parseSeriesPageSummary($, asin);

    // Description: expander content first, then the publisher summary block.
    const expanderText = $('.bc-expander-content').first().text().trim();
    const description =
      expanderText ||
      $('[class*="productPublisherSummary"]').first().text().trim() ||
      undefined;

    const books = parseSeriesBooks($, scraping.authorPrefixes, scraping.narratorPrefixes);

    // Prefer whichever is larger: the advertised count or the books actually parsed.
    const bookCount = Math.max(summary.bookCount, books.length);

    const similarSeries = parseSimilarSeries($);

    logger.info(`Series detail complete: "${summary.title}" (${books.length} books, ${similarSeries.length} similar)`);

    const detail: SeriesDetail = {
      asin,
      title: summary.title,
      bookCount,
      rating: summary.rating,
      ratingCount: summary.ratingCount,
      description,
      tags: summary.tags,
      books,
      similarSeries,
      audibleUrl: `${baseUrl}/series/${asin}`,
    };
    return detail;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`Failed to scrape series detail ${asin}`, { error: message });
    return null;
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Parsing helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Extract the star rating and the number of ratings from a series page.
 *
 * The rating is read from the `aria-label` of `div.bc-review-stars`
 * (e.g. "4.5 out of 5 stars"), falling back to any element whose aria-label
 * contains "out of", "von 5" or "de 5". The count is read from
 * `span.series-rating` (e.g. "8,704 ratings"), falling back to the first
 * element whose class contains "rating".
 *
 * @param $ - Cheerio document of the series page.
 * @returns `rating` and `ratingCount`; each is left undefined when it cannot
 *   be parsed. Neither is ever NaN.
 */
function parseSeriesRating($: cheerio.CheerioAPI): { rating?: number; ratingCount?: number } {
  let rating: number | undefined;
  let ratingCount: number | undefined;

  // Primary source, then a looser aria-label search (only run when needed).
  const ariaLabel =
    $('div.bc-review-stars').attr('aria-label') ||
    $('[aria-label*="out of"], [aria-label*="von 5"], [aria-label*="de 5"]').first().attr('aria-label') ||
    '';

  // Handles "4.5 out of 5", "4,5 von 5", "4,5 de 5"
  const ratingMatch = ariaLabel.match(/(\d+[.,]?\d*)\s*(?:out of|von|de)\s*5/i);
  if (ratingMatch) {
    rating = parseFloat(ratingMatch[1].replace(',', '.'));
  }

  // Rating count text, e.g. "8,704 ratings"
  const countText =
    $('span.series-rating').first().text().trim() ||
    $('[class*="rating"], .ratingsLabel').first().text().trim();

  // Require a leading digit: a bare "." or "," before the word (as in
  // "... yet. Ratings") must not match, since it would parse to NaN.
  const countMatch = countText.match(/(\d[\d,.]*)\s*(?:ratings?|Bewertungen?|calificaciones?)/i);
  if (countMatch) {
    // Strip thousands separators of either locale before parsing.
    const parsedCount = Number.parseInt(countMatch[1].replace(/[.,]/g, ''), 10);
    if (!Number.isNaN(parsedCount)) {
      ratingCount = parsedCount;
    }
  }

  return { rating, ratingCount };
}
|
||||
|
||||
/**
 * Build the list of books shown on a series page.
 *
 * Each `.productListItem` / `.bc-list-item` element is inspected in document
 * order. Elements without a resolvable ASIN, with an ASIN already seen, or
 * without a title are skipped.
 *
 * @param $ - Cheerio document of the series page.
 * @param authorPrefixes - Locale prefixes stripped from the author text.
 * @param narratorPrefixes - Locale prefixes stripped from the narrator text.
 * @returns One entry per distinct book ASIN, in page order.
 */
function parseSeriesBooks(
  $: cheerio.CheerioAPI,
  authorPrefixes: string[],
  narratorPrefixes: string[]
): AudibleAudiobook[] {
  const collected: AudibleAudiobook[] = [];
  const knownAsins = new Set<string>();

  for (const node of $('.productListItem, .bc-list-item').toArray()) {
    const $item = $(node);

    // ASIN: data attributes first, then product/"ac" link paths.
    const bookAsin =
      $item.attr('data-asin') ||
      $item.find('li').attr('data-asin') ||
      $item.find('a[href*="/pd/"]').attr('href')?.match(/\/pd\/[^/]+\/([A-Z0-9]{10})/)?.[1] ||
      $item.find('a[href*="/ac/"]').attr('href')?.match(/\/ac\/[^/]+\/([A-Z0-9]{10})/)?.[1] ||
      $item.find('a').attr('href')?.match(/\/(?:pd|ac)\/[^/]+\/([A-Z0-9]{10})/)?.[1] ||
      '';

    if (!bookAsin || knownAsins.has(bookAsin)) continue;
    // Recorded before the title check, so an untitled element still claims its ASIN.
    knownAsins.add(bookAsin);

    const title =
      $item.find('h2').first().text().trim() ||
      $item.find('h3 a').first().text().trim() ||
      $item.find('.bc-heading a').first().text().trim() ||
      '';
    if (!title) continue;

    // Author: first author link, else the author label text.
    const $authorLink = $item.find('a[href*="/author/"]').first();
    const authorText = $authorLink.text().trim() || $item.find('.authorLabel').text().trim() || '';
    const authorAsin = ($authorLink.attr('href') || '').match(/\/author\/[^/]+\/([A-Z0-9]{10})/)?.[1];

    // Narrator: first narrator search link, else the narrator label text.
    const narratorText =
      $item.find('a[href*="searchNarrator="]').first().text().trim() ||
      $item.find('.narratorLabel').text().trim() ||
      '';

    // Cover art, with the size token rewritten to _SL500_.
    const coverArtUrl = $item.find('img').first().attr('src')?.replace(/\._.*_\./, '._SL500_.') || '';

    // Rating: first number found in the ratings label or star icon text.
    const ratingText =
      $item.find('.ratingsLabel').text().trim() ||
      $item.find('.a-icon-star span').first().text().trim();
    const ratingDigits = ratingText ? ratingText.match(/(\d+[.,]?\d*)/) : null;
    const rating = ratingDigits ? parseFloat(ratingDigits[1].replace(',', '.')) : undefined;

    collected.push({
      asin: bookAsin,
      title,
      author: stripPrefixes(authorText, authorPrefixes),
      authorAsin: authorAsin || undefined,
      narrator: stripPrefixes(narratorText, narratorPrefixes),
      coverArtUrl,
      rating,
    });
  }

  return collected;
}
|
||||
|
||||
/**
 * Read related series out of the `adbl-product-carousel#SeriestoSeries`
 * carousel of a series page.
 *
 * Every `adbl-product-grid-item` in the carousel yields at most one entry.
 * The ASIN comes from the `data-asin` of the impression element, or from the
 * item's `/series/...` link. The title and optional book count come from the
 * `adbl-metadata` slots "title" and "child-count".
 *
 * @param $ - Cheerio document of the series page.
 * @returns Up to 15 distinct related series; empty when the carousel is absent.
 */
function parseSimilarSeries($: cheerio.CheerioAPI): SimilarSeries[] {
  const related: SimilarSeries[] = [];
  const knownAsins = new Set<string>();

  // Only look inside the SeriestoSeries carousel so unrelated series links are ignored.
  const $carousel = $('adbl-product-carousel#SeriestoSeries');
  if ($carousel.length === 0) return related;

  for (const node of $carousel.find('adbl-product-grid-item').toArray()) {
    if (related.length >= 15) break;

    const $item = $(node);

    // ASIN: data-asin on the impression element, else the series link path.
    let asin = $item.find('.adbl-impression-emitted, .adbl-asin-impression').first().attr('data-asin') || '';
    if (!asin) {
      const href = $item.find('a[href*="/series/"]').first().attr('href') || '';
      const fromHref = href.match(/\/series\/[^/]*\/([A-Z0-9]{10})/);
      if (fromHref) asin = fromHref[1];
    }

    const isValidAsin = Boolean(asin) && /^[A-Z0-9]{10}$/.test(asin);
    if (!isValidAsin || knownAsins.has(asin)) continue;
    knownAsins.add(asin);

    // Title: link text inside the title slot, else the slot's own text.
    const $titleSlot = $item.find('adbl-metadata[slot="title"]');
    const title =
      $item.find('adbl-metadata[slot="title"] a').first().text().trim() ||
      $titleSlot.first().text().trim() ||
      '';
    if (!title || title.length > 200) continue;

    // Book count: first number in the child-count slot (e.g. "3 titles").
    const countDigits = $item
      .find('adbl-metadata[slot="child-count"]')
      .first()
      .text()
      .trim()
      .match(/(\d+)/);
    const bookCount = countDigits ? parseInt(countDigits[1], 10) : undefined;

    // Cover: collection image first, then any image in the item.
    const coverArtUrl =
      $item.find('adbl-collection-image img').first().attr('src')?.replace(/\._.*_\./, '._SL500_.') ||
      $item.find('img').first().attr('src')?.replace(/\._.*_\./, '._SL500_.') ||
      undefined;

    related.push({ asin, title, bookCount, coverArtUrl });
  }

  return related;
}
|
||||
@@ -48,6 +48,7 @@ export interface AudibleAudiobook {
|
||||
genres?: string[];
|
||||
series?: string;
|
||||
seriesPart?: string;
|
||||
seriesAsin?: string;
|
||||
}
|
||||
|
||||
export interface AudibleSearchResult {
|
||||
@@ -77,6 +78,22 @@ export class AudibleService {
|
||||
return this.baseUrl;
|
||||
}
|
||||
|
||||
/**
 * Returns the Audible region held by this service instance.
 *
 * @returns The value of the instance's `region` field.
 */
public getRegion(): AudibleRegion {
  const { region } = this;
  return region;
}
|
||||
|
||||
/**
 * Public request entry point for scraping modules that live outside this
 * class (e.g. audible-series.ts).
 *
 * Awaits `initialize()` first, then hands the call to `fetchWithRetry`.
 *
 * @param url - Request URL or path, passed through to `fetchWithRetry`.
 * @param config - Request options, passed through unchanged; defaults to `{}`.
 * @returns Whatever `fetchWithRetry` resolves with: `data` plus fetch `meta`.
 */
public async fetch(url: string, config: any = {}): Promise<{ data: any; meta: FetchResultMeta }> {
  await this.initialize();
  const outcome = await this.fetchWithRetry(url, config);
  return outcome;
}
|
||||
|
||||
/**
|
||||
* Get the language config for the current region
|
||||
*/
|
||||
@@ -749,6 +766,7 @@ export class AudibleService {
|
||||
genres: data.genres?.map((g: any) => typeof g === 'string' ? g : g.name).slice(0, 5) || undefined,
|
||||
series: data.seriesPrimary?.name || undefined,
|
||||
seriesPart: data.seriesPrimary?.position || undefined,
|
||||
seriesAsin: data.seriesPrimary?.asin || undefined,
|
||||
};
|
||||
|
||||
// Ensure cover art URL is high quality
|
||||
@@ -765,7 +783,8 @@ export class AudibleService {
|
||||
rating: result.rating,
|
||||
genreCount: result.genres?.length || 0,
|
||||
series: result.series,
|
||||
seriesPart: result.seriesPart
|
||||
seriesPart: result.seriesPart,
|
||||
seriesAsin: result.seriesAsin
|
||||
});
|
||||
|
||||
return result;
|
||||
|
||||
@@ -84,6 +84,7 @@ export async function createRequestForUser(
|
||||
let year: number | undefined;
|
||||
let series: string | undefined;
|
||||
let seriesPart: string | undefined;
|
||||
let seriesAsin: string | undefined;
|
||||
try {
|
||||
const audibleService = getAudibleService();
|
||||
const audnexusData = await audibleService.getAudiobookDetails(audiobook.asin);
|
||||
@@ -100,6 +101,7 @@ export async function createRequestForUser(
|
||||
}
|
||||
if (audnexusData?.series) series = audnexusData.series;
|
||||
if (audnexusData?.seriesPart) seriesPart = audnexusData.seriesPart;
|
||||
if (audnexusData?.seriesAsin) seriesAsin = audnexusData.seriesAsin;
|
||||
} catch (error) {
|
||||
logger.warn(`Failed to fetch Audnexus data for ASIN ${audiobook.asin}: ${error instanceof Error ? error.message : 'Unknown error'}`);
|
||||
}
|
||||
@@ -121,6 +123,7 @@ export async function createRequestForUser(
|
||||
year,
|
||||
series,
|
||||
seriesPart,
|
||||
seriesAsin,
|
||||
status: 'requested',
|
||||
},
|
||||
});
|
||||
@@ -134,6 +137,7 @@ export async function createRequestForUser(
|
||||
if (year) updates.year = year;
|
||||
if (series) updates.series = series;
|
||||
if (seriesPart) updates.seriesPart = seriesPart;
|
||||
if (seriesAsin) updates.seriesAsin = seriesAsin;
|
||||
|
||||
if (Object.keys(updates).length > 0) {
|
||||
audiobookRecord = await prisma.audiobook.update({
|
||||
|
||||
Reference in New Issue
Block a user