mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 21:00:09 +00:00
20c8fb0898
Introduce user-reported-issues and Goodreads shelf sync features and wire them into notifications. Adds Prisma migrations and schema changes (ReportedIssue, GoodreadsShelf, GoodreadsBookMapping), API endpoints for reporting (POST /audiobooks/[asin]/report-issue) and admin management (list, resolve/dismiss, replace), and an admin UI section to view/dismiss/replace reported issues. Adds a new notification event (issue_reported) with updates to notification schemas, docs and provider handling, plus a notification-events constants file. Refactors request creation to use createRequestForUser service, adds a Goodreads sync processor/service/hooks/UI modals, a scrape-resilience util, and related tests and minor integration updates.
101 lines
3.7 KiB
TypeScript
101 lines
3.7 KiB
TypeScript
/**
 * Component: Scrape Resilience Utilities
 * Documentation: documentation/integrations/audible.md
 *
 * Anti-503 resilience for Audible scraping: UA rotation, jittered backoff,
 * browser-like headers, adaptive pacing, and circuit breaker.
 */
|
||
|
||
/**
 * Candidate User-Agent strings to rotate through.
 *
 * Every entry is a desktop browser string; the tuple is frozen with
 * `as const` so the entries keep their literal types.
 */
const USER_AGENTS = [
  // Chrome 120 on Windows 10 (x64)
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  // Chrome 121 on Windows 10 (x64)
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
  // Firefox 121 on Windows 10 (x64)
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0',
  // Safari 17.2 on macOS 14.2
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15',
  // Edge 120 on Windows 10 (x64)
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
  // Chrome 120 on macOS 10.15.7
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
] as const;
|
||
|
||
/**
 * Pick one entry from the User-Agent pool uniformly at random.
 *
 * Intended to be called once per scraping session and reused for every
 * request in that session, rather than re-rolled per request.
 *
 * @returns One of the strings in `USER_AGENTS`.
 */
export function pickUserAgent(): string {
  const index = Math.floor(Math.random() * USER_AGENTS.length);
  return USER_AGENTS[index];
}
|
||
|
||
/**
 * Assemble the request headers a desktop browser would send for a page
 * navigation, using the supplied User-Agent.
 *
 * A fresh object is returned on every call, so callers may mutate it.
 *
 * @param userAgent - Value to send as the `User-Agent` header.
 * @returns Header name → value map; keys are inserted in the order below.
 */
export function getBrowserHeaders(userAgent: string): Record<string, string> {
  const headers: Record<string, string> = {};

  headers['User-Agent'] = userAgent;

  // Content negotiation.
  headers['Accept'] =
    'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8';
  headers['Accept-Language'] = 'en-US,en;q=0.9';
  // NOTE(review): advertises brotli ("br"); presumably the HTTP client in use
  // can decode it — confirm against the caller.
  headers['Accept-Encoding'] = 'gzip, deflate, br';

  headers['Connection'] = 'keep-alive';

  // Fetch-metadata headers, set to the values for a navigation to a document.
  headers['Sec-Fetch-Site'] = 'none';
  headers['Sec-Fetch-Mode'] = 'navigate';
  headers['Sec-Fetch-Dest'] = 'document';
  headers['Sec-Fetch-User'] = '?1';

  headers['Upgrade-Insecure-Requests'] = '1';

  return headers;
}
|
||
|
||
/**
 * Compute a retry delay using exponential backoff with multiplicative jitter.
 *
 * The delay is `2^attempt * baseMs`, scaled by a random factor drawn from
 * [0.5, 1.5) and rounded to the nearest millisecond. The randomisation keeps
 * retry timing from being predictable and therefore trivially fingerprinted.
 *
 * @param attempt - Retry attempt number used as the exponent (0 yields ~baseMs).
 * @param baseMs - Base delay in milliseconds; defaults to 1000.
 * @returns The delay in milliseconds, as a rounded number.
 */
export function jitteredBackoff(attempt: number, baseMs: number = 1000): number {
  const exponentialMs = 2 ** attempt * baseMs;
  const jitterFactor = Math.random() + 0.5; // in [0.5, 1.5)
  return Math.round(exponentialMs * jitterFactor);
}
|
||
|
||
/**
 * Draw a uniformly distributed random integer from the inclusive range
 * [minMs, maxMs].
 *
 * The bounds are not validated: the result is only guaranteed to be an
 * integer inside the range when both bounds are integers and
 * `minMs <= maxMs`.
 *
 * @param minMs - Lower bound (inclusive), in milliseconds.
 * @param maxMs - Upper bound (inclusive), in milliseconds.
 * @returns A value between `minMs` and `maxMs`, both ends included.
 */
export function randomDelay(minMs: number, maxMs: number): number {
  const span = maxMs - minMs + 1; // +1 makes the upper bound reachable
  const offset = Math.floor(Math.random() * span);
  return minMs + offset;
}
|
||
|
||
/**
 * Per-fetch metadata reported alongside each fetch result.
 */
export interface FetchResultMeta {
  /**
   * Retry count reported for the fetch. `AdaptivePacer` treats any value
   * greater than zero as a page that needed retries.
   */
  retriesUsed: number;

  /**
   * Flag for a 503 response having been seen during the fetch (per the field
   * name; nothing in this file's visible code reads it).
   */
  encountered503: boolean;
}
|
||
|
||
/**
 * Chooses how long to wait between page fetches based on how recent fetches
 * went.
 *
 * Pages that needed retries stretch the delay range, and a run of such pages
 * triggers a long circuit-breaker cooldown. Pages that needed no retries
 * walk the pacer back towards the base delay one step at a time.
 */
export class AdaptivePacer {
  /** Retry-page streak length at which the circuit breaker trips. */
  private static readonly CIRCUIT_BREAKER_THRESHOLD = 3;

  /**
   * Current streak counter: +1 for each page that needed retries, -1 (never
   * below zero) for each page that did not, and zeroed when the breaker trips.
   */
  private consecutiveRetryPages = 0;

  /**
   * Record the outcome of a page fetch and get the delay to wait before
   * fetching the next page.
   *
   * @param meta - Metadata for the page just fetched; only `retriesUsed` is read.
   * @returns Recommended delay in milliseconds:
   *   - 2000–4000 when the page needed no retries;
   *   - the same range scaled by `1 + 0.5 * streak` when it did;
   *   - 45000–60000 when the streak reaches the breaker threshold.
   */
  reportPageResult(meta: FetchResultMeta): number {
    const neededRetries = meta.retriesUsed > 0;

    if (!neededRetries) {
      // Clean page: recover gradually by one step rather than clearing the streak.
      this.consecutiveRetryPages = Math.max(0, this.consecutiveRetryPages - 1);
      return randomDelay(2000, 4000);
    }

    this.consecutiveRetryPages += 1;

    const breakerTripped =
      this.consecutiveRetryPages >= AdaptivePacer.CIRCUIT_BREAKER_THRESHOLD;
    if (breakerTripped) {
      // Sustained retries: start a fresh streak and cool down for 45-60s.
      this.consecutiveRetryPages = 0;
      return randomDelay(45_000, 60_000);
    }

    // Below the threshold: widen the base range by 50% per streak step.
    const scale = 1 + this.consecutiveRetryPages / 2;
    const lowerMs = Math.round(2000 * scale);
    const upperMs = Math.round(4000 * scale);
    return randomDelay(lowerMs, upperMs);
  }

  /** Clear the streak counter (call between batches or on re-initialization). */
  reset(): void {
    this.consecutiveRetryPages = 0;
  }
}
|