diff --git a/documentation/integrations/audible.md b/documentation/integrations/audible.md index 2a99641..fbab910 100644 --- a/documentation/integrations/audible.md +++ b/documentation/integrations/audible.md @@ -47,7 +47,8 @@ Configurable Audible region for accurate metadata matching across different inte - `AudibleService` loads region from config on initialization - Dynamically builds base URL: `AUDIBLE_REGIONS[region].baseUrl` - Audnexus API calls include region parameter: `?region={code}` -- IP redirect prevention: `?ipRedirectOverride=true` on all Audible requests +- IP redirect prevention: `?ipRedirectOverride=true` on all Audible requests (region only) +- **Locale enforcement:** Cookie `lc-acbus=en_US` + `handleLocaleRedirect()` detects non-English culture codes in response URLs and re-requests using the English URL from Audible's locale picker - Configuration service helper: `getAudibleRegion()` returns configured region - **Auto-detection of region changes**: Service checks config before each request and re-initializes if region changed - **Cache clearing**: When region changes, ConfigService cache and AudibleService initialization are cleared @@ -225,3 +226,14 @@ interface EnrichedAudibleAudiobook extends AudibleAudiobook { - **Fix:** Added `mapRegionToABSProvider()` to convert RMAB region codes to AudiobookShelf provider values. US → `'audible'`, others → `'audible.{region}'` (e.g., `'audible.ca'`, `'audible.uk'`) - **Location:** `src/lib/services/audiobookshelf/api.ts:14, 147` - **Affects:** All Audiobookshelf metadata matching operations + +**Non-English locale pages served to users outside US (2026-02-05)** +- **Problem:** Audible uses IP geolocation to add culture codes (e.g., `es_US`, `fr_CA`) to URLs, serving locale-specific pages. `ipRedirectOverride=true` only prevents region redirects (audible.com → audible.co.uk), NOT language/locale redirects within the same region. 
+- **Impact:** Users self-hosting from non-English-speaking countries (e.g., Dominican Republic) got Spanish bestsellers/new releases on their homepage because the `audible_refresh` job scraped locale-redirected pages. +- **Fix:** Three-layer defense in `AudibleService`: + 1. **Cookie:** `lc-acbus=en_US` header hints English locale preference + 2. **Locale picker detection (primary):** After every request, checks response URL for non-`en_*` culture codes (`xx_YY` pattern). If found, parses page HTML for Audible's `<adbl-toggle-chip>` locale picker, extracts the English option's `data-value` URL, and re-requests. Data-driven — uses Audible's own English URL rather than guessing. + 3. **Fallback URL rewrite:** If no locale picker found, strips the culture code from the path and adds `language=en_US` query param (mirrors picker pattern). +- **Verification:** After correction, validates the response URL no longer contains a non-English culture code and logs success/failure. +- **Location:** `src/lib/integrations/audible.service.ts` — `handleLocaleRedirect()`, `initialize()` +- **Affects:** All Audible scraping: popular, new releases, search, detail pages (via `fetchWithRetry`) diff --git a/package.json b/package.json index cf6c8b3..23857e6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "readmeabook", - "version": "1.0.0", + "version": "1.0.1", "private": true, "scripts": { "dev": "next dev", diff --git a/src/lib/integrations/audible.service.ts b/src/lib/integrations/audible.service.ts index f2afe9d..6474122 100644 --- a/src/lib/integrations/audible.service.ts +++ b/src/lib/integrations/audible.service.ts @@ -88,6 +88,7 @@ export class AudibleService { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.9', + 'Cookie': 'lc-acbus=en_US', // Force English locale (prevents IP-based 
language redirect for non-US IPs) }, params: { ipRedirectOverride: 'true', // Prevent IP-based region redirects @@ -107,6 +108,7 @@ export class AudibleService { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.9', + 'Cookie': 'lc-acbus=en_US', // Force English locale }, params: { ipRedirectOverride: 'true', @@ -116,6 +118,108 @@ export class AudibleService { } } + /** + * Detect and correct non-English locale pages from Audible. + * + * Audible uses IP geolocation to serve locale-specific pages by adding culture + * codes to URLs (e.g., /adblbestsellers → /es_US/charts/best for Spanish-speaking IPs). + * ipRedirectOverride only prevents region redirects (audible.com → audible.co.uk), + * NOT language/locale redirects within the same region. + * + * Strategy (data-driven): + * 1. Check response URL for any non-English culture code (xx_YY where xx != 'en') + * 2. Parse the page's locale picker (adbl-toggle-chip elements) to find the English URL + * 3. Re-request using Audible's own English URL (from the picker's data-value attribute) + * 4. Fallback: strip culture code from URL + add language=en_US param if no picker found + * + * Returns corrected response, or null if no correction needed. 
+ */ + private async handleLocaleRedirect(response: any): Promise<any | null> { + try { + // Extract final URL after all redirects (Node.js http internals) + const finalUrl: string = response.request?.res?.responseUrl || + response.request?._redirectable?._currentUrl || ''; + + if (!finalUrl) return null; + + // Check for non-English culture code in URL path + // Culture codes: xx_YY (e.g., es_US, fr_CA, pt_BR, de_DE, ja_JP) + // Match in path segment: must follow a / and be followed by / or end-of-path or query string + const localeMatch = finalUrl.match(/\/([a-z]{2}_[A-Z]{2})(\/|$|\?)/); + if (!localeMatch || localeMatch[1].startsWith('en')) { + return null; // No culture code found, or already English + } + + const detectedLocale = localeMatch[1]; + logger.warn(`Detected non-English locale (${detectedLocale}) in Audible response URL: ${finalUrl}`); + + // --- Primary strategy: parse the locale picker from the page HTML --- + // Audible pages include a locale picker with web components: + // <adbl-toggle-chip data-locale="en_CA" data-value="/charts/best?language=en_CA">English</adbl-toggle-chip> + // <adbl-toggle-chip data-locale="fr_CA" data-value="…">Français</adbl-toggle-chip> + // The English option's data-value gives us the exact correct English URL for this page. 
+ const $ = cheerio.load(response.data); + const englishChip = $('adbl-toggle-chip[data-locale^="en"]').first(); + + if (englishChip.length > 0) { + const englishPath = englishChip.attr('data-value'); + const englishLocale = englishChip.attr('data-locale'); + + if (englishPath) { + logger.info(`Found English option (${englishLocale}) in locale picker: ${englishPath}`); + + // Re-request using the English URL from the picker + // data-value is a relative path (e.g., "/charts/best?language=en_CA") + // Client defaults add ipRedirectOverride=true automatically + const correctedResponse = await this.client.get(englishPath); + + // Verify the correction actually resolved to English + const correctedUrl: string = correctedResponse.request?.res?.responseUrl || + correctedResponse.request?._redirectable?._currentUrl || ''; + if (correctedUrl) { + const verifyMatch = correctedUrl.match(/\/([a-z]{2}_[A-Z]{2})(\/|$|\?)/); + if (verifyMatch && !verifyMatch[1].startsWith('en')) { + logger.warn(`Locale correction incomplete — corrected URL still contains non-English locale (${verifyMatch[1]}): ${correctedUrl}`); + } else { + logger.info(`Locale correction successful (${detectedLocale} → ${englishLocale})`); + } + } + + return correctedResponse; + } + + logger.warn('English locale chip found but missing data-value attribute'); + } else { + logger.warn('No locale picker found on page, attempting fallback URL rewrite'); + } + + // --- Fallback strategy: URL rewrite --- + // Strip the non-English culture code from the path and add language=en_US param. + // This mirrors the locale picker pattern: English URLs have no prefix + language param. 
+ try { + const urlObj = new URL(finalUrl); + urlObj.pathname = urlObj.pathname.replace(`/${detectedLocale}`, ''); + urlObj.searchParams.set('language', 'en_US'); + + // Build relative path (client will prepend baseURL) + const fallbackPath = urlObj.pathname + urlObj.search; + logger.info(`Fallback: re-requesting with URL rewrite: ${fallbackPath}`); + + return await this.client.get(fallbackPath); + } catch (urlError) { + logger.warn('Fallback URL rewrite failed', { + error: urlError instanceof Error ? urlError.message : String(urlError), + }); + } + } catch (error) { + logger.debug('Locale correction failed entirely, using original response', { + error: error instanceof Error ? error.message : String(error), + }); + } + + return null; + } + /** * Fetch with retry logic and exponential backoff * Retries on network errors and rate limiting (503, 429) @@ -129,7 +233,10 @@ export class AudibleService { for (let attempt = 0; attempt <= maxRetries; attempt++) { try { - return await this.client.get(url, config); + const response = await this.client.get(url, config); + + // Check if redirected to non-English locale (e.g., /es_US/) and correct it + return await this.handleLocaleRedirect(response) || response; } catch (error: any) { lastError = error; const status = error.response?.status;