Add language config and locale-aware parsing

Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Adds src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replaces AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, ebook scraper, processors, and API routes to use getLanguageForRegion so Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
This commit is contained in:
kikootwo
2026-02-20 06:32:44 -05:00
parent c146383735
commit 5d8ac2f73d
18 changed files with 525 additions and 112 deletions
@@ -18,6 +18,8 @@ import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
import { getAudibleService } from '@/lib/integrations/audible.service';
import { RMABLogger } from '@/lib/utils/logger';
import { resolveInteractiveSearchAccess } from '@/lib/utils/permissions';
import { getLanguageForRegion } from '@/lib/constants/language-config';
import type { AudibleRegion } from '@/lib/types/audible';
import {
searchByAsin,
searchByTitle,
@@ -227,6 +229,11 @@ export async function POST(
const format = preferredFormat || 'epub';
const annasBaseUrl = baseUrl || 'https://annas-archive.li';
// Get language code from Audible region config
const region = await configService.getAudibleRegion() as AudibleRegion;
const langConfig = getLanguageForRegion(region);
const languageCode = langConfig.annasArchiveLang;
if (!isAnnasArchiveEnabled && !isIndexerSearchEnabled) {
return NextResponse.json(
{ error: 'No ebook sources enabled. Enable Anna\'s Archive or Indexer Search in settings.' },
@@ -250,7 +257,8 @@ export async function POST(
audiobook.author,
format,
annasBaseUrl,
flaresolverrUrl || undefined
flaresolverrUrl || undefined,
languageCode
).catch((err) => {
logger.error(`Anna's Archive search failed: ${err.message}`);
return null;
@@ -322,7 +330,8 @@ async function searchAnnasArchiveForInteractive(
author: string,
preferredFormat: string,
baseUrl: string,
flaresolverrUrl?: string
flaresolverrUrl?: string,
languageCode: string = 'en'
): Promise<EbookSearchResult[]> {
let md5: string | null = null;
let searchMethod: 'asin' | 'title' = 'title';
@@ -330,7 +339,7 @@ async function searchAnnasArchiveForInteractive(
// Try ASIN search first
if (asin) {
logger.info(`Searching Anna's Archive by ASIN: ${asin}`);
md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl);
md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
if (md5) {
searchMethod = 'asin';
logger.info(`Found via ASIN: ${md5}`);
@@ -340,7 +349,7 @@ async function searchAnnasArchiveForInteractive(
// Fallback to title search
if (!md5) {
logger.info(`Searching Anna's Archive by title: "${title}"`);
md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl);
md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
if (md5) {
logger.info(`Found via title: ${md5}`);
}
@@ -461,6 +470,10 @@ async function searchIndexersForInteractive(
return [];
}
// Get language-specific stop words for ranking
const rankRegion = await configService.getAudibleRegion() as AudibleRegion;
const rankLangConfig = getLanguageForRegion(rankRegion);
// Rank results with ebook scoring
const rankedResults = rankEbookTorrents(allResults, {
title,
@@ -470,6 +483,8 @@ async function searchIndexersForInteractive(
indexerPriorities,
flagConfigs,
requireAuthor: false,
stopWords: rankLangConfig.stopWords,
characterReplacements: rankLangConfig.characterReplacements,
});
// Convert to unified result type