Add language config and locale-aware parsing

Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Add src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replace AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, ebook scraper, processors, and API routes to use getLanguageForRegion so Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
2026-07-18 02:31:10 +00:00 · 2026-02-20 06:32:44 -05:00
parent c146383735
commit 5d8ac2f73d
18 changed files with 525 additions and 112 deletions
@@ -164,11 +164,11 @@ export function AudiobookshelfSection({
        >
          {Object.values(AUDIBLE_REGIONS).map((region) => (
            <option key={region.code} value={region.code}>
-              {region.name}{!region.isEnglish ? ' *' : ''}
+              {region.name}{region.language !== 'en' ? ' *' : ''}
            </option>
          ))}
        </select>
-        {AUDIBLE_REGIONS[settings.audibleRegion as keyof typeof AUDIBLE_REGIONS]?.isEnglish === false && (
+        {AUDIBLE_REGIONS[settings.audibleRegion as keyof typeof AUDIBLE_REGIONS]?.language !== 'en' && (
          <div className="bg-amber-50 dark:bg-amber-900/20 rounded-lg p-4 border border-amber-200 dark:border-amber-800 mt-2">
            <div className="flex gap-3">
              <svg
@@ -164,11 +164,11 @@ export function PlexSection({
        >
          {Object.values(AUDIBLE_REGIONS).map((region) => (
            <option key={region.code} value={region.code}>
-              {region.name}{!region.isEnglish ? ' *' : ''}
+              {region.name}{region.language !== 'en' ? ' *' : ''}
            </option>
          ))}
        </select>
-        {AUDIBLE_REGIONS[settings.audibleRegion as keyof typeof AUDIBLE_REGIONS]?.isEnglish === false && (
+        {AUDIBLE_REGIONS[settings.audibleRegion as keyof typeof AUDIBLE_REGIONS]?.language !== 'en' && (
          <div className="bg-amber-50 dark:bg-amber-900/20 rounded-lg p-4 border border-amber-200 dark:border-amber-800 mt-2">
            <div className="flex gap-3">
              <svg
@@ -18,6 +18,8 @@ import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
 import { getAudibleService } from '@/lib/integrations/audible.service';
 import { RMABLogger } from '@/lib/utils/logger';
 import { resolveInteractiveSearchAccess } from '@/lib/utils/permissions';
+import { getLanguageForRegion } from '@/lib/constants/language-config';
+import type { AudibleRegion } from '@/lib/types/audible';
 import {
  searchByAsin,
  searchByTitle,
@@ -227,6 +229,11 @@ export async function POST(
      const format = preferredFormat || 'epub';
      const annasBaseUrl = baseUrl || 'https://annas-archive.li';

+      // Get language code from Audible region config
+      const region = await configService.getAudibleRegion() as AudibleRegion;
+      const langConfig = getLanguageForRegion(region);
+      const languageCode = langConfig.annasArchiveLang;
+
      if (!isAnnasArchiveEnabled && !isIndexerSearchEnabled) {
        return NextResponse.json(
          { error: 'No ebook sources enabled. Enable Anna\'s Archive or Indexer Search in settings.' },
@@ -250,7 +257,8 @@ export async function POST(
            audiobook.author,
            format,
            annasBaseUrl,
-            flaresolverrUrl || undefined
+            flaresolverrUrl || undefined,
+            languageCode
          ).catch((err) => {
            logger.error(`Anna's Archive search failed: ${err.message}`);
            return null;
@@ -322,7 +330,8 @@ async function searchAnnasArchiveForInteractive(
  author: string,
  preferredFormat: string,
  baseUrl: string,
-  flaresolverrUrl?: string
+  flaresolverrUrl?: string,
+  languageCode: string = 'en'
 ): Promise<EbookSearchResult[]> {
  let md5: string | null = null;
  let searchMethod: 'asin' | 'title' = 'title';
@@ -330,7 +339,7 @@ async function searchAnnasArchiveForInteractive(
  // Try ASIN search first
  if (asin) {
    logger.info(`Searching Anna's Archive by ASIN: ${asin}`);
-    md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl);
+    md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
    if (md5) {
      searchMethod = 'asin';
      logger.info(`Found via ASIN: ${md5}`);
@@ -340,7 +349,7 @@ async function searchAnnasArchiveForInteractive(
  // Fallback to title search
  if (!md5) {
    logger.info(`Searching Anna's Archive by title: "${title}"`);
-    md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl);
+    md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
    if (md5) {
      logger.info(`Found via title: ${md5}`);
    }
@@ -461,6 +470,10 @@ async function searchIndexersForInteractive(
    return [];
  }

+  // Get language-specific stop words for ranking
+  const rankRegion = await configService.getAudibleRegion() as AudibleRegion;
+  const rankLangConfig = getLanguageForRegion(rankRegion);
+
  // Rank results with ebook scoring
  const rankedResults = rankEbookTorrents(allResults, {
    title,
@@ -470,6 +483,8 @@ async function searchIndexersForInteractive(
    indexerPriorities,
    flagConfigs,
    requireAuthor: false,
+    stopWords: rankLangConfig.stopWords,
+    characterReplacements: rankLangConfig.characterReplacements,
  });

  // Convert to unified result type
@@ -10,6 +10,8 @@ import { requireAuth, AuthenticatedRequest } from '@/lib/middleware/auth';
 import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
 import { rankTorrents } from '@/lib/utils/ranking-algorithm';
 import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
+import { getLanguageForRegion } from '@/lib/constants/language-config';
+import type { AudibleRegion } from '@/lib/types/audible';
 import { z } from 'zod';
 import { RMABLogger } from '@/lib/utils/logger';

@@ -140,13 +142,19 @@ export async function POST(request: NextRequest) {
        logger.info(`Will filter ${belowThreshold.length} results < ${sizeMBThreshold} MB (likely ebooks)`);
      }

+      // Get language-specific stop words for ranking
+      const region = await configService.getAudibleRegion() as AudibleRegion;
+      const langConfig = getLanguageForRegion(region);
+
      // Rank torrents using the ranking algorithm with indexer priorities and flag configs
      // Note: rankTorrents now filters out results < 20 MB internally
      // requireAuthor: false - interactive search, show all results for user decision
      const rankedResults = rankTorrents(results, { title, author, durationMinutes }, {
        indexerPriorities,
        flagConfigs,
-        requireAuthor: false  // Interactive mode - let user decide
+        requireAuthor: false,  // Interactive mode - let user decide
+        stopWords: langConfig.stopWords,
+        characterReplacements: langConfig.characterReplacements,
      });

      // Log filter results
@@ -14,6 +14,8 @@ import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
 import { rankEbookTorrents, RankedEbookTorrent } from '@/lib/utils/ranking-algorithm';
 import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
 import { RMABLogger } from '@/lib/utils/logger';
+import { getLanguageForRegion } from '@/lib/constants/language-config';
+import type { AudibleRegion } from '@/lib/types/audible';
 import {
  searchByAsin,
  searchByTitle,
@@ -121,6 +123,11 @@ export async function POST(
        const format = preferredFormat || 'epub';
        const annasBaseUrl = baseUrl || 'https://annas-archive.li';

+        // Get language code from Audible region config
+        const region = await configService.getAudibleRegion() as AudibleRegion;
+        const langConfig = getLanguageForRegion(region);
+        const languageCode = langConfig.annasArchiveLang;
+
        if (!isAnnasArchiveEnabled && !isIndexerSearchEnabled) {
          return NextResponse.json(
            { error: 'No ebook sources enabled. Enable Anna\'s Archive or Indexer Search in settings.' },
@@ -145,7 +152,8 @@ export async function POST(
              audiobook.author,
              format,
              annasBaseUrl,
-              flaresolverrUrl || undefined
+              flaresolverrUrl || undefined,
+              languageCode
            ).catch((err) => {
              logger.error(`Anna's Archive search failed: ${err.message}`);
              return null;
@@ -217,7 +225,8 @@ async function searchAnnasArchiveForInteractive(
  author: string,
  preferredFormat: string,
  baseUrl: string,
-  flaresolverrUrl?: string
+  flaresolverrUrl?: string,
+  languageCode: string = 'en'
 ): Promise<EbookSearchResult[]> {
  let md5: string | null = null;
  let searchMethod: 'asin' | 'title' = 'title';
@@ -225,7 +234,7 @@ async function searchAnnasArchiveForInteractive(
  // Try ASIN search first
  if (asin) {
    logger.info(`Searching Anna's Archive by ASIN: ${asin}`);
-    md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl);
+    md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
    if (md5) {
      searchMethod = 'asin';
      logger.info(`Found via ASIN: ${md5}`);
@@ -235,7 +244,7 @@ async function searchAnnasArchiveForInteractive(
  // Fallback to title search
  if (!md5) {
    logger.info(`Searching Anna's Archive by title: "${title}"`);
-    md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl);
+    md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
    if (md5) {
      logger.info(`Found via title: ${md5}`);
    }
@@ -356,6 +365,10 @@ async function searchIndexersForInteractive(
    return [];
  }

+  // Get language-specific stop words for ranking
+  const rankRegion = await configService.getAudibleRegion() as AudibleRegion;
+  const rankLangConfig = getLanguageForRegion(rankRegion);
+
  // Rank results with ebook scoring
  // Use requireAuthor=false for interactive mode (let user decide)
  const rankedResults = rankEbookTorrents(allResults, {
@@ -366,6 +379,8 @@ async function searchIndexersForInteractive(
    indexerPriorities,
    flagConfigs,
    requireAuthor: false,
+    stopWords: rankLangConfig.stopWords,
+    characterReplacements: rankLangConfig.characterReplacements,
  });

  // Log ranking debug info (same format as search-ebook.processor.ts)
@@ -9,6 +9,8 @@ import { prisma } from '@/lib/db';
 import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
 import { rankTorrents } from '@/lib/utils/ranking-algorithm';
 import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
+import { getLanguageForRegion } from '@/lib/constants/language-config';
+import type { AudibleRegion } from '@/lib/types/audible';
 import { RMABLogger } from '@/lib/utils/logger';
 import { resolveInteractiveSearchAccess } from '@/lib/utils/permissions';

@@ -189,6 +191,10 @@ export async function POST(
        }
      }

+      // Get language-specific stop words for ranking
+      const region = await configService.getAudibleRegion() as AudibleRegion;
+      const langConfig = getLanguageForRegion(region);
+
      // Rank torrents using the ranking algorithm with indexer priorities and flag configs
      // Always use the audiobook's title/author for ranking (not custom search query)
      // requireAuthor: false - interactive mode, show all results for user decision
@@ -199,7 +205,9 @@ export async function POST(
      }, {
        indexerPriorities,
        flagConfigs,
-        requireAuthor: false  // Interactive mode - let user decide
+        requireAuthor: false,  // Interactive mode - let user decide
+        stopWords: langConfig.stopWords,
+        characterReplacements: langConfig.characterReplacements,
      });

      // No threshold filtering for interactive search - show all results
@@ -115,11 +115,11 @@ export function BackendSelectionStep({
        >
          {Object.values(AUDIBLE_REGIONS).map((region) => (
            <option key={region.code} value={region.code}>
-              {region.name}{!region.isEnglish ? ' *' : ''}
+              {region.name}{region.language !== 'en' ? ' *' : ''}
            </option>
          ))}
        </select>
-        {AUDIBLE_REGIONS[audibleRegion]?.isEnglish === false && (
+        {AUDIBLE_REGIONS[audibleRegion]?.language !== 'en' && (
          <div className="bg-amber-50 dark:bg-amber-900/20 rounded-lg p-4 border border-amber-200 dark:border-amber-800 mt-2">
            <div className="flex gap-3">
              <svg
@@ -0,0 +1,252 @@
+/**
+ * Component: Centralized Language Configuration
+ * Documentation: documentation/integrations/audible.md
+ *
+ * Single source of truth for all language-specific configuration.
+ * To add a new language:
+ * 1. Add code to SupportedLanguage union
+ * 2. Add full LanguageConfig entry in LANGUAGE_CONFIGS
+ * 3. Map regions in REGION_LANGUAGE_MAP
+ * 4. Add region to AUDIBLE_REGIONS in audible.ts with language: 'xx'
+ */
+
+import type { AudibleRegion } from '../types/audible';
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/**
+ * Language codes that have a full LanguageConfig entry.
+ * These are the keys of LANGUAGE_CONFIGS and the values of REGION_LANGUAGE_MAP.
+ */
+export type SupportedLanguage = 'en' | 'de' | 'es';
+
+/**
+ * Per-language labels, selector fragments and regex patterns used when scraping Audible pages.
+ * One instance is stored under `scraping` in each LanguageConfig.
+ */
+export interface ScrapingConfig {
+  /** Audible locale query-param value, sent as the `language` request param (e.g. 'english', 'deutsch') */
+  audibleLocaleParam: string;
+  /** Author label prefixes to strip (e.g. ['By:', 'Written by:']) */
+  authorPrefixes: string[];
+  /** Narrator label prefixes to strip */
+  narratorPrefixes: string[];
+  /** Length / duration labels used in Cheerio :contains() selectors */
+  lengthLabels: string[];
+  /** Language field labels (used both in :contains() selectors and to extract the value after the label) */
+  languageLabels: string[];
+  /** Release date field labels */
+  releaseDateLabels: string[];
+  /** Accepted language values for filtering (lowercase) */
+  acceptedLanguageValues: string[];
+  /** Regex patterns that match hour portions in runtime strings (capture group 1 is the number) */
+  runtimeHourPatterns: RegExp[];
+  /** Regex patterns that match minute portions in runtime strings (capture group 1 is the number) */
+  runtimeMinutePatterns: RegExp[];
+  /** Regex patterns for extracting numeric rating (capture group 1; may use ',' as decimal separator) */
+  ratingPatterns: RegExp[];
+  /** Regex patterns for extracting release date text */
+  releaseDatePatterns: RegExp[];
+  /** Promotional / non-description text patterns to exclude */
+  descriptionExcludePatterns: RegExp[];
+  /** Duration detection pattern for generic element scanning */
+  durationDetectionPattern: RegExp;
+  /** Literal rating text placed inside a span:contains("...") selector (e.g. 'out of 5 stars') */
+  ratingTextSelector: string;
+}
+
+/**
+ * Everything language-specific for one supported language: search filter codes,
+ * ranking inputs (stop words, character replacements) and Audible scraping rules.
+ */
+export interface LanguageConfig {
+  code: SupportedLanguage;
+  /** Anna's Archive language filter code (passed to the Anna's Archive search helpers) */
+  annasArchiveLang: string;
+  /** EPUB language code */
+  epubCode: string;
+  /** Stop words for ranking algorithm (filtered from match scoring) */
+  stopWords: string[];
+  /** Character replacements applied before NFD normalization in ranking (e.g. ß→ss) */
+  characterReplacements: Record<string, string>;
+  /** All scraping-related config */
+  scraping: ScrapingConfig;
+}
+
+// ---------------------------------------------------------------------------
+// Language Configurations
+// ---------------------------------------------------------------------------
+
+/**
+ * English configuration (code 'en').
+ * REGION_LANGUAGE_MAP assigns it to the us, ca, uk, au and in regions.
+ */
+const ENGLISH_CONFIG: LanguageConfig = {
+  code: 'en',
+  annasArchiveLang: 'en',
+  epubCode: 'en',
+  stopWords: ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for'],
+  characterReplacements: {},
+  scraping: {
+    audibleLocaleParam: 'english',
+    authorPrefixes: ['By:', 'Written by:'],
+    narratorPrefixes: ['Narrated by:'],
+    lengthLabels: ['Length:'],
+    languageLabels: ['Language:'],
+    releaseDateLabels: ['Release date:'],
+    acceptedLanguageValues: ['english'],
+    runtimeHourPatterns: [/(\d+)\s*hrs?/i, /(\d+)\s*hours?/i],
+    runtimeMinutePatterns: [/(\d+)\s*mins?/i, /(\d+)\s*minutes?/i],
+    ratingPatterns: [/(\d+\.?\d*)\s*out of/i],
+    releaseDatePatterns: [/Release date:\s*(.+)/i],
+    descriptionExcludePatterns: [
+      /\$\d+\.\d+/, // Price patterns
+      /cancel anytime/i,
+      /free trial/i,
+      /membership/i,
+      /subscribe/i,
+      /offer.*ends/i,
+      /^\s*by\s+[\w\s,]+$/i, // Just author names
+    ],
+    durationDetectionPattern: /\d+\s*(hr|hour|h)\s*\d*\s*(min|minute|m)?/i,
+    ratingTextSelector: 'out of 5 stars',
+  },
+};
+
+/**
+ * German configuration (code 'de').
+ * REGION_LANGUAGE_MAP assigns it to the 'de' region.
+ */
+const GERMAN_CONFIG: LanguageConfig = {
+  code: 'de',
+  annasArchiveLang: 'de',
+  epubCode: 'de',
+  stopWords: ['der', 'die', 'das', 'ein', 'eine', 'und', 'von', 'zu', 'den', 'dem', 'des'],
+  characterReplacements: { '\u00df': 'ss' },
+  scraping: {
+    audibleLocaleParam: 'deutsch',
+    authorPrefixes: ['Von:', 'Geschrieben von:', 'Autor:'],
+    narratorPrefixes: ['Gesprochen von:', 'Sprecher:'],
+    lengthLabels: ['Spieldauer:', 'Dauer:', 'L\u00e4nge:'],
+    languageLabels: ['Sprache:'],
+    releaseDateLabels: ['Erscheinungsdatum:'],
+    acceptedLanguageValues: ['deutsch', 'german'],
+    runtimeHourPatterns: [/(\d+)\s*Std\.?/i, /(\d+)\s*Stunden?/i],
+    runtimeMinutePatterns: [/(\d+)\s*Min\.?/i, /(\d+)\s*Minuten?/i],
+    ratingPatterns: [/(\d+[.,]?\d*)\s*von\s*5/i],
+    releaseDatePatterns: [/Erscheinungsdatum:\s*(.+)/i],
+    descriptionExcludePatterns: [
+      /\$\d+\.\d+/,
+      /\d+,\d+\s*\u20ac/,
+      /jederzeit k\u00fcndbar/i,
+      /kostenlos testen/i,
+      /Mitgliedschaft/i,
+      /abonnieren/i,
+      /Angebot.*endet/i,
+      // Author-only line ("von <names>"). JavaScript's \w is ASCII-only, so the accented
+      // Latin range \u00c0-\u024f is added; otherwise names with umlauts or sharp s never match.
+      /^\s*von\s+[\w\u00c0-\u024f\s,]+$/i,
+    ],
+    durationDetectionPattern: /\d+\s*(Std|Stunden?|h)\s*\.?\s*\d*\s*(Min|Minuten?|m)?/i,
+    ratingTextSelector: 'von 5 Sternen',
+  },
+};
+
+/**
+ * Spanish configuration (code 'es').
+ * REGION_LANGUAGE_MAP assigns it to the 'es' region.
+ */
+const SPANISH_CONFIG: LanguageConfig = {
+  code: 'es',
+  annasArchiveLang: 'es',
+  epubCode: 'es',
+  stopWords: ['el', 'la', 'los', 'las', 'un', 'una', 'de', 'del', 'en', 'y', 'por'],
+  characterReplacements: {},
+  scraping: {
+    audibleLocaleParam: 'espa\u00f1ol',
+    authorPrefixes: ['De:', 'Escrito por:', 'Autor:'],
+    narratorPrefixes: ['Narrado por:'],
+    lengthLabels: ['Duraci\u00f3n:'],
+    languageLabels: ['Idioma:'],
+    releaseDateLabels: ['Fecha de lanzamiento:'],
+    acceptedLanguageValues: ['espa\u00f1ol', 'spanish'],
+    runtimeHourPatterns: [/(\d+)\s*h\b/i, /(\d+)\s*horas?/i],
+    runtimeMinutePatterns: [/(\d+)\s*min/i, /(\d+)\s*minutos?/i],
+    ratingPatterns: [/(\d+[.,]?\d*)\s*de\s*5/i],
+    releaseDatePatterns: [/Fecha de lanzamiento:\s*(.+)/i],
+    descriptionExcludePatterns: [
+      /\$\d+\.\d+/,
+      /\d+,\d+\s*\u20ac/,
+      /cancela cuando quieras/i,
+      /prueba gratis/i,
+      /suscripci\u00f3n/i,
+      /suscr\u00edbete/i,
+      /oferta.*termina/i,
+      // Author-only line ("de <names>"). JavaScript's \w is ASCII-only, so the accented
+      // Latin range \u00c0-\u024f is added; otherwise names with accents or n-tilde never match.
+      /^\s*de\s+[\w\u00c0-\u024f\s,]+$/i,
+    ],
+    durationDetectionPattern: /\d+\s*(h|horas?)\s*\d*\s*(min|minutos?)?/i,
+    ratingTextSelector: 'de 5 estrellas',
+  },
+};
+
+// ---------------------------------------------------------------------------
+// Lookup Maps
+// ---------------------------------------------------------------------------
+
+/** Language configurations keyed by language code; the Record type requires one entry per SupportedLanguage. */
+export const LANGUAGE_CONFIGS: Record<SupportedLanguage, LanguageConfig> = {
+  en: ENGLISH_CONFIG,
+  de: GERMAN_CONFIG,
+  es: SPANISH_CONFIG,
+};
+
+/**
+ * Maps Audible region codes to language codes.
+ * All English-speaking regions map to 'en'.
+ * Typed as Record<AudibleRegion, SupportedLanguage>, so the type checker requires
+ * an entry for every member of the AudibleRegion union.
+ */
+export const REGION_LANGUAGE_MAP: Record<AudibleRegion, SupportedLanguage> = {
+  us: 'en',
+  ca: 'en',
+  uk: 'en',
+  au: 'en',
+  in: 'en',
+  de: 'de',
+  es: 'es',
+};
+
+// ---------------------------------------------------------------------------
+// Helper Functions
+// ---------------------------------------------------------------------------
+
+/**
+ * Get the full language configuration for an Audible region.
+ *
+ * Callers read the region from stored configuration and cast it to AudibleRegion,
+ * so at runtime the value may not be a key of REGION_LANGUAGE_MAP. An unmapped
+ * region falls back to the English configuration instead of yielding undefined,
+ * which would make callers throw on their first property access.
+ *
+ * @param region - Audible region code (e.g. 'us', 'de').
+ * @returns The LanguageConfig mapped to the region, or the English config when the region is unmapped.
+ */
+export function getLanguageForRegion(region: AudibleRegion): LanguageConfig {
+  // The Record types promise both lookups succeed; `??` covers values that bypassed the types via a cast.
+  const langCode = REGION_LANGUAGE_MAP[region] ?? 'en';
+  return LANGUAGE_CONFIGS[langCode] ?? ENGLISH_CONFIG;
+}
+
+/**
+ * Remove a leading label such as 'By:' from scraped text.
+ *
+ * The input is trimmed, then the prefixes are tried in order with a
+ * case-insensitive comparison. Only the first prefix that matches is removed,
+ * and the remainder is trimmed again. Without a match the trimmed input is returned.
+ *
+ * Example: stripPrefixes('By: Author Name', ['By:', 'Written by:']) => 'Author Name'
+ */
+export function stripPrefixes(text: string, prefixes: string[]): string {
+  const cleaned = text.trim();
+  const lowered = cleaned.toLowerCase();
+  const hit = prefixes.find((candidate) => lowered.startsWith(candidate.toLowerCase()));
+  return hit === undefined ? cleaned : cleaned.slice(hit.length).trim();
+}
+
+/**
+ * Produce a selector list with one `element:contains("label")` clause per label,
+ * joined by ', ' so that an element matching any label is selected.
+ * Labels are inserted verbatim between double quotes.
+ *
+ * Example: buildContainsSelector('span', ['Length:', 'Dauer:'])
+ *   => 'span:contains("Length:"), span:contains("Dauer:")'
+ */
+export function buildContainsSelector(element: string, labels: string[]): string {
+  const clauses: string[] = [];
+  for (const label of labels) {
+    clauses.push(element + ':contains("' + label + '")');
+  }
+  return clauses.join(', ');
+}
+
+/**
+ * Run each pattern against the text in order and return the first non-empty
+ * first capture group, trimmed. A pattern that does not match, or whose first
+ * group is empty, is skipped. Returns null when no pattern yields a value.
+ */
+export function extractByPatterns(text: string, patterns: RegExp[]): string | null {
+  for (const candidate of patterns) {
+    const firstGroup = text.match(candidate)?.[1];
+    if (firstGroup) {
+      return firstGroup.trim();
+    }
+  }
+  return null;
+}
+
+/**
+ * Tell whether a scraped language value is one of the values the given
+ * language config accepts. The value is lower-cased and trimmed before an
+ * exact comparison against config.scraping.acceptedLanguageValues.
+ */
+export function isAcceptedLanguage(languageValue: string, config: LanguageConfig): boolean {
+  const needle = languageValue.toLowerCase().trim();
+  const { acceptedLanguageValues } = config.scraping;
+  return acceptedLanguageValues.some((accepted) => accepted === needle);
+}
@@ -8,6 +8,14 @@ import * as cheerio from 'cheerio';
 import { RMABLogger } from '../utils/logger';
 import { getConfigService } from '../services/config.service';
 import { AudibleRegion, AUDIBLE_REGIONS, DEFAULT_AUDIBLE_REGION } from '../types/audible';
+import {
+  getLanguageForRegion,
+  stripPrefixes,
+  buildContainsSelector,
+  extractByPatterns,
+  isAcceptedLanguage,
+  type LanguageConfig,
+} from '../constants/language-config';
 import {
  pickUserAgent,
  getBrowserHeaders,
@@ -69,6 +77,13 @@ export class AudibleService {
    return this.baseUrl;
  }

+  /**
+   * Get the language config for the current region.
+   *
+   * Delegates to getLanguageForRegion with `this.region` on every call; nothing is cached here.
+   */
+  private getLangConfig(): LanguageConfig {
+    return getLanguageForRegion(this.region);
+  }
+
  /**
   * Force re-initialization (used when region config changes)
   */
@@ -106,6 +121,9 @@ export class AudibleService {

      logger.info(`Initializing Audible service with region: ${this.region} (${this.baseUrl})`);

+      // Get language config for the region
+      const langConfig = getLanguageForRegion(this.region);
+
      // Create axios client with region-specific base URL and realistic browser headers
      this.client = axios.create({
        baseURL: this.baseUrl,
@@ -113,7 +131,7 @@ export class AudibleService {
        headers: getBrowserHeaders(this.sessionUserAgent),
        params: {
          ipRedirectOverride: 'true', // Prevent IP-based region redirects
-          language: 'english', // Force English locale (prevents IP-based language serving for non-English IPs)
+          language: langConfig.scraping.audibleLocaleParam, // Force locale (prevents IP-based language serving)
        },
      });

@@ -125,13 +143,16 @@ export class AudibleService {
      this.baseUrl = AUDIBLE_REGIONS[this.region].baseUrl;
      this.sessionUserAgent = pickUserAgent();
      this.pacer.reset();
+
+      const fallbackLangConfig = getLanguageForRegion(this.region);
+
      this.client = axios.create({
        baseURL: this.baseUrl,
        timeout: 15000,
        headers: getBrowserHeaders(this.sessionUserAgent),
        params: {
          ipRedirectOverride: 'true',
-          language: 'english',
+          language: fallbackLangConfig.scraping.audibleLocaleParam,
        },
      });
      this.initialized = true;
@@ -289,12 +310,14 @@ export class AudibleService {
          const ratingText = $el.find('.ratingsLabel').text().trim();
          const rating = ratingText ? parseFloat(ratingText.split(' ')[0]) : undefined;

+          const langConfig = this.getLangConfig();
+
          audiobooks.push({
            asin,
            title,
-            author: authorText.replace('By:', '').replace('Written by:', '').trim(),
+            author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
            authorAsin: authorAsinMatch?.[1] || undefined,
-            narrator: narratorText.replace('Narrated by:', '').trim(),
+            narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
            coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
            rating,
          });
@@ -391,12 +414,14 @@ export class AudibleService {
          const ratingText = $el.find('.ratingsLabel').text().trim();
          const rating = ratingText ? parseFloat(ratingText.split(' ')[0]) : undefined;

+          const langConfig = this.getLangConfig();
+
          audiobooks.push({
            asin,
            title,
-            author: authorText.replace('By:', '').replace('Written by:', '').trim(),
+            author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
            authorAsin: authorAsinMatch?.[1] || undefined,
-            narrator: narratorText.replace('Narrated by:', '').trim(),
+            narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
            coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
            rating,
          });
@@ -487,9 +512,11 @@ export class AudibleService {

        const coverArtUrl = $el.find('img').attr('src') || '';

+        const langConfig = this.getLangConfig();
+
        // Extract runtime/duration
        const runtimeText = $el.find('.runtimeLabel').text().trim() ||
-                           $el.find('span:contains("Length:")').text().trim();
+                           $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
        const durationMinutes = this.parseRuntime(runtimeText);

        // Extract rating
@@ -500,9 +527,9 @@ export class AudibleService {
        audiobooks.push({
          asin,
          title,
-          author: authorText.replace('By:', '').replace('Written by:', '').trim(),
+          author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
          authorAsin: authorAsinMatch?.[1] || undefined,
-          narrator: narratorText.replace('Narrated by:', '').trim(),
+          narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
          coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
          durationMinutes,
          rating,
@@ -565,13 +592,15 @@ export class AudibleService {
        $('.s-result-item, .productListItem').each((_index, element) => {
          const $el = $(element);

-          // --- Language filter: require explicit "English" ---
-          const langText = $el.find('span:contains("Language:")').text().trim() ||
+          // --- Language filter: require matching language for region ---
+          const langConfig = this.getLangConfig();
+          const langText = $el.find(buildContainsSelector('span', langConfig.scraping.languageLabels)).text().trim() ||
                           $el.find('.languageLabel').text().trim();
-          // Extract language value (e.g. "Language: English" → "English")
-          const langMatch = langText.match(/Language:\s*(.+)/i);
+          // Extract language value (e.g. "Language: English" -> "English", "Sprache: Deutsch" -> "Deutsch")
+          const langLabelPattern = new RegExp(`(?:${langConfig.scraping.languageLabels.map(l => l.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('|')})\\s*(.+)`, 'i');
+          const langMatch = langText.match(langLabelPattern);
          const language = langMatch?.[1]?.trim();
-          if (!language || language.toLowerCase() !== 'english') return;
+          if (!language || !isAcceptedLanguage(language, langConfig)) return;

          // --- Author ASIN filter: verify target ASIN in author links ---
          const authorLinks = $el.find('a[href*="/author/"]');
@@ -609,7 +638,7 @@ export class AudibleService {
          const coverArtUrl = $el.find('img').attr('src') || '';

          const runtimeText = $el.find('.runtimeLabel').text().trim() ||
-                              $el.find('span:contains("Length:")').text().trim();
+                              $el.find(buildContainsSelector('span', langConfig.scraping.lengthLabels)).text().trim();
          const durationMinutes = this.parseRuntime(runtimeText);

          const ratingText = $el.find('.ratingsLabel').text().trim() ||
@@ -619,9 +648,9 @@ export class AudibleService {
          allBooks.push({
            asin: bookAsin,
            title,
-            author: authorText.replace('By:', '').replace('Written by:', '').trim(),
+            author: stripPrefixes(authorText, langConfig.scraping.authorPrefixes),
            authorAsin,
-            narrator: narratorText.replace('Narrated by:', '').trim(),
+            narrator: stripPrefixes(narratorText, langConfig.scraping.narratorPrefixes),
            coverArtUrl: coverArtUrl.replace(/\._.*_\./, '._SL500_.'),
            durationMinutes,
            rating,
@@ -867,7 +896,8 @@ export class AudibleService {
          result.author = [...new Set(authors)].slice(0, 3).join(', ');
        }

-        result.author = result.author.replace(/^By:\s*/i, '').replace(/^Written by:\s*/i, '').trim();
+        const authorLangConfig = this.getLangConfig();
+        result.author = stripPrefixes(result.author, authorLangConfig.scraping.authorPrefixes);
        logger.info(` Author from HTML: "${result.author}"`);
      }

@@ -911,22 +941,16 @@ export class AudibleService {
        }

        if (result.narrator) {
-          result.narrator = result.narrator.replace(/^Narrated by:\s*/i, '').trim();
+          const detailLangConfig = this.getLangConfig();
+          result.narrator = stripPrefixes(result.narrator, detailLangConfig.scraping.narratorPrefixes);
        }
        logger.info(` Narrator from HTML: "${result.narrator || ''}"`);
      }

      // Description - try multiple approaches with strict filtering
      if (!result.description) {
-        const excludePatterns = [
-          /\$\d+\.\d+/,  // Price patterns
-          /cancel anytime/i,
-          /free trial/i,
-          /membership/i,
-          /subscribe/i,
-          /offer.*ends/i,
-          /^\s*by\s+[\w\s,]+$/i,  // Just author names
-        ];
+        const descLangConfig = this.getLangConfig();
+        const excludePatterns = descLangConfig.scraping.descriptionExcludePatterns;

        const isValidDescription = (text: string): boolean => {
          if (!text || text.length < 50 || text.length > 5000) return false;
@@ -982,18 +1006,20 @@ export class AudibleService {

      // Runtime/Duration - try multiple approaches
      if (!result.durationMinutes) {
+        const rtLangConfig = this.getLangConfig();
+
        // Look for runtime text in various places
        const runtimeText =
          $('li.runtimeLabel span').text().trim() ||
          $('.runtimeLabel').text().trim() ||
-          $('span:contains("Length:")').parent().text().trim() ||
-          $('li:contains("Length:")').text().trim() ||
+          $(buildContainsSelector('span', rtLangConfig.scraping.lengthLabels)).parent().text().trim() ||
+          $(buildContainsSelector('li', rtLangConfig.scraping.lengthLabels)).text().trim() ||
          (() => {
            // Look for any text matching duration pattern
            let found = '';
            $('li, span, div').each((_, elem) => {
              const text = $(elem).text().trim();
-              if (text.match(/\d+\s*(hr|hour|h)\s*\d*\s*(min|minute|m)?/i) && text.length < 100) {
+              if (text.match(rtLangConfig.scraping.durationDetectionPattern) && text.length < 100) {
                found = text;
                return false; // break
              }
@@ -1007,41 +1033,55 @@ export class AudibleService {

      // Rating - try multiple approaches
      if (!result.rating) {
+        const ratingLangConfig = this.getLangConfig();
        const ratingText =
          $('.ratingsLabel').text().trim() ||
          $('[class*="rating"]').first().text().trim() ||
-          $('span:contains("out of 5 stars")').parent().text().trim() ||
+          $(`span:contains("${ratingLangConfig.scraping.ratingTextSelector}")`).parent().text().trim() ||
          (() => {
-            // Look for rating pattern
+            // Look for rating pattern using language-specific patterns
            let found = '';
            $('span, div').each((_, elem) => {
              const text = $(elem).text().trim();
-              if (text.match(/\d+\.?\d*\s*out of\s*5/i) && text.length < 50) {
-                found = text;
-                return false;
+              if (text.length < 50) {
+                for (const pattern of ratingLangConfig.scraping.ratingPatterns) {
+                  if (pattern.test(text)) {
+                    found = text;
+                    return false;
+                  }
+                }
              }
            });
            return found;
          })();

        if (ratingText) {
-          const ratingMatch = ratingText.match(/(\d+\.?\d*)\s*out of/i);
-          result.rating = ratingMatch ? parseFloat(ratingMatch[1]) : undefined;
+          let ratingValue: number | undefined;
+          for (const pattern of ratingLangConfig.scraping.ratingPatterns) {
+            const ratingMatch = ratingText.match(pattern);
+            if (ratingMatch) {
+              // Handle comma as decimal separator (e.g. "4,5" in German/Spanish)
+              ratingValue = parseFloat(ratingMatch[1].replace(',', '.'));
+              break;
+            }
+          }
+          result.rating = ratingValue;
        }
        logger.info(` Rating from "${ratingText}": ${result.rating}`);
      }

      // Release date - try multiple selectors
      if (!result.releaseDate) {
+        const rdLangConfig = this.getLangConfig();
        const releaseDateText =
-          $('li:contains("Release date:")').text().trim() ||
-          $('span:contains("Release date:")').parent().text().trim() ||
+          $(buildContainsSelector('li', rdLangConfig.scraping.releaseDateLabels)).text().trim() ||
+          $(buildContainsSelector('span', rdLangConfig.scraping.releaseDateLabels)).parent().text().trim() ||
          $('[class*="release"]').text().trim();

-        const dateMatch = releaseDateText.match(/Release date:\s*(.+)/i) ||
-                         releaseDateText.match(/(\w+ \d{1,2},? \d{4})/);
+        const dateMatch = extractByPatterns(releaseDateText, rdLangConfig.scraping.releaseDatePatterns) ||
+                         releaseDateText.match(/(\w+ \d{1,2},? \d{4})/)?.[1];
        if (dateMatch) {
-          result.releaseDate = dateMatch[1].trim();
+          result.releaseDate = dateMatch.trim();
        }
        logger.info(` Release date from "${releaseDateText}": ${result.releaseDate}`);
      }
@@ -1078,20 +1118,30 @@ export class AudibleService {
  }

  /**
-   * Parse runtime text to minutes
+   * Parse runtime text to minutes using language-specific patterns
   */
  private parseRuntime(runtimeText: string): number | undefined {
    if (!runtimeText) return undefined;

-    const hoursMatch = runtimeText.match(/(\d+)\s*hrs?/i);
-    const minutesMatch = runtimeText.match(/(\d+)\s*mins?/i);
-
+    const langConfig = this.getLangConfig();
    let totalMinutes = 0;
-    if (hoursMatch) {
-      totalMinutes += parseInt(hoursMatch[1]) * 60;
+
+    // Try each hour pattern until one matches
+    for (const pattern of langConfig.scraping.runtimeHourPatterns) {
+      const match = runtimeText.match(pattern);
+      if (match) {
+        totalMinutes += parseInt(match[1]) * 60;
+        break;
+      }
    }
-    if (minutesMatch) {
-      totalMinutes += parseInt(minutesMatch[1]);
+
+    // Try each minute pattern until one matches
+    for (const pattern of langConfig.scraping.runtimeMinutePatterns) {
+      const match = runtimeText.match(pattern);
+      if (match) {
+        totalMinutes += parseInt(match[1]);
+        break;
+      }
    }

    return totalMinutes > 0 ? totalMinutes : undefined;
@@ -14,6 +14,8 @@ import { RMABLogger } from '../utils/logger';
 import { getProwlarrService } from '../integrations/prowlarr.service';
 import { rankEbookTorrents, RankedEbookTorrent } from '../utils/ranking-algorithm';
 import { groupIndexersByCategories, getGroupDescription } from '../utils/indexer-grouping';
+import { getLanguageForRegion } from '../constants/language-config';
+import type { AudibleRegion } from '../types/audible';

 // Import ebook scraper functions for Anna's Archive
 import {
@@ -151,6 +153,11 @@ async function searchAnnasArchive(
  const baseUrl = await configService.get('ebook_sidecar_base_url') || 'https://annas-archive.li';
  const flaresolverrUrl = await configService.get('ebook_sidecar_flaresolverr_url') || undefined;

+  // Get language code from Audible region config
+  const region = await configService.getAudibleRegion() as AudibleRegion;
+  const langConfig = getLanguageForRegion(region);
+  const languageCode = langConfig.annasArchiveLang;
+
  if (flaresolverrUrl) {
    logger.info(`Using FlareSolverr at ${flaresolverrUrl}`);
  }
@@ -161,7 +168,7 @@ async function searchAnnasArchive(
  // Try ASIN search first (exact match - best)
  if (audiobook.asin) {
    logger.info(`Searching Anna's Archive by ASIN: ${audiobook.asin} (format: ${preferredFormat})...`);
-    md5 = await searchByAsin(audiobook.asin, preferredFormat, baseUrl, logger, flaresolverrUrl);
+    md5 = await searchByAsin(audiobook.asin, preferredFormat, baseUrl, logger, flaresolverrUrl, languageCode);

    if (md5) {
      logger.info(`Found via ASIN: ${md5}`);
@@ -174,7 +181,7 @@ async function searchAnnasArchive(
  // Fallback to title + author search
  if (!md5) {
    logger.info(`Searching Anna's Archive by title + author: "${audiobook.title}" by ${audiobook.author}...`);
-    md5 = await searchByTitle(audiobook.title, audiobook.author, preferredFormat, baseUrl, logger, flaresolverrUrl);
+    md5 = await searchByTitle(audiobook.title, audiobook.author, preferredFormat, baseUrl, logger, flaresolverrUrl, languageCode);

    if (md5) {
      logger.info(`Found via title search: ${md5}`);
@@ -301,6 +308,10 @@ async function searchIndexers(
    logger.info(`Will filter ${aboveThreshold.length} results > 20 MB (too large for ebooks)`);
  }

+  // Get language-specific stop words and character replacements for ranking
+  const ebookRegion = await configService.getAudibleRegion() as AudibleRegion;
+  const ebookLangConfig = getLanguageForRegion(ebookRegion);
+
  // Rank results with ebook-specific scoring
  // This filters out > 20MB and uses inverted size scoring
  const rankedResults = rankEbookTorrents(allResults, {
@@ -311,6 +322,8 @@ async function searchIndexers(
    indexerPriorities,
    flagConfigs,
    requireAuthor: true, // Automatic mode - prevent wrong authors
+    stopWords: ebookLangConfig.stopWords,
+    characterReplacements: ebookLangConfig.characterReplacements,
  });

  // Log filter results
@@ -9,6 +9,8 @@ import { getProwlarrService } from '../integrations/prowlarr.service';
 import { getRankingAlgorithm } from '../utils/ranking-algorithm';
 import { groupIndexersByCategories, getGroupDescription } from '../utils/indexer-grouping';
 import { RMABLogger } from '../utils/logger';
+import { getLanguageForRegion } from '../constants/language-config';
+import type { AudibleRegion } from '../types/audible';

 /**
 * Process search indexers job
@@ -146,8 +148,10 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
      logger.info(`Will filter ${belowThreshold.length} results < ${sizeMBThreshold} MB (likely ebooks)`);
    }

-    // Get ranking algorithm
+    // Get ranking algorithm and language-specific stop words / character replacements
    const ranker = getRankingAlgorithm();
+    const region = await configService.getAudibleRegion() as AudibleRegion;
+    const langConfig = getLanguageForRegion(region);

    // Rank results with indexer priorities and flag configs
    // Note: rankTorrents now filters out results < 20 MB internally
@@ -159,7 +163,9 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
    }, {
      indexerPriorities,
      flagConfigs,
-      requireAuthor: true  // Automatic mode - prevent wrong authors
+      requireAuthor: true,  // Automatic mode - prevent wrong authors
+      stopWords: langConfig.stopWords,
+      characterReplacements: langConfig.characterReplacements,
    });

    // Log filter results
@@ -170,7 +170,8 @@ export async function downloadEbook(
  preferredFormat: string = 'epub',
  baseUrl: string = 'https://annas-archive.li',
  logger?: RMABLogger,
-  flaresolverrUrl?: string
+  flaresolverrUrl?: string,
+  languageCode: string = 'en'
 ): Promise<EbookDownloadResult> {
  try {
    let md5: string | null = null;
@@ -183,7 +184,7 @@ export async function downloadEbook(
    // Step 1: Try ASIN search (exact match - best)
    if (asin) {
      await logger?.info(`Searching by ASIN: ${asin} (format: ${preferredFormat})...`);
-      md5 = await searchByAsin(asin, preferredFormat, baseUrl, logger, flaresolverrUrl);
+      md5 = await searchByAsin(asin, preferredFormat, baseUrl, logger, flaresolverrUrl, languageCode);

      if (md5) {
        await logger?.info(`Found via ASIN: ${md5}`);
@@ -195,7 +196,7 @@ export async function downloadEbook(
    // Step 2: Fallback to title + author search
    if (!md5) {
      await logger?.info(`Searching by title + author: "${title}" by ${author}...`);
-      md5 = await searchByTitle(title, author, preferredFormat, baseUrl, logger, flaresolverrUrl);
+      md5 = await searchByTitle(title, author, preferredFormat, baseUrl, logger, flaresolverrUrl, languageCode);

      if (md5) {
        await logger?.info(`Found via title search: ${md5}`);
@@ -312,10 +313,11 @@ export async function searchByAsin(
  format: string,
  baseUrl: string,
  logger?: RMABLogger,
-  flaresolverrUrl?: string
+  flaresolverrUrl?: string,
+  languageCode: string = 'en'
 ): Promise<string | null> {
  // Check cache first
-  const cacheKey = `${asin}-${format}`;
+  const cacheKey = `${asin}-${format}-${languageCode}`;
  if (md5Cache.has(cacheKey)) {
    const cached = md5Cache.get(cacheKey);
    if (cached) {
@@ -327,7 +329,7 @@ export async function searchByAsin(
  try {
    // Build search URL with ASIN and optional format filter
    const formatParam = format && format !== 'any' ? `ext=${format}&` : '';
-    const searchUrl = `${baseUrl}/search?${formatParam}lang=en&q=%22asin:${asin}%22`;
+    const searchUrl = `${baseUrl}/search?${formatParam}lang=${languageCode}&q=%22asin:${asin}%22`;

    moduleLogger.debug(`ASIN search URL: ${searchUrl}`);

@@ -404,10 +406,11 @@ export async function searchByTitle(
  format: string,
  baseUrl: string,
  logger?: RMABLogger,
-  flaresolverrUrl?: string
+  flaresolverrUrl?: string,
+  languageCode: string = 'en'
 ): Promise<string | null> {
  // Check cache first
-  const cacheKey = `title-${title}-${author}-${format}`.toLowerCase();
+  const cacheKey = `title-${title}-${author}-${format}-${languageCode}`.toLowerCase();
  if (md5Cache.has(cacheKey)) {
    const cached = md5Cache.get(cacheKey);
    if (cached) {
@@ -432,8 +435,8 @@ export async function searchByTitle(
    // Add content type filters (books only, all fiction/nonfiction/unknown)
    searchUrl += '&content=book_nonfiction&content=book_fiction&content=book_unknown';

-    // Add language filter (English)
-    searchUrl += '&lang=en';
+    // Add language filter
+    searchUrl += `&lang=${languageCode}`;

    // Empty raw query (we're using specific terms instead)
    searchUrl += '&q=';
@@ -3,6 +3,8 @@
 * Documentation: documentation/integrations/audible.md
 */

+import type { SupportedLanguage } from '../constants/language-config';
+
 export type AudibleRegion = 'us' | 'ca' | 'uk' | 'au' | 'in' | 'de' | 'es';

 export interface AudibleRegionConfig {
@@ -10,7 +12,7 @@ export interface AudibleRegionConfig {
  name: string;
  baseUrl: string;
  audnexusParam: string;
-  isEnglish: boolean;
+  language: SupportedLanguage;
 }

 export const AUDIBLE_REGIONS: Record<AudibleRegion, AudibleRegionConfig> = {
@@ -19,49 +21,49 @@ export const AUDIBLE_REGIONS: Record<AudibleRegion, AudibleRegionConfig> = {
    name: 'United States',
    baseUrl: 'https://www.audible.com',
    audnexusParam: 'us',
-    isEnglish: true,
+    language: 'en',
  },
  ca: {
    code: 'ca',
    name: 'Canada',
    baseUrl: 'https://www.audible.ca',
    audnexusParam: 'ca',
-    isEnglish: true,
+    language: 'en',
  },
  uk: {
    code: 'uk',
    name: 'United Kingdom',
    baseUrl: 'https://www.audible.co.uk',
    audnexusParam: 'uk',
-    isEnglish: true,
+    language: 'en',
  },
  au: {
    code: 'au',
    name: 'Australia',
    baseUrl: 'https://www.audible.com.au',
    audnexusParam: 'au',
-    isEnglish: true,
+    language: 'en',
  },
  in: {
    code: 'in',
    name: 'India',
    baseUrl: 'https://www.audible.in',
    audnexusParam: 'in',
-    isEnglish: true,
+    language: 'en',
  },
  de: {
    code: 'de',
    name: 'Germany',
    baseUrl: 'https://www.audible.de',
    audnexusParam: 'de',
-    isEnglish: false,
+    language: 'de',
  },
  es: {
    code: 'es',
    name: 'Spain',
    baseUrl: 'https://www.audible.es',
    audnexusParam: 'es',
-    isEnglish: false,
+    language: 'es',
  }
 };

@@ -40,6 +40,8 @@ export interface RankTorrentsOptions {
  indexerPriorities?: Map<number, number>;  // indexerId -> priority (1-25)
  flagConfigs?: IndexerFlagConfig[];         // Flag bonus configurations
  requireAuthor?: boolean;                   // Enforce author presence check (default: true)
+  stopWords?: string[];                      // Language-specific stop words for matching
+  characterReplacements?: Record<string, string>;  // Language-specific char replacements (e.g. ß→ss)
 }

 export interface EbookTorrentRequest {
@@ -52,6 +54,8 @@ export interface RankEbookTorrentsOptions {
  indexerPriorities?: Map<number, number>;  // indexerId -> priority (1-25)
  flagConfigs?: IndexerFlagConfig[];         // Flag bonus configurations
  requireAuthor?: boolean;                   // Enforce author presence check (default: true)
+  stopWords?: string[];                      // Language-specific stop words for matching
+  characterReplacements?: Record<string, string>;  // Language-specific char replacements (e.g. ß→ss)
 }

 export interface BonusModifier {
@@ -113,7 +117,9 @@ export class RankingAlgorithm {
    const {
      indexerPriorities,
      flagConfigs,
-      requireAuthor = true  // Safe default: require author in automatic mode
+      requireAuthor = true,  // Safe default: require author in automatic mode
+      stopWords,
+      characterReplacements,
    } = options;
    // Filter out files < 20 MB (likely ebooks/samples)
    const filteredTorrents = torrents.filter((torrent) => {
@@ -126,7 +132,7 @@ export class RankingAlgorithm {
      const formatScore = this.scoreFormat(torrent);
      const sizeScore = this.scoreSize(torrent, audiobook.durationMinutes);
      const seederScore = this.scoreSeeders(torrent.seeders);
-      const matchScore = this.scoreMatch(torrent, audiobook, requireAuthor);
+      const matchScore = this.scoreMatch(torrent, audiobook, requireAuthor, stopWords, characterReplacements);

      const baseScore = formatScore + sizeScore + seederScore + matchScore;

@@ -340,11 +346,22 @@ export class RankingAlgorithm {
   * "Twelve.Months-Jim.Butcher" → "twelve months jim butcher"
   * "Author_Name_Book" → "author name book"
   */
-  private normalizeForMatching(text: string): string {
-    return text
+  private normalizeForMatching(text: string, characterReplacements?: Record<string, string>): string {
+    let result = text
      // Split CamelCase FIRST (before lowercasing): "TheCorrespondent" → "The Correspondent"
      .replace(/([a-z])([A-Z])/g, '$1 $2')
-      .toLowerCase()
+      .toLowerCase();
+    // Apply language-specific character replacements before NFD (e.g. ß→ss)
+    if (characterReplacements) {
+      for (const [from, to] of Object.entries(characterReplacements)) {
+        result = result.replace(new RegExp(from, 'g'), to);
+      }
+    }
+    return result
+      // NFD normalization decomposes accented chars; stripping the combining marks leaves ASCII base forms
+      // e.g. "über" → "uber", "señor" → "senor", "café" → "cafe"
+      .normalize('NFD')
+      .replace(/[\u0300-\u036f]/g, '')
      // Replace underscores with spaces (must be explicit since \w includes _)
      .replace(/_/g, ' ')
      // Replace other punctuation/separators with spaces (preserves apostrophes in contractions)
@@ -362,11 +379,13 @@ export class RankingAlgorithm {
  private scoreMatch(
    torrent: TorrentResult,
    audiobook: AudiobookRequest,
-    requireAuthor: boolean = true
+    requireAuthor: boolean = true,
+    customStopWords?: string[],
+    characterReplacements?: Record<string, string>
  ): number {
-    // Normalize for matching (handles CamelCase, punctuation separators)
-    const torrentTitle = this.normalizeForMatching(torrent.title);
-    const requestTitle = this.normalizeForMatching(audiobook.title);
+    // Normalize for matching (handles CamelCase, punctuation separators, diacritics)
+    const torrentTitle = this.normalizeForMatching(torrent.title, characterReplacements);
+    const requestTitle = this.normalizeForMatching(audiobook.title, characterReplacements);

    // Parse authors from RAW string first (preserving commas for splitting)
    // Then normalize individual authors for matching
@@ -377,19 +396,30 @@ export class RankingAlgorithm {
      .filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));

    // Normalize parsed authors for matching (handles CamelCase in author names)
-    const normalizedAuthors = parsedAuthors.map(a => this.normalizeForMatching(a));
+    const normalizedAuthors = parsedAuthors.map(a => this.normalizeForMatching(a, characterReplacements));
    // Combined normalized author string for fuzzy matching
    const requestAuthorNormalized = normalizedAuthors.join(' ');

    // ========== STAGE 1: WORD COVERAGE FILTER (MANDATORY) ==========
    // Extract significant words (filter out common stop words)
-    const stopWords = ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for'];
+    // Use provided language-specific stop words, or fall back to English defaults
+    const stopWords = customStopWords || ['the', 'a', 'an', 'of', 'on', 'in', 'at', 'by', 'for'];

    const extractWords = (text: string, stopList: string[]): string[] => {
-      return text
+      let processed = text
        // Split CamelCase FIRST: "TheCorrespondent" → "The Correspondent"
        .replace(/([a-z])([A-Z])/g, '$1 $2')
-        .toLowerCase()
+        .toLowerCase();
+      // Apply language-specific character replacements before NFD
+      if (characterReplacements) {
+        for (const [from, to] of Object.entries(characterReplacements)) {
+          processed = processed.replace(new RegExp(from, 'g'), to);
+        }
+      }
+      return processed
+        // NFD normalization for accented characters
+        .normalize('NFD')
+        .replace(/[\u0300-\u036f]/g, '')
        // Replace underscores with spaces (must be explicit since \w includes _)
        .replace(/_/g, ' ')
        // Remove other punctuation (but keep apostrophes for contractions)
@@ -431,7 +461,7 @@ export class RankingAlgorithm {
      }

      // Normalize the required portion (handles CamelCase, punctuation)
-      const required = this.normalizeForMatching(requiredRaw);
+      const required = this.normalizeForMatching(requiredRaw, characterReplacements);
      const optional = optionalMatches.join(' ');

      return { required, optional };
@@ -653,7 +683,7 @@ export class RankingAlgorithm {
   * @param requestAuthor - Raw author string (will be parsed and normalized internally)
   * @returns true if at least ONE author is present with high confidence
   */
-  private checkAuthorPresence(torrentTitle: string, requestAuthor: string): boolean {
+  private checkAuthorPresence(torrentTitle: string, requestAuthor: string, characterReplacements?: Record<string, string>): boolean {
    // Parse multiple authors (same logic as Stage 3 author matching)
    const authors = requestAuthor
      .split(/,|&| and | - /)
@@ -661,7 +691,7 @@ export class RankingAlgorithm {
      .filter(a => a.length > 2 && !['translator', 'narrator'].includes(a));

    // Normalize each author for matching
-    const normalizedAuthors = authors.map(a => this.normalizeForMatching(a));
+    const normalizedAuthors = authors.map(a => this.normalizeForMatching(a, characterReplacements));

    return this.checkAuthorPresenceWithParsed(torrentTitle, normalizedAuthors);
  }
@@ -788,7 +818,9 @@ export class RankingAlgorithm {
    const {
      indexerPriorities,
      flagConfigs,
-      requireAuthor = true  // Safe default: require author in automatic mode
+      requireAuthor = true,  // Safe default: require author in automatic mode
+      stopWords,
+      characterReplacements,
    } = options;

    // Filter out files > 20 MB (too large for ebooks)
@@ -809,7 +841,7 @@ export class RankingAlgorithm {
      const matchScore = this.scoreMatch(torrent, {
        title: ebook.title,
        author: ebook.author,
-      }, requireAuthor);
+      }, requireAuthor, stopWords, characterReplacements);

      const baseScore = formatScore + sizeScore + seederScore + matchScore;