Add language config and locale-aware parsing

Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Adds src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replaces AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, ebook scraper, processors, and API routes to use getLanguageForRegion so Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
This commit is contained in:
kikootwo
2026-02-20 06:32:44 -05:00
parent c146383735
commit 5d8ac2f73d
18 changed files with 525 additions and 112 deletions
@@ -9,6 +9,8 @@ import { getProwlarrService } from '../integrations/prowlarr.service';
import { getRankingAlgorithm } from '../utils/ranking-algorithm';
import { groupIndexersByCategories, getGroupDescription } from '../utils/indexer-grouping';
import { RMABLogger } from '../utils/logger';
import { getLanguageForRegion } from '../constants/language-config';
import type { AudibleRegion } from '../types/audible';
/**
* Process search indexers job
@@ -146,8 +148,10 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
logger.info(`Will filter ${belowThreshold.length} results < ${sizeMBThreshold} MB (likely ebooks)`);
}
// Get ranking algorithm
// Get ranking algorithm and language-specific stop words
const ranker = getRankingAlgorithm();
const region = await configService.getAudibleRegion() as AudibleRegion;
const langConfig = getLanguageForRegion(region);
// Rank results with indexer priorities and flag configs
// Note: rankTorrents now filters out results < 20 MB internally
@@ -159,7 +163,9 @@ export async function processSearchIndexers(payload: SearchIndexersPayload): Pro
}, {
indexerPriorities,
flagConfigs,
requireAuthor: true // Automatic mode - prevent wrong authors
requireAuthor: true, // Automatic mode - prevent wrong authors
stopWords: langConfig.stopWords,
characterReplacements: langConfig.characterReplacements,
});
// Log filter results