Add language config and locale-aware parsing

Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Adds src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replaces AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, ebook scraper, processors, and API routes to use getLanguageForRegion so Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
This commit is contained in:
kikootwo
2026-02-20 06:32:44 -05:00
parent c146383735
commit 5d8ac2f73d
18 changed files with 525 additions and 112 deletions
@@ -10,6 +10,8 @@ import { requireAuth, AuthenticatedRequest } from '@/lib/middleware/auth';
import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
import { rankTorrents } from '@/lib/utils/ranking-algorithm';
import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
import { getLanguageForRegion } from '@/lib/constants/language-config';
import type { AudibleRegion } from '@/lib/types/audible';
import { z } from 'zod';
import { RMABLogger } from '@/lib/utils/logger';
@@ -140,13 +142,19 @@ export async function POST(request: NextRequest) {
logger.info(`Will filter ${belowThreshold.length} results < ${sizeMBThreshold} MB (likely ebooks)`);
}
// Get language-specific stop words for ranking
const region = await configService.getAudibleRegion() as AudibleRegion;
const langConfig = getLanguageForRegion(region);
// Rank torrents using the ranking algorithm with indexer priorities and flag configs
// Note: rankTorrents now filters out results < 20 MB internally
// requireAuthor: false - interactive search, show all results for user decision
const rankedResults = rankTorrents(results, { title, author, durationMinutes }, {
indexerPriorities,
flagConfigs,
requireAuthor: false // Interactive mode - let user decide
requireAuthor: false, // Interactive mode - let user decide
stopWords: langConfig.stopWords,
characterReplacements: langConfig.characterReplacements,
});
// Log filter results