Add language config and locale-aware parsing

Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Add src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replace AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, ebook scraper, processors, and API routes to use getLanguageForRegion so that Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend the ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
This commit is contained in:
kikootwo
2026-02-20 06:32:44 -05:00
parent c146383735
commit 5d8ac2f73d
18 changed files with 525 additions and 112 deletions
@@ -9,6 +9,8 @@ import { prisma } from '@/lib/db';
import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
import { rankTorrents } from '@/lib/utils/ranking-algorithm';
import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
import { getLanguageForRegion } from '@/lib/constants/language-config';
import type { AudibleRegion } from '@/lib/types/audible';
import { RMABLogger } from '@/lib/utils/logger';
import { resolveInteractiveSearchAccess } from '@/lib/utils/permissions';
@@ -189,6 +191,10 @@ export async function POST(
}
}
// Get language-specific stop words and character replacements for ranking
const region = await configService.getAudibleRegion() as AudibleRegion;
const langConfig = getLanguageForRegion(region);
// Rank torrents using the ranking algorithm with indexer priorities and flag configs
// Always use the audiobook's title/author for ranking (not custom search query)
// requireAuthor: false - interactive mode, show all results for user decision
@@ -199,7 +205,9 @@ export async function POST(
}, {
indexerPriorities,
flagConfigs,
requireAuthor: false // Interactive mode - let user decide
requireAuthor: false, // Interactive mode - let user decide
stopWords: langConfig.stopWords,
characterReplacements: langConfig.characterReplacements,
});
// No threshold filtering for interactive search - show all results