mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-03 04:40:09 +00:00
Add language config and locale-aware parsing
Introduce centralized language configuration and wire locale-aware behavior across scraping and ranking. Add src/lib/constants/language-config.ts with per-language scraping rules, stop words, and character replacements; replace AudibleRegion.isEnglish with a language field in types and AUDIBLE_REGIONS. Update AudibleService, the ebook scraper, processors, and API routes to use getLanguageForRegion so that Anna's Archive searches, scraping selectors, runtime/rating parsing, and ranking use language-specific params and filters. Extend the ranking algorithm to accept stopWords and characterReplacements and apply them during normalization and matching. Update UI selects to mark non-English regions and adjust tests accordingly.
This commit is contained in:
@@ -18,6 +18,8 @@ import { findPlexMatch } from '@/lib/utils/audiobook-matcher';
|
||||
import { getAudibleService } from '@/lib/integrations/audible.service';
|
||||
import { RMABLogger } from '@/lib/utils/logger';
|
||||
import { resolveInteractiveSearchAccess } from '@/lib/utils/permissions';
|
||||
import { getLanguageForRegion } from '@/lib/constants/language-config';
|
||||
import type { AudibleRegion } from '@/lib/types/audible';
|
||||
import {
|
||||
searchByAsin,
|
||||
searchByTitle,
|
||||
@@ -227,6 +229,11 @@ export async function POST(
|
||||
const format = preferredFormat || 'epub';
|
||||
const annasBaseUrl = baseUrl || 'https://annas-archive.li';
|
||||
|
||||
// Get language code from Audible region config
|
||||
const region = await configService.getAudibleRegion() as AudibleRegion;
|
||||
const langConfig = getLanguageForRegion(region);
|
||||
const languageCode = langConfig.annasArchiveLang;
|
||||
|
||||
if (!isAnnasArchiveEnabled && !isIndexerSearchEnabled) {
|
||||
return NextResponse.json(
|
||||
{ error: 'No ebook sources enabled. Enable Anna\'s Archive or Indexer Search in settings.' },
|
||||
@@ -250,7 +257,8 @@ export async function POST(
|
||||
audiobook.author,
|
||||
format,
|
||||
annasBaseUrl,
|
||||
flaresolverrUrl || undefined
|
||||
flaresolverrUrl || undefined,
|
||||
languageCode
|
||||
).catch((err) => {
|
||||
logger.error(`Anna's Archive search failed: ${err.message}`);
|
||||
return null;
|
||||
@@ -322,7 +330,8 @@ async function searchAnnasArchiveForInteractive(
|
||||
author: string,
|
||||
preferredFormat: string,
|
||||
baseUrl: string,
|
||||
flaresolverrUrl?: string
|
||||
flaresolverrUrl?: string,
|
||||
languageCode: string = 'en'
|
||||
): Promise<EbookSearchResult[]> {
|
||||
let md5: string | null = null;
|
||||
let searchMethod: 'asin' | 'title' = 'title';
|
||||
@@ -330,7 +339,7 @@ async function searchAnnasArchiveForInteractive(
|
||||
// Try ASIN search first
|
||||
if (asin) {
|
||||
logger.info(`Searching Anna's Archive by ASIN: ${asin}`);
|
||||
md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl);
|
||||
md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
|
||||
if (md5) {
|
||||
searchMethod = 'asin';
|
||||
logger.info(`Found via ASIN: ${md5}`);
|
||||
@@ -340,7 +349,7 @@ async function searchAnnasArchiveForInteractive(
|
||||
// Fallback to title search
|
||||
if (!md5) {
|
||||
logger.info(`Searching Anna's Archive by title: "${title}"`);
|
||||
md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl);
|
||||
md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
|
||||
if (md5) {
|
||||
logger.info(`Found via title: ${md5}`);
|
||||
}
|
||||
@@ -461,6 +470,10 @@ async function searchIndexersForInteractive(
|
||||
return [];
|
||||
}
|
||||
|
||||
// Get language-specific stop words for ranking
|
||||
const rankRegion = await configService.getAudibleRegion() as AudibleRegion;
|
||||
const rankLangConfig = getLanguageForRegion(rankRegion);
|
||||
|
||||
// Rank results with ebook scoring
|
||||
const rankedResults = rankEbookTorrents(allResults, {
|
||||
title,
|
||||
@@ -470,6 +483,8 @@ async function searchIndexersForInteractive(
|
||||
indexerPriorities,
|
||||
flagConfigs,
|
||||
requireAuthor: false,
|
||||
stopWords: rankLangConfig.stopWords,
|
||||
characterReplacements: rankLangConfig.characterReplacements,
|
||||
});
|
||||
|
||||
// Convert to unified result type
|
||||
|
||||
@@ -10,6 +10,8 @@ import { requireAuth, AuthenticatedRequest } from '@/lib/middleware/auth';
|
||||
import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
|
||||
import { rankTorrents } from '@/lib/utils/ranking-algorithm';
|
||||
import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
|
||||
import { getLanguageForRegion } from '@/lib/constants/language-config';
|
||||
import type { AudibleRegion } from '@/lib/types/audible';
|
||||
import { z } from 'zod';
|
||||
import { RMABLogger } from '@/lib/utils/logger';
|
||||
|
||||
@@ -140,13 +142,19 @@ export async function POST(request: NextRequest) {
|
||||
logger.info(`Will filter ${belowThreshold.length} results < ${sizeMBThreshold} MB (likely ebooks)`);
|
||||
}
|
||||
|
||||
// Get language-specific stop words for ranking
|
||||
const region = await configService.getAudibleRegion() as AudibleRegion;
|
||||
const langConfig = getLanguageForRegion(region);
|
||||
|
||||
// Rank torrents using the ranking algorithm with indexer priorities and flag configs
|
||||
// Note: rankTorrents now filters out results < 20 MB internally
|
||||
// requireAuthor: false - interactive search, show all results for user decision
|
||||
const rankedResults = rankTorrents(results, { title, author, durationMinutes }, {
|
||||
indexerPriorities,
|
||||
flagConfigs,
|
||||
requireAuthor: false // Interactive mode - let user decide
|
||||
requireAuthor: false, // Interactive mode - let user decide
|
||||
stopWords: langConfig.stopWords,
|
||||
characterReplacements: langConfig.characterReplacements,
|
||||
});
|
||||
|
||||
// Log filter results
|
||||
|
||||
@@ -14,6 +14,8 @@ import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
|
||||
import { rankEbookTorrents, RankedEbookTorrent } from '@/lib/utils/ranking-algorithm';
|
||||
import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
|
||||
import { RMABLogger } from '@/lib/utils/logger';
|
||||
import { getLanguageForRegion } from '@/lib/constants/language-config';
|
||||
import type { AudibleRegion } from '@/lib/types/audible';
|
||||
import {
|
||||
searchByAsin,
|
||||
searchByTitle,
|
||||
@@ -121,6 +123,11 @@ export async function POST(
|
||||
const format = preferredFormat || 'epub';
|
||||
const annasBaseUrl = baseUrl || 'https://annas-archive.li';
|
||||
|
||||
// Get language code from Audible region config
|
||||
const region = await configService.getAudibleRegion() as AudibleRegion;
|
||||
const langConfig = getLanguageForRegion(region);
|
||||
const languageCode = langConfig.annasArchiveLang;
|
||||
|
||||
if (!isAnnasArchiveEnabled && !isIndexerSearchEnabled) {
|
||||
return NextResponse.json(
|
||||
{ error: 'No ebook sources enabled. Enable Anna\'s Archive or Indexer Search in settings.' },
|
||||
@@ -145,7 +152,8 @@ export async function POST(
|
||||
audiobook.author,
|
||||
format,
|
||||
annasBaseUrl,
|
||||
flaresolverrUrl || undefined
|
||||
flaresolverrUrl || undefined,
|
||||
languageCode
|
||||
).catch((err) => {
|
||||
logger.error(`Anna's Archive search failed: ${err.message}`);
|
||||
return null;
|
||||
@@ -217,7 +225,8 @@ async function searchAnnasArchiveForInteractive(
|
||||
author: string,
|
||||
preferredFormat: string,
|
||||
baseUrl: string,
|
||||
flaresolverrUrl?: string
|
||||
flaresolverrUrl?: string,
|
||||
languageCode: string = 'en'
|
||||
): Promise<EbookSearchResult[]> {
|
||||
let md5: string | null = null;
|
||||
let searchMethod: 'asin' | 'title' = 'title';
|
||||
@@ -225,7 +234,7 @@ async function searchAnnasArchiveForInteractive(
|
||||
// Try ASIN search first
|
||||
if (asin) {
|
||||
logger.info(`Searching Anna's Archive by ASIN: ${asin}`);
|
||||
md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl);
|
||||
md5 = await searchByAsin(asin, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
|
||||
if (md5) {
|
||||
searchMethod = 'asin';
|
||||
logger.info(`Found via ASIN: ${md5}`);
|
||||
@@ -235,7 +244,7 @@ async function searchAnnasArchiveForInteractive(
|
||||
// Fallback to title search
|
||||
if (!md5) {
|
||||
logger.info(`Searching Anna's Archive by title: "${title}"`);
|
||||
md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl);
|
||||
md5 = await searchByTitle(title, author, preferredFormat, baseUrl, undefined, flaresolverrUrl, languageCode);
|
||||
if (md5) {
|
||||
logger.info(`Found via title: ${md5}`);
|
||||
}
|
||||
@@ -356,6 +365,10 @@ async function searchIndexersForInteractive(
|
||||
return [];
|
||||
}
|
||||
|
||||
// Get language-specific stop words for ranking
|
||||
const rankRegion = await configService.getAudibleRegion() as AudibleRegion;
|
||||
const rankLangConfig = getLanguageForRegion(rankRegion);
|
||||
|
||||
// Rank results with ebook scoring
|
||||
// Use requireAuthor=false for interactive mode (let user decide)
|
||||
const rankedResults = rankEbookTorrents(allResults, {
|
||||
@@ -366,6 +379,8 @@ async function searchIndexersForInteractive(
|
||||
indexerPriorities,
|
||||
flagConfigs,
|
||||
requireAuthor: false,
|
||||
stopWords: rankLangConfig.stopWords,
|
||||
characterReplacements: rankLangConfig.characterReplacements,
|
||||
});
|
||||
|
||||
// Log ranking debug info (same format as search-ebook.processor.ts)
|
||||
|
||||
@@ -9,6 +9,8 @@ import { prisma } from '@/lib/db';
|
||||
import { getProwlarrService } from '@/lib/integrations/prowlarr.service';
|
||||
import { rankTorrents } from '@/lib/utils/ranking-algorithm';
|
||||
import { groupIndexersByCategories, getGroupDescription } from '@/lib/utils/indexer-grouping';
|
||||
import { getLanguageForRegion } from '@/lib/constants/language-config';
|
||||
import type { AudibleRegion } from '@/lib/types/audible';
|
||||
import { RMABLogger } from '@/lib/utils/logger';
|
||||
import { resolveInteractiveSearchAccess } from '@/lib/utils/permissions';
|
||||
|
||||
@@ -189,6 +191,10 @@ export async function POST(
|
||||
}
|
||||
}
|
||||
|
||||
// Get language-specific stop words for ranking
|
||||
const region = await configService.getAudibleRegion() as AudibleRegion;
|
||||
const langConfig = getLanguageForRegion(region);
|
||||
|
||||
// Rank torrents using the ranking algorithm with indexer priorities and flag configs
|
||||
// Always use the audiobook's title/author for ranking (not custom search query)
|
||||
// requireAuthor: false - interactive mode, show all results for user decision
|
||||
@@ -199,7 +205,9 @@ export async function POST(
|
||||
}, {
|
||||
indexerPriorities,
|
||||
flagConfigs,
|
||||
requireAuthor: false // Interactive mode - let user decide
|
||||
requireAuthor: false, // Interactive mode - let user decide
|
||||
stopWords: langConfig.stopWords,
|
||||
characterReplacements: langConfig.characterReplacements,
|
||||
});
|
||||
|
||||
// No threshold filtering for interactive search - show all results
|
||||
|
||||
Reference in New Issue
Block a user