From 35cb3183898421838f02edfd93bf63a5ab119984 Mon Sep 17 00:00:00 2001 From: Mattias Carlsson Date: Fri, 10 Apr 2026 10:22:01 +0200 Subject: [PATCH 1/5] Fix bulk import: group tagless files by folder, use folder name as search fallback --- documentation/features/bulk-import.md | 13 +- src/app/api/admin/bulk-import/scan/route.ts | 25 +++- .../admin/bulk-import/MatchReviewStep.tsx | 7 +- src/components/admin/bulk-import/types.ts | 4 +- src/lib/utils/bulk-import-scanner.ts | 136 +++++++++++++----- 5 files changed, 140 insertions(+), 45 deletions(-) diff --git a/documentation/features/bulk-import.md b/documentation/features/bulk-import.md index 6b1c0f9..a6e6404 100644 --- a/documentation/features/bulk-import.md +++ b/documentation/features/bulk-import.md @@ -13,9 +13,13 @@ Lets admins scan a server folder recursively, discover audiobook subfolders, mat ## Key Details - **Access:** Admin-only, modal opened from admin dashboard Quick Actions - **Audio detection:** Uses `AUDIO_EXTENSIONS` from `src/lib/constants/audio-formats.ts` -- **Audiobook boundary:** A folder containing audio files = one audiobook; subfolders not scanned further -- **Metadata extraction:** ffprobe reads `album` (title), `album_artist` (author), `composer` (narrator) from first audio file -- **Fallback:** If metadata tags are empty, folder name used as search term; "Low Confidence" badge shown +- **Audiobook boundary:** A folder containing audio files = one audiobook. Files with matching metadata tags are grouped by title+author+narrator. Files with no metadata title tag are all grouped together per folder (one entry, not one per file). +- **Metadata extraction:** ffprobe reads `album` (title), `album_artist` (author), `composer` (narrator) from all audio files in folder +- **Search term fallback chain** (when no `album` tag): + 1. **ASIN in folder name** — scans folder name for pattern `B[A-Z0-9]{9}` bounded by bracket/paren/space; if found, uses direct ASIN lookup instead of text search; no badge shown + 2. **Folder name** — cleaned (strips bracketed ASIN/year, underscores→spaces); skipped if generic (CD1, Disc 2, Part 3, Vol 1, etc.); shows "Low Confidence" badge + 3. **First file name** — last resort; shows "Low Confidence" badge +- **Generic folder detection:** `/^(cd|disc|disk|part|vol(ume)?)\s*\d+$/i` — these names are skipped as search terms - **Author/narrator dedup:** Splits on `,;& ` delimiters, removes names appearing in both fields - **Scan depth:** Max 10 levels recursion - **Rate limiting:** 1.5s delay between Audible searches (same as existing scraping rate limit) @@ -56,7 +60,8 @@ Lets admins scan a server folder recursively, discover audiobook subfolders, mat | Already in library | 40% opacity, green "In Library" badge, toggle disabled | | Active request exists | 40% opacity, purple "Requested" badge, toggle disabled | | No Audible match | Red "No Match" badge, folder name shown, pre-skipped | -| Low confidence (folder name fallback) | Amber "Low Confidence" badge | +| ASIN extracted from folder name | No badge (high confidence — direct ASIN lookup) | +| Low confidence (folder name or file name fallback, no ASIN) | Amber "Low Confidence" badge | ## Files diff --git a/src/app/api/admin/bulk-import/scan/route.ts b/src/app/api/admin/bulk-import/scan/route.ts index a74e217..c3f52f7 100644 --- a/src/app/api/admin/bulk-import/scan/route.ts +++ b/src/app/api/admin/bulk-import/scan/route.ts @@ -159,7 +159,29 @@ export async function POST(request: NextRequest) { let hasActiveRequest = false; try { - const searchResult = await audibleService.search(book.searchTerm); + // If the scanner extracted an ASIN directly from the folder name, + // try an exact ASIN lookup first — faster and more accurate than + // a text search. Fall back to text search if it fails or returns + // no result. + let searchResult: Awaited> | null = null; + + if (book.extractedAsin) { + try { + const asinResult = await audibleService.search(book.extractedAsin); + if ( + asinResult.results.length > 0 && + asinResult.results[0].asin === book.extractedAsin + ) { + searchResult = asinResult; + } + } catch { + /* ASIN lookup failed — fall through to text search */ + } + } + + if (!searchResult) { + searchResult = await audibleService.search(book.searchTerm); + } if (searchResult.results.length > 0) { match = searchResult.results[0]; @@ -208,6 +230,7 @@ export async function POST(request: NextRequest) { audioFileCount: book.audioFileCount, totalSizeBytes: book.totalSizeBytes, metadataSource: book.metadataSource, + extractedAsin: book.extractedAsin, searchTerm: book.searchTerm, audioFiles: book.audioFiles, match: match diff --git a/src/components/admin/bulk-import/MatchReviewStep.tsx b/src/components/admin/bulk-import/MatchReviewStep.tsx index 5bce97c..e51458c 100644 --- a/src/components/admin/bulk-import/MatchReviewStep.tsx +++ b/src/components/admin/bulk-import/MatchReviewStep.tsx @@ -39,7 +39,12 @@ function BookRow({ const isDisabled = book.inLibrary || book.hasActiveRequest; const isSkipped = book.skipped; const hasMatch = book.match !== null; - const isLowConfidence = book.metadataSource === 'file_name'; + // Low confidence when search term came from a filename or folder name fallback, + // BUT not when an ASIN was extracted directly from the folder name (that's a + // direct lookup and is as reliable as embedded metadata tags). + const isLowConfidence = + (book.metadataSource === 'file_name' || book.metadataSource === 'folder_name') && + !book.extractedAsin; return (
( * Group audio files in a directory by their metadata. * Reads metadata from all files using a concurrency pool, then groups them * by a normalized key of title + author + narrator. - * Files with no metadata title each become their own group. + * + * Files with a metadata title are grouped by their shared key. Files with no + * metadata title are all grouped together under a single '__ungrouped_folder' + * key (rather than one entry per file), treating the folder as one book. + * If a folder contains both tagged and untagged files, the untagged files form + * one extra group alongside the tagged groups. */ async function groupAudioFilesByMetadata( dirPath: string, audioFiles: string[], - audioSizes: Map + audioSizes: Map, + folderName: string ): Promise> { @@ -291,14 +337,12 @@ async function groupAudioFilesByMetadata( metadata: AudioFileMetadata; }>(); - let ungroupedCounter = 0; - for (const { fileName, metadata } of metadataResults) { const key = buildGroupingKey(metadata); const fileSize = audioSizes.get(fileName) || 0; if (key) { - // Has metadata — group with others sharing the same key + // Has metadata title — group with others sharing the same key const existing = groups.get(key); if (existing) { existing.files.push(fileName); @@ -311,20 +355,28 @@ async function groupAudioFilesByMetadata( }); } } else { - // No title metadata — treat as individual book - const uniqueKey = `__ungrouped_${ungroupedCounter++}`; - groups.set(uniqueKey, { - files: [fileName], - totalSize: fileSize, - metadata, - }); + // No title metadata — collect all such files under one folder-level group. + // Key must start with '__ungrouped_' so deduplicateDiscoveries treats it + // as unique per folder (prefixes it with folderPath before deduplication). + const ungroupedKey = '__ungrouped_folder'; + const existing = groups.get(ungroupedKey); + if (existing) { + existing.files.push(fileName); + existing.totalSize += fileSize; + } else { + groups.set(ungroupedKey, { + files: [fileName], + totalSize: fileSize, + metadata, + }); + } } } // Build result with search terms return Array.from(groups.entries()).map(([groupingKey, group]) => { group.files.sort((a, b) => a.localeCompare(b)); - const { searchTerm, source } = buildSearchTerm(group.metadata, group.files[0]); + const { searchTerm, source } = buildSearchTerm(group.metadata, group.files[0], folderName); return { files: group.files, totalSize: group.totalSize, @@ -398,6 +450,7 @@ function deduplicateDiscoveries( metadata: first.metadata, searchTerm: first.searchTerm, metadataSource: first.metadataSource, + extractedAsin: first.extractedAsin, audioFiles: combinedFiles, groupingKey: first.groupingKey, }); @@ -434,9 +487,10 @@ function findCommonParent(paths: string[]): string { * * Scans every folder for audio files. When audio files are found, they are * grouped by metadata (title + author + narrator) — each group becomes a - * separate discovered audiobook. Files with no metadata are treated as - * individual books. Scanning ALWAYS recurses into subfolders regardless of - * whether the current folder has audio files. + * separate discovered audiobook. Files with no metadata are all grouped + * together per folder (treated as one book) rather than one entry per file. + * Scanning ALWAYS recurses into subfolders regardless of whether the current + * folder has audio files. * * After the full walk, discoveries sharing the same grouping key across * different folders (e.g., CD1/ and CD2/) are merged. @@ -460,11 +514,13 @@ export async function discoverAudiobooks( foldersScanned++; + const folderName = path.basename(currentPath); + onProgress?.({ phase: 'discovering', foldersScanned, audiobooksFound: results.length, - currentFolder: path.basename(currentPath), + currentFolder: folderName, }); // Check if this folder contains audio files @@ -486,19 +542,22 @@ export async function discoverAudiobooks( phase: 'grouping', foldersScanned, audiobooksFound: results.length, - currentFolder: path.basename(currentPath), + currentFolder: folderName, }); - // Group audio files by metadata + // Group audio files by metadata, passing folder name for fallback search terms const groups = await groupAudioFilesByMetadata( currentPath, audioResult.audioFiles, - audioSizes + audioSizes, + folderName ); - const folderName = path.basename(currentPath); const relativePath = path.relative(rootPath, currentPath).replace(/\\/g, '/'); + // Extract ASIN from folder name once for all groups in this folder + const extractedAsin = extractAsinFromString(folderName) ?? undefined; + for (const group of groups) { results.push({ folderPath: currentPath.replace(/\\/g, '/'), @@ -509,6 +568,7 @@ export async function discoverAudiobooks( metadata: group.metadata, searchTerm: group.searchTerm, metadataSource: group.metadataSource, + extractedAsin, audioFiles: group.files, groupingKey: group.groupingKey, }); @@ -518,7 +578,7 @@ export async function discoverAudiobooks( phase: 'reading_metadata', foldersScanned, audiobooksFound: results.length, - currentFolder: path.basename(currentPath), + currentFolder: folderName, }); } From ad1ab3af05ef63e897873fc7729fe874b7944e36 Mon Sep 17 00:00:00 2001 From: Mattias Carlsson Date: Sun, 19 Apr 2026 21:14:14 +0200 Subject: [PATCH 2/5] Better searching when using ASIN from folder names. --- src/app/api/admin/bulk-import/scan/route.ts | 26 +++++++++------------ 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/src/app/api/admin/bulk-import/scan/route.ts b/src/app/api/admin/bulk-import/scan/route.ts index c3f52f7..834865c 100644 --- a/src/app/api/admin/bulk-import/scan/route.ts +++ b/src/app/api/admin/bulk-import/scan/route.ts @@ -160,31 +160,27 @@ export async function POST(request: NextRequest) { try { // If the scanner extracted an ASIN directly from the folder name, - // try an exact ASIN lookup first — faster and more accurate than - // a text search. Fall back to text search if it fails or returns - // no result. - let searchResult: Awaited> | null = null; - + // use a direct ASIN lookup (Audnexus API) — more reliable than a + // keyword text search. Fall back to text search if the lookup fails. if (book.extractedAsin) { try { - const asinResult = await audibleService.search(book.extractedAsin); - if ( - asinResult.results.length > 0 && - asinResult.results[0].asin === book.extractedAsin - ) { - searchResult = asinResult; + const asinResult = await audibleService.getAudiobookDetails(book.extractedAsin); + if (asinResult) { + match = asinResult; } } catch { /* ASIN lookup failed — fall through to text search */ } } - if (!searchResult) { - searchResult = await audibleService.search(book.searchTerm); + if (!match) { + const searchResult = await audibleService.search(book.searchTerm); + if (searchResult.results.length > 0) { + match = searchResult.results[0]; + } } - if (searchResult.results.length > 0) { - match = searchResult.results[0]; + if (match) { // Check library availability const plexMatch = await findPlexMatch({ From 9a6062d86053d4e858a1f35f9e22422698ffcfb8 Mon Sep 17 00:00:00 2001 From: Mattias Carlsson Date: Sun, 19 Apr 2026 21:53:28 +0200 Subject: [PATCH 3/5] Decreased audible retries when doing manual imports. --- src/app/api/admin/bulk-import/scan/route.ts | 2 +- src/lib/integrations/audible.service.ts | 55 ++++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/app/api/admin/bulk-import/scan/route.ts b/src/app/api/admin/bulk-import/scan/route.ts index 834865c..aaacca8 100644 --- a/src/app/api/admin/bulk-import/scan/route.ts +++ b/src/app/api/admin/bulk-import/scan/route.ts @@ -164,7 +164,7 @@ export async function POST(request: NextRequest) { // keyword text search. Fall back to text search if the lookup fails. if (book.extractedAsin) { try { - const asinResult = await audibleService.getAudiobookDetails(book.extractedAsin); + const asinResult = await audibleService.lookupAsinFast(book.extractedAsin); if (asinResult) { match = asinResult; } diff --git a/src/lib/integrations/audible.service.ts b/src/lib/integrations/audible.service.ts index bc1bd90..157dfad 100644 --- a/src/lib/integrations/audible.service.ts +++ b/src/lib/integrations/audible.service.ts @@ -738,6 +738,56 @@ export class AudibleService { } } + /** + * Fast ASIN lookup for bulk import contexts. + * Tries Audnexus first (5s timeout, 1 retry), then Audible scraping once (0 retries). + * Fails fast so the caller can fall back to a text search quickly. + */ + async lookupAsinFast(asin: string): Promise { + await this.initialize(); + + // 1. Try Audnexus with tight limits + try { + const audnexusRegion = AUDIBLE_REGIONS[this.region].audnexusParam; + const response = await this.externalFetchWithRetry( + `https://api.audnex.us/books/${asin}`, + { + params: { region: audnexusRegion }, + timeout: 5000, + headers: { 'User-Agent': 'ReadMeABook/1.0' }, + }, + 1 // 1 retry max + ); + const data = response.data; + const result: AudibleAudiobook = { + asin, + title: data.title || '', + author: data.authors?.map((a: any) => a.name).join(', ') || '', + authorAsin: data.authors?.[0]?.asin || undefined, + narrator: data.narrators?.map((n: any) => n.name).join(', ') || '', + description: data.description || data.summary || '', + coverArtUrl: data.image || '', + durationMinutes: data.runtimeLengthMin ? parseInt(data.runtimeLengthMin) : undefined, + releaseDate: data.releaseDate || undefined, + rating: data.rating ? parseFloat(data.rating) : undefined, + genres: data.genres?.map((g: any) => typeof g === 'string' ? g : g.name).slice(0, 5) || undefined, + series: data.seriesPrimary?.name || undefined, + seriesPart: data.seriesPrimary?.position || undefined, + seriesAsin: data.seriesPrimary?.asin || undefined, + }; + if (result.coverArtUrl && !result.coverArtUrl.includes('_SL500_')) { + result.coverArtUrl = result.coverArtUrl.replace(/\._.*_\./, '._SL500_.'); + } + logger.info(` lookupAsinFast: Audnexus hit for "${result.title}" (${asin})`); + return result; + } catch { + logger.debug(` lookupAsinFast: Audnexus miss for ${asin}, trying Audible scraping...`); + } + + // 2. Try Audible scraping once — no retries + return await this.scrapeAudibleDetails(asin, 0); + } + /** * Fetch audiobook details from Audnexus API */ @@ -807,14 +857,15 @@ export class AudibleService { /** * Scrape audiobook details from Audible (fallback method) + * @param maxRetries - Maximum retry attempts (default 5). Pass 0 for a single attempt with no retries. */ - private async scrapeAudibleDetails(asin: string): Promise { + private async scrapeAudibleDetails(asin: string, maxRetries: number = 5): Promise { try { const { data: response } = await this.fetchWithRetry(`/pd/${asin}`, { params: { ipRedirectOverride: 'true', // Explicitly include to prevent IP-based region redirects }, - }); + }, maxRetries); const $ = cheerio.load(response.data); // Initialize result object From 7b01cda9558362fde3667c9d74fe34bb913b5606 Mon Sep 17 00:00:00 2001 From: Mattias Carlsson Date: Sun, 19 Apr 2026 22:03:45 +0200 Subject: [PATCH 4/5] Fix bulk import: merge untagged files into single tagged group per folder --- src/lib/utils/bulk-import-scanner.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/lib/utils/bulk-import-scanner.ts b/src/lib/utils/bulk-import-scanner.ts index 09d50ed..9a2da4e 100644 --- a/src/lib/utils/bulk-import-scanner.ts +++ b/src/lib/utils/bulk-import-scanner.ts @@ -373,6 +373,23 @@ async function groupAudioFilesByMetadata( } } + // If there is exactly one tagged group alongside an ungrouped group, absorb + // the untagged files into the tagged group. Untagged files in the same folder + // almost certainly belong to the same book (e.g. one chapter was ripped + // without tags, or a cover/intro file carries different metadata). + // Only do this when there is a single tagged group — multiple tagged groups + // mean genuinely different books are mixed in the folder, so keep them separate. + const ungrouped = groups.get('__ungrouped_folder'); + if (ungrouped) { + const taggedKeys = Array.from(groups.keys()).filter((k) => k !== '__ungrouped_folder'); + if (taggedKeys.length === 1) { + const taggedGroup = groups.get(taggedKeys[0])!; + taggedGroup.files.push(...ungrouped.files); + taggedGroup.totalSize += ungrouped.totalSize; + groups.delete('__ungrouped_folder'); + } + } + // Build result with search terms return Array.from(groups.entries()).map(([groupingKey, group]) => { group.files.sort((a, b) => a.localeCompare(b)); From c9392c49c969c4652ddbabd59c761fced97c3808 Mon Sep 17 00:00:00 2001 From: Mattias Carlsson Date: Sun, 19 Apr 2026 22:09:46 +0200 Subject: [PATCH 5/5] If ASIN lookup fails, use the folder name instead of the tag. --- src/app/api/admin/bulk-import/scan/route.ts | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/app/api/admin/bulk-import/scan/route.ts b/src/app/api/admin/bulk-import/scan/route.ts index aaacca8..9c98b19 100644 --- a/src/app/api/admin/bulk-import/scan/route.ts +++ b/src/app/api/admin/bulk-import/scan/route.ts @@ -174,7 +174,21 @@ export async function POST(request: NextRequest) { } if (!match) { - const searchResult = await audibleService.search(book.searchTerm); + // When an ASIN was extracted from the folder name but the direct + // lookup failed, prefer the folder name as the text search term + // over book.searchTerm. book.searchTerm may come from a single + // tagged file whose album tag is unreliable (e.g. a series name + // or intro track), whereas the folder name is the human-assigned + // title and is more likely to be accurate. + const textSearchTerm = book.extractedAsin + ? book.folderName + .replace(/[\[\(][A-Z0-9]{10}[\]\)]/g, '') // strip ASIN + .replace(/[\[\(]\d{4}[\]\)]/g, '') // strip year + .replace(/[_]/g, ' ') + .replace(/\s+/g, ' ') + .trim() + : book.searchTerm; + const searchResult = await audibleService.search(textSearchTerm); if (searchResult.results.length > 0) { match = searchResult.results[0]; }