diff --git a/documentation/features/bulk-import.md b/documentation/features/bulk-import.md index 6b1c0f9..a6e6404 100644 --- a/documentation/features/bulk-import.md +++ b/documentation/features/bulk-import.md @@ -13,9 +13,13 @@ Lets admins scan a server folder recursively, discover audiobook subfolders, mat ## Key Details - **Access:** Admin-only, modal opened from admin dashboard Quick Actions - **Audio detection:** Uses `AUDIO_EXTENSIONS` from `src/lib/constants/audio-formats.ts` -- **Audiobook boundary:** A folder containing audio files = one audiobook; subfolders not scanned further -- **Metadata extraction:** ffprobe reads `album` (title), `album_artist` (author), `composer` (narrator) from first audio file -- **Fallback:** If metadata tags are empty, folder name used as search term; "Low Confidence" badge shown +- **Audiobook boundary:** A folder containing audio files = one audiobook. Files with matching metadata tags are grouped by title+author+narrator. Files with no metadata title tag are all grouped together per folder (one entry, not one per file). +- **Metadata extraction:** ffprobe reads `album` (title), `album_artist` (author), `composer` (narrator) from all audio files in folder +- **Search term fallback chain** (when no `album` tag): + 1. **ASIN in folder name** — scans folder name for pattern `B[A-Z0-9]{9}` bounded by bracket/paren/space; if found, uses direct ASIN lookup instead of text search; no badge shown + 2. **Folder name** — cleaned (strips bracketed ASIN/year, underscores→spaces); skipped if generic (CD1, Disc 2, Part 3, Vol 1, etc.); shows "Low Confidence" badge + 3. 
**First file name** — last resort; shows "Low Confidence" badge +- **Generic folder detection:** `/^(cd|disc|disk|part|vol(ume)?)\s*\d+$/i` — these names are skipped as search terms - **Author/narrator dedup:** Splits on `,;& ` delimiters, removes names appearing in both fields - **Scan depth:** Max 10 levels recursion - **Rate limiting:** 1.5s delay between Audible searches (same as existing scraping rate limit) @@ -56,7 +60,8 @@ Lets admins scan a server folder recursively, discover audiobook subfolders, mat | Already in library | 40% opacity, green "In Library" badge, toggle disabled | | Active request exists | 40% opacity, purple "Requested" badge, toggle disabled | | No Audible match | Red "No Match" badge, folder name shown, pre-skipped | -| Low confidence (folder name fallback) | Amber "Low Confidence" badge | +| ASIN extracted from folder name | No badge (high confidence — direct ASIN lookup) | +| Low confidence (folder name or file name fallback, no ASIN) | Amber "Low Confidence" badge | ## Files diff --git a/src/app/api/admin/bulk-import/scan/route.ts b/src/app/api/admin/bulk-import/scan/route.ts index a74e217..c3f52f7 100644 --- a/src/app/api/admin/bulk-import/scan/route.ts +++ b/src/app/api/admin/bulk-import/scan/route.ts @@ -159,7 +159,29 @@ export async function POST(request: NextRequest) { let hasActiveRequest = false; try { - const searchResult = await audibleService.search(book.searchTerm); + // If the scanner extracted an ASIN directly from the folder name, + // try an exact ASIN lookup first — faster and more accurate than + // a text search. Fall back to text search if it fails or returns + // no result. 
+ let searchResult: Awaited<ReturnType<typeof audibleService.search>> | null = null; + + if (book.extractedAsin) { + try { + const asinResult = await audibleService.search(book.extractedAsin); + if ( + asinResult.results.length > 0 && + asinResult.results[0].asin === book.extractedAsin + ) { + searchResult = asinResult; + } + } catch { + /* ASIN lookup failed — fall through to text search */ + } + } + + if (!searchResult) { + searchResult = await audibleService.search(book.searchTerm); + } if (searchResult.results.length > 0) { match = searchResult.results[0]; @@ -208,6 +230,7 @@ export async function POST(request: NextRequest) { audioFileCount: book.audioFileCount, totalSizeBytes: book.totalSizeBytes, metadataSource: book.metadataSource, + extractedAsin: book.extractedAsin, searchTerm: book.searchTerm, audioFiles: book.audioFiles, match: match diff --git a/src/components/admin/bulk-import/MatchReviewStep.tsx b/src/components/admin/bulk-import/MatchReviewStep.tsx index 5bce97c..e51458c 100644 --- a/src/components/admin/bulk-import/MatchReviewStep.tsx +++ b/src/components/admin/bulk-import/MatchReviewStep.tsx @@ -39,7 +39,12 @@ function BookRow({ const isDisabled = book.inLibrary || book.hasActiveRequest; const isSkipped = book.skipped; const hasMatch = book.match !== null; - const isLowConfidence = book.metadataSource === 'file_name'; + // Low confidence when search term came from a filename or folder name fallback, + // BUT not when an ASIN was extracted directly from the folder name (that's a + // direct lookup and is as reliable as embedded metadata tags). + const isLowConfidence = + (book.metadataSource === 'file_name' || book.metadataSource === 'folder_name') && + !book.extractedAsin; return (
( * Group audio files in a directory by their metadata. * Reads metadata from all files using a concurrency pool, then groups them * by a normalized key of title + author + narrator. - * Files with no metadata title each become their own group. + * + * Files with a metadata title are grouped by their shared key. Files with no + * metadata title are all grouped together under a single '__ungrouped_folder' + * key (rather than one entry per file), treating the folder as one book. + * If a folder contains both tagged and untagged files, the untagged files form + * one extra group alongside the tagged groups. */ async function groupAudioFilesByMetadata( dirPath: string, audioFiles: string[], - audioSizes: Map + audioSizes: Map, + folderName: string ): Promise> { @@ -291,14 +337,12 @@ async function groupAudioFilesByMetadata( metadata: AudioFileMetadata; }>(); - let ungroupedCounter = 0; - for (const { fileName, metadata } of metadataResults) { const key = buildGroupingKey(metadata); const fileSize = audioSizes.get(fileName) || 0; if (key) { - // Has metadata — group with others sharing the same key + // Has metadata title — group with others sharing the same key const existing = groups.get(key); if (existing) { existing.files.push(fileName); @@ -311,20 +355,28 @@ async function groupAudioFilesByMetadata( }); } } else { - // No title metadata — treat as individual book - const uniqueKey = `__ungrouped_${ungroupedCounter++}`; - groups.set(uniqueKey, { - files: [fileName], - totalSize: fileSize, - metadata, - }); + // No title metadata — collect all such files under one folder-level group. + // Key must start with '__ungrouped_' so deduplicateDiscoveries treats it + // as unique per folder (prefixes it with folderPath before deduplication). 
+ const ungroupedKey = '__ungrouped_folder'; + const existing = groups.get(ungroupedKey); + if (existing) { + existing.files.push(fileName); + existing.totalSize += fileSize; + } else { + groups.set(ungroupedKey, { + files: [fileName], + totalSize: fileSize, + metadata, + }); + } } } // Build result with search terms return Array.from(groups.entries()).map(([groupingKey, group]) => { group.files.sort((a, b) => a.localeCompare(b)); - const { searchTerm, source } = buildSearchTerm(group.metadata, group.files[0]); + const { searchTerm, source } = buildSearchTerm(group.metadata, group.files[0], folderName); return { files: group.files, totalSize: group.totalSize, @@ -398,6 +450,7 @@ function deduplicateDiscoveries( metadata: first.metadata, searchTerm: first.searchTerm, metadataSource: first.metadataSource, + extractedAsin: first.extractedAsin, audioFiles: combinedFiles, groupingKey: first.groupingKey, }); @@ -434,9 +487,10 @@ function findCommonParent(paths: string[]): string { * * Scans every folder for audio files. When audio files are found, they are * grouped by metadata (title + author + narrator) — each group becomes a - * separate discovered audiobook. Files with no metadata are treated as - * individual books. Scanning ALWAYS recurses into subfolders regardless of - * whether the current folder has audio files. + * separate discovered audiobook. Files with no metadata are all grouped + * together per folder (treated as one book) rather than one entry per file. + * Scanning ALWAYS recurses into subfolders regardless of whether the current + * folder has audio files. * * After the full walk, discoveries sharing the same grouping key across * different folders (e.g., CD1/ and CD2/) are merged. 
@@ -460,11 +514,13 @@ export async function discoverAudiobooks( foldersScanned++; + const folderName = path.basename(currentPath); + onProgress?.({ phase: 'discovering', foldersScanned, audiobooksFound: results.length, - currentFolder: path.basename(currentPath), + currentFolder: folderName, }); // Check if this folder contains audio files @@ -486,19 +542,22 @@ export async function discoverAudiobooks( phase: 'grouping', foldersScanned, audiobooksFound: results.length, - currentFolder: path.basename(currentPath), + currentFolder: folderName, }); - // Group audio files by metadata + // Group audio files by metadata, passing folder name for fallback search terms const groups = await groupAudioFilesByMetadata( currentPath, audioResult.audioFiles, - audioSizes + audioSizes, + folderName ); - const folderName = path.basename(currentPath); const relativePath = path.relative(rootPath, currentPath).replace(/\\/g, '/'); + // Extract ASIN from folder name once for all groups in this folder + const extractedAsin = extractAsinFromString(folderName) ?? undefined; + for (const group of groups) { results.push({ folderPath: currentPath.replace(/\\/g, '/'), @@ -509,6 +568,7 @@ export async function discoverAudiobooks( metadata: group.metadata, searchTerm: group.searchTerm, metadataSource: group.metadataSource, + extractedAsin, audioFiles: group.files, groupingKey: group.groupingKey, }); @@ -518,7 +578,7 @@ export async function discoverAudiobooks( phase: 'reading_metadata', foldersScanned, audiobooksFound: results.length, - currentFolder: path.basename(currentPath), + currentFolder: folderName, }); }