export function extractAsinFromString(str: string): string | null {
  // Purpose: find the first ASIN-shaped token in `str` (a folder or file name)
  // and return it upper-cased, or null when none is present.
  //
  // Token shape: the letter B followed by nine alphanumerics, delimited on
  // both sides by the string edge or any non-alphanumeric character.
  // Matching is case-insensitive so lower-cased folder names still resolve.
  //
  // The lookahead after `B` requires at least one digit among the following
  // nine characters. Without it, the case-insensitive match would report
  // ordinary ten-letter words that begin with B (e.g. "Basketball",
  // "Bestseller") as ASINs.
  const match = str.match(
    /(?:^|[^A-Z0-9])(B(?=[A-Z0-9]{0,8}[0-9])[A-Z0-9]{9})(?:$|[^A-Z0-9])/i,
  );
  return match ? match[1].toUpperCase() : null;
}
/**
 * Configure the ffprobe mock so each invocation returns canned tags
 * keyed by the file path embedded in the command string.
 *
 * @param tagsByFile Map from forward-slash file path to the tag dictionary
 *   that the mocked command should report for that file. Paths that are not
 *   present yield an empty tag set.
 */
function mockFfprobeByFile(
  tagsByFile: Record<string, Record<string, string>>,
): void {
  execMock.mockImplementation(
    (command: string, options: unknown, callback?: unknown) => {
      // The mock may be invoked as (command, callback) or as
      // (command, options, callback); pick whichever argument is the callback.
      const cb = (typeof options === 'function' ? options : callback) as (
        err: Error | null,
        stdout: string,
        stderr: string,
      ) => void;

      // The file path is taken to be the last double-quoted segment at the
      // very end of the command. Backslashes are normalised to forward
      // slashes so lookups use a single separator style.
      const match = command.match(/"([^"]+)"\s*$/);
      const filePath = match ? match[1].replace(/\\/g, '/') : '';

      const tags = tagsByFile[filePath] ?? {};
      const payload = JSON.stringify({ format: { tags } });
      cb(null, payload, '');
    },
  );
}
number from "%s"', (input, expected) => { + expect(cleanSearchString(input)).toBe(expected); + }); + + it('converts underscores to spaces', () => { + expect(cleanSearchString('The_Gunslinger')).toBe('The Gunslinger'); + }); + + it('collapses internal whitespace', () => { + expect(cleanSearchString('The Gunslinger Book')).toBe('The Gunslinger Book'); + }); + + it('combines multiple transformations', () => { + expect( + cleanSearchString('01_The_Gunslinger_[B019NOKST6]_(1982).m4b'), + ).toBe('The Gunslinger'); + }); +}); + +describe('buildSearchTerm', () => { + it('uses tags when title is present (title + author + narrator)', () => { + expect( + buildSearchTerm( + { title: 'The Gunslinger', author: 'Stephen King', narrator: 'George Guidall' }, + 'whatever.m4b', + ), + ).toEqual({ + searchTerm: 'The Gunslinger Stephen King George Guidall', + source: 'tags', + }); + }); + + it('uses title alone when no other metadata fields are present', () => { + expect(buildSearchTerm({ title: 'The Gunslinger' }, 'whatever.m4b')).toEqual({ + searchTerm: 'The Gunslinger', + source: 'tags', + }); + }); + + it('falls back to folder name when no title and folder is non-generic', () => { + expect( + buildSearchTerm({}, 'track01.m4b', 'The Gunslinger (B019NOKST6)'), + ).toEqual({ searchTerm: 'The Gunslinger', source: 'folder_name' }); + }); + + it('falls back to file name when folder name is generic', () => { + expect(buildSearchTerm({}, 'The Gunslinger Chapter 1.m4b', 'CD1')).toEqual({ + searchTerm: 'The Gunslinger Chapter 1', + source: 'file_name', + }); + }); + + it.each([ + 'CD1', + 'CD 1', + 'cd2', + 'Disc 2', + 'disc3', + 'Disk 4', + 'DISK 5', + 'Part 1', + 'part2', + 'Vol 1', + 'vol2', + 'Volume 3', + 'VOLUME 99', + ])('treats "%s" as a generic folder name', (folderName) => { + const result = buildSearchTerm({}, 'whatever.m4b', folderName); + expect(result.source).toBe('file_name'); + }); + + it.each(['CD Player', 'Discworld', 'Particle Physics', 'Volumetric Sound'])( + 'does not 
/**
 * End-to-end checks for discoverAudiobooks against a real temporary
 * directory tree. Audio files are created empty; the tags for each file are
 * supplied through mockFfprobeByFile.
 */
describe('discoverAudiobooks integration', () => {
  let tmpDir: string;

  // Each test gets its own scratch directory, removed again afterwards.
  beforeEach(async () => {
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'rmab-bulk-import-'));
  });

  afterEach(async () => {
    await fs.rm(tmpDir, { recursive: true, force: true });
  });

  /** Create `dir` (including parents) and write an empty file for each name. */
  async function createAudioFiles(dir: string, names: string[]): Promise<void> {
    await fs.mkdir(dir, { recursive: true });
    for (const name of names) {
      await fs.writeFile(path.join(dir, name), '');
    }
  }

  /** Convert backslashes to forward slashes so mock keys use one separator style. */
  function fwd(p: string): string {
    return p.replace(/\\/g, '/');
  }

  it('absorbs untagged files into the single tagged group in the same folder', async () => {
    const bookDir = path.join(tmpDir, 'The Gunslinger');
    await createAudioFiles(bookDir, ['01.m4b', '02.m4b', '03.m4b']);

    mockFfprobeByFile({
      [fwd(path.join(bookDir, '01.m4b'))]: {
        album: 'The Gunslinger',
        album_artist: 'Stephen King',
      },
      [fwd(path.join(bookDir, '02.m4b'))]: {
        album: 'The Gunslinger',
        album_artist: 'Stephen King',
      },
      // 03.m4b returns empty tags -> ungrouped, then absorbed
    });

    const results = await discoverAudiobooks(tmpDir);

    expect(results).toHaveLength(1);
    expect(results[0].audioFileCount).toBe(3);
    expect(results[0].audioFiles).toEqual(['01.m4b', '02.m4b', '03.m4b']);
    expect(results[0].metadata.title).toBe('The Gunslinger');
    expect(results[0].metadataSource).toBe('tags');
  });

  it('keeps untagged group separate when multiple tagged groups exist in the same folder', async () => {
    const mixedDir = path.join(tmpDir, 'Mixed');
    await createAudioFiles(mixedDir, ['a1.m4b', 'b1.m4b', 'untagged.m4b']);

    mockFfprobeByFile({
      [fwd(path.join(mixedDir, 'a1.m4b'))]: {
        album: 'Book A',
        album_artist: 'Author A',
      },
      [fwd(path.join(mixedDir, 'b1.m4b'))]: {
        album: 'Book B',
        album_artist: 'Author B',
      },
      // untagged.m4b empty
    });

    const results = await discoverAudiobooks(tmpDir);

    expect(results).toHaveLength(3);
    const titles = results.map((r) => r.metadata.title).sort();
    expect(titles).toEqual(['Book A', 'Book B', undefined]);

    const untagged = results.find((r) => !r.metadata.title);
    expect(untagged?.audioFiles).toEqual(['untagged.m4b']);
    expect(untagged?.metadataSource).toBe('folder_name');
  });

  it('re-derives extractedAsin from the common parent on cross-folder merge', async () => {
    const parentDir = path.join(tmpDir, 'Some Book (B019NOKST6)');
    const cd1Dir = path.join(parentDir, 'CD1');
    const cd2Dir = path.join(parentDir, 'CD2');
    await createAudioFiles(cd1Dir, ['01.m4b']);
    await createAudioFiles(cd2Dir, ['02.m4b']);

    mockFfprobeByFile({
      [fwd(path.join(cd1Dir, '01.m4b'))]: {
        album: 'Some Book',
        album_artist: 'Some Author',
      },
      [fwd(path.join(cd2Dir, '02.m4b'))]: {
        album: 'Some Book',
        album_artist: 'Some Author',
      },
    });

    const results = await discoverAudiobooks(tmpDir);

    expect(results).toHaveLength(1);
    const merged = results[0];
    expect(merged.folderName).toBe('Some Book (B019NOKST6)');
    expect(merged.extractedAsin).toBe('B019NOKST6');
    expect(merged.audioFileCount).toBe(2);
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array on the object under test.
    expect([...merged.audioFiles].sort()).toEqual(['CD1/01.m4b', 'CD2/02.m4b']);
  });

  it('extracts ASIN from a single-folder book', async () => {
    const bookDir = path.join(tmpDir, 'The Gunslinger (B019NOKST6)');
    await createAudioFiles(bookDir, ['01.m4b']);

    mockFfprobeByFile({
      [fwd(path.join(bookDir, '01.m4b'))]: {
        album: 'The Gunslinger',
        album_artist: 'Stephen King',
      },
    });

    const results = await discoverAudiobooks(tmpDir);

    expect(results).toHaveLength(1);
    expect(results[0].extractedAsin).toBe('B019NOKST6');
  });
});