Improve ASIN/cleaning logic and add tests

Refactor bulk-import scanner to make ASIN extraction and search-string cleaning more robust, and add tests.

- Tighten the ASIN regex and make it case-insensitive; always return the ASIN in uppercase.
- Export and use cleanSearchString (replaces inline folder-name sanitization in the scan route).
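- When merging discoveries across folders, derive folderName/relativePath consistently from the merged common parent, and re-extract the ASIN from that parent's folder name, falling back to the first discovery's ASIN when the folder name contains none.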
- Add comprehensive unit/integration tests for extractAsinFromString, cleanSearchString, buildSearchTerm, and discoverAudiobooks (with an ffprobe mock).

These changes improve detection of ASINs in varied naming patterns, reduce duplicated cleanup logic, and ensure merged groups correctly inherit ASIN metadata.
This commit is contained in:
kikootwo
2026-05-15 05:25:32 -04:00
parent 1711d256c2
commit f56efa8b15
3 changed files with 325 additions and 13 deletions
+7 -6
View File
@@ -75,8 +75,8 @@ function isAudioFile(filename: string): boolean {
* Returns the ASIN string or null if not found.
*/
export function extractAsinFromString(str: string): string | null {
const match = str.match(/(?:^|[\s\[\(])([B][A-Z0-9]{9})(?:$|[\s\]\)])/);
return match ? match[1] : null;
const match = str.match(/(?:^|[^A-Z0-9])(B[A-Z0-9]{9})(?:$|[^A-Z0-9])/i);
return match ? match[1].toUpperCase() : null;
}
/**
@@ -163,7 +163,7 @@ export function deduplicateNames(
* Strips file extension, bracketed ASINs, bracketed years, leading track numbers,
* underscores, and collapses whitespace.
*/
function cleanSearchString(raw: string): string {
export function cleanSearchString(raw: string): string {
return raw
.replace(/\.[^.]+$/, '') // Remove file extension
.replace(/[\[\(][A-Z0-9]{10}[\]\)]/g, '') // Remove ASIN in brackets
@@ -458,16 +458,17 @@ function deduplicateDiscoveries(
combinedCount += disc.audioFileCount;
}
const mergedFolderName = path.basename(commonParent);
merged.push({
folderPath: commonParent,
folderName: path.basename(commonParent),
relativePath: first.relativePath.split('/').slice(0, -1).join('/') || path.basename(commonParent),
folderName: mergedFolderName,
relativePath: first.relativePath.split('/').slice(0, -1).join('/') || mergedFolderName,
audioFileCount: combinedCount,
totalSizeBytes: combinedSize,
metadata: first.metadata,
searchTerm: first.searchTerm,
metadataSource: first.metadataSource,
extractedAsin: first.extractedAsin,
extractedAsin: extractAsinFromString(mergedFolderName) ?? first.extractedAsin,
audioFiles: combinedFiles,
groupingKey: first.groupingKey,
});