Implement file hash-based library matching and remove fuzzy ASIN matching

Adds file hash-based matching for Audiobookshelf library items to ensure 100% accurate ASIN assignment for RMAB-organized content. Removes fuzzy matching from library availability checks, making all matching ASIN-only to eliminate false positives and race conditions. Updates database schema, processors, and matcher utilities; adds new tests and documentation for the new matching strategy. Removes obsolete scripts, Dockerfile, and related tests; updates docker-compose for test environments.
This commit is contained in:
kikootwo
2026-01-28 10:32:14 -05:00
parent 497849f427
commit a97979358f
111 changed files with 6571 additions and 1426 deletions
+74
View File
@@ -0,0 +1,74 @@
/**
* File Hash Utility
* Documentation: documentation/fixes/file-hash-matching.md
*
* Generates deterministic hashes of audio file collections for accurate library matching.
* Used to match RMAB-organized audiobooks with Audiobookshelf library items.
*/
import crypto from 'crypto';
import path from 'path';
/**
* Supported audio file extensions for hash generation
*/
const AUDIO_EXTENSIONS = ['.m4b', '.m4a', '.mp3', '.mp4', '.aa', '.aax'];
/**
* Generates a SHA256 hash of audio filenames for library matching.
*
* Process:
* 1. Extract basenames from file paths
* 2. Filter to supported audio extensions
* 3. Normalize to lowercase
* 4. Sort alphabetically
* 5. Generate SHA256 hash
*
* @param filePaths - Array of absolute or relative file paths
* @returns 64-character hex string (SHA256 hash) or empty string if no audio files
*
* @example
* ```typescript
* const hash = generateFilesHash([
* '/path/to/Chapter 01.mp3',
* '/path/to/Chapter 02.mp3',
* '/path/to/cover.jpg' // Filtered out (not audio)
* ]);
* // Returns: "abc123def456..." (64 chars)
* ```
*/
export function generateFilesHash(filePaths: string[]): string {
if (!filePaths || filePaths.length === 0) {
return '';
}
// Extract basenames and filter to audio files only
const audioBasenames = filePaths
.map((filePath) => path.basename(filePath))
.filter((basename) => {
const ext = path.extname(basename).toLowerCase();
return AUDIO_EXTENSIONS.includes(ext);
})
.map((basename) => basename.toLowerCase()) // Normalize case
.sort(); // Sort alphabetically for deterministic hash
// No audio files found
if (audioBasenames.length === 0) {
return '';
}
// Generate SHA256 hash
const hash = crypto
.createHash('sha256')
.update(JSON.stringify(audioBasenames))
.digest('hex');
return hash;
}
/**
* Validates if a hash string is a valid SHA256 hash
*/
export function isValidHash(hash: string): boolean {
return /^[a-f0-9]{64}$/i.test(hash);
}