mirror of
https://github.com/kikootwo/ReadMeABook.git
synced 2026-06-28 00:50:12 +00:00
Implement file hash-based library matching and remove fuzzy ASIN matching
Adds file hash-based matching for Audiobookshelf library items to ensure 100% accurate ASIN assignment for RMAB-organized content. Removes fuzzy matching from library availability checks, making all matching ASIN-only to eliminate false positives and race conditions. Updates database schema, processors, and matcher utilities; adds new tests and documentation for the new matching strategy. Removes obsolete scripts, Dockerfile, and related tests; updates docker-compose for test environments.
This commit is contained in:
@@ -0,0 +1,74 @@
|
||||
/**
|
||||
* File Hash Utility
|
||||
* Documentation: documentation/fixes/file-hash-matching.md
|
||||
*
|
||||
* Generates deterministic hashes of audio file collections for accurate library matching.
|
||||
* Used to match RMAB-organized audiobooks with Audiobookshelf library items.
|
||||
*/
|
||||
|
||||
import crypto from 'crypto';
|
||||
import path from 'path';
|
||||
|
||||
/**
|
||||
* Supported audio file extensions for hash generation
|
||||
*/
|
||||
const AUDIO_EXTENSIONS = ['.m4b', '.m4a', '.mp3', '.mp4', '.aa', '.aax'];
|
||||
|
||||
/**
|
||||
* Generates a SHA256 hash of audio filenames for library matching.
|
||||
*
|
||||
* Process:
|
||||
* 1. Extract basenames from file paths
|
||||
* 2. Filter to supported audio extensions
|
||||
* 3. Normalize to lowercase
|
||||
* 4. Sort alphabetically
|
||||
* 5. Generate SHA256 hash
|
||||
*
|
||||
* @param filePaths - Array of absolute or relative file paths
|
||||
* @returns 64-character hex string (SHA256 hash) or empty string if no audio files
|
||||
*
|
||||
* @example
|
||||
* ```typescript
|
||||
* const hash = generateFilesHash([
|
||||
* '/path/to/Chapter 01.mp3',
|
||||
* '/path/to/Chapter 02.mp3',
|
||||
* '/path/to/cover.jpg' // Filtered out (not audio)
|
||||
* ]);
|
||||
* // Returns: "abc123def456..." (64 chars)
|
||||
* ```
|
||||
*/
|
||||
export function generateFilesHash(filePaths: string[]): string {
|
||||
if (!filePaths || filePaths.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// Extract basenames and filter to audio files only
|
||||
const audioBasenames = filePaths
|
||||
.map((filePath) => path.basename(filePath))
|
||||
.filter((basename) => {
|
||||
const ext = path.extname(basename).toLowerCase();
|
||||
return AUDIO_EXTENSIONS.includes(ext);
|
||||
})
|
||||
.map((basename) => basename.toLowerCase()) // Normalize case
|
||||
.sort(); // Sort alphabetically for deterministic hash
|
||||
|
||||
// No audio files found
|
||||
if (audioBasenames.length === 0) {
|
||||
return '';
|
||||
}
|
||||
|
||||
// Generate SHA256 hash
|
||||
const hash = crypto
|
||||
.createHash('sha256')
|
||||
.update(JSON.stringify(audioBasenames))
|
||||
.digest('hex');
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
/**
|
||||
* Validates if a hash string is a valid SHA256 hash
|
||||
*/
|
||||
export function isValidHash(hash: string): boolean {
|
||||
return /^[a-f0-9]{64}$/i.test(hash);
|
||||
}
|
||||
Reference in New Issue
Block a user