/*
    Author      : Steffen Polcz
    Description : Process audio files and compare them for duplicates, with logging.
                  The procedure is suitable for medium to large music collections.
                  Uses the same settings as Similarity.

                  my settings: content 85, precise 70, tags 30, duration 95

                  default, adding pairs when:
                      duration>95 or content>85 or precise>70 or tags>60
                  my plan, adding pairs when:
                      duration>95 & content>85 & precise>70 & (tags>30 or myTags>60)

                  If desired, files can be analyzed.
                  Export result to csv file, utf-8 encoded.
    Version     : 2021-12-15
*/

// when true, both files of every accepted pair are queued for 'rating' analysis
// and the rating is written to the export
var analysis = true;
// The backslash has to be escaped: "\e" is not an escape sequence, so the
// unescaped literal "F:\export-..." silently evaluates to "F:export-...".
var exportFile = "F:\\export-my-results.csv";

// --- FUNCTIONS ---

// Busy-wait for the given number of milliseconds (blocks the script while
// spinning). Only referenced from a commented-out call in the export loop.
function myWait(ms) {
    var onset = new Date().getTime();
    var end = onset;
    while (end < onset + ms) {
        end = new Date().getTime();
    }
}

// Convert a fractional score (e.g. 0.853) into a percent string with one
// decimal place and a decimal comma ("85,3") for German Excel.
// Returns "NaN" when no usable number is available (false, null, undefined,
// empty string or NaN). A real score of 0 is exported as "0"; the previous
// truthiness test turned it into "NaN" as well.
function convertDoubleToString(value) {
    if (value === false || value === null || value === undefined || value === "" || isNaN(value))
        return "NaN";
    var percent = Math.round(value * 1000.00) / 10.0 + "";
    // replace to german excel
    return percent.replace(".", ",");
}

// Strip joining words (" & ", " vs ", " feat ", " and ") and punctuation
// ( ) ' . % + from an artist or title tag before it is compared.
function myReplace(str) {
    str = str.replace(/ & | vs | feat | and /gi, ' ');
    return str.replace(/\(|\)|'|\.|%|\+/g, '');
}

// Compare the artist and title tags of two items with text.calculate.
// Returns 0.0 when either artist is shorter than 2 characters or either title
// is shorter than 3 characters, otherwise the SUM of the artist score and the
// title score.
// NOTE(review): the sum is compared against the single threshold myTag;
// presumably each score is in 0..1, so the sum can reach 2 - confirm that a
// sum (rather than an average) is intended.
function checkTags(item1, item2) {
    if (item1.audio.artist.length < 2 || item2.audio.artist.length < 2) return 0.0;
    if (item1.audio.title.length < 3 || item2.audio.title.length < 3) return 0.0;
    var artist = text.calculate(myReplace(item1.audio.artist), myReplace(item2.audio.artist));
    var title = text.calculate(myReplace(item1.audio.title), myReplace(item2.audio.title));
    return artist + title;
}

// Return the directory part of a path without the trailing backslash, or ""
// when the path contains no backslash after its first character.
function getPathName(value) {
    var cut = value.lastIndexOf("\\");
    return cut > 0 ? value.substring(0, cut) : "";
}

// Sort comparator: orders result pairs ascending by their precise score.
function comparePrecise(a, b) {
    if (a.precise < b.precise) return -1;
    if (a.precise > b.precise) return 1;
    return 0;
}

// Build one CSV line (terminated by CRLF) for a single file of a result pair.
//   group  - value of the "Group" column (index of the pair in the result list)
//   item   - the file to describe
//   rating - analysis rating of the file, or false when not analyzed
//   pair   - the result pair providing the precise/content/tags scores
function buildCsvRow(group, item, rating, pair) {
    return group
        + ";\"" + item.filename
        + "\";\"" + getPathName(item.path)
        + "\";2;" + Math.round(item.audio.duration / 1000)
        + ";" + item.audio.bitrate
        + ";" + convertDoubleToString(rating)
        + ";" + convertDoubleToString(pair.precise)
        + ";" + convertDoubleToString(pair.content)
        + ";" + convertDoubleToString(pair.tags)
        + ";" + item.tags.year
        + "\r\n";
}

// compare 2 audio items (asynchronous)
// Pairs are dropped early unless both durations exceed 30000 (milliseconds,
// given that the export divides by 1000 to get seconds) and the shorter one
// is at least `duration` times the longer one.
function oncompareAudio(item1, item2) {
    // check duration
    var d1 = item1.audio.duration;
    var d2 = item2.audio.duration;
    if (!(d1 > 30000 && d2 > 30000)) return;
    var shorter = d1 < d2 ? d1 : d2;
    var longer = d1 < d2 ? d2 : d1;
    if (shorter < longer * duration) return;

    // calculate scores for 2 items
    audio.calculate(item1, item2, audioAlgs, function (item1, item2, result) {
        // check for errors
        if (!result) return;
        ++total;

        // check thresholds: content AND precise must both pass ...
        if (!(result.content > content && result.precise > precise)) return;
        // ... and either the host tag score or the own tag comparison
        if (!(result.tags > tags || checkTags(item1, item2) > myTag)) return;

        results.audio.add(item1, item2, result);
        log("Add \"" + item1.filename + "\", \"" + item2.filename + "\"");
        if (analysis) {
            results.analysis.add(item1, 'rating');
            results.analysis.add(item2, 'rating');
        }
    });
}

// file processing callback (asynchronous)
function onfile(file) {
    // asynchronously open audio file firstly
    audio.queue(file, audioAlgs, function (file, result) {
        // start comparison with all items added previously (asynchronously)
        audio.compare(file, oncompareAudio);
    });
}

// folder processing callback (asynchronous)
function onfolder(folder) {
    // recursively process subdir with same callbacks
    process(folder, onfile, onfolder);
}

// --- MAIN ---

// built-in thresholds; each one is replaced by the program setting below
// when that setting is greater than 0 (myTag has no program setting)
var tags = 0.6;
var myTag = 0.6;
var duration = 0.95;
var content = 0.85;
var precise = 0.7;
// number of comparisons that delivered a result
var total = 0;

if (settings.audio.tags > 0.0) tags = settings.audio.tags;
if (settings.audio.content > 0.0) content = settings.audio.content;
if (settings.audio.precise > 0.0) precise = settings.audio.precise;
if (settings.audio.duration > 0.0) duration = settings.audio.duration;

var audioAlgs = ['tags', 'content', 'precise'];

// clear all results in program
results.audio.clear();

// start logging
var start = new Date();
log("scan start: " + start);
start = start.getTime();

// start processing files via callbacks (this function asynchronous)
for (var idx = 0; idx < settings.folders.length; ++idx)
    process(settings.folders[idx], onfile, onfolder);

// wait for the end of the process
wait();

// load result array
var dups = results.audio.dups;
// number of lines (files) written to the export
var cnt = 0;

// run through all pairs and export together with their data for further processing
// if necessary, the files can be analyzed and the result included in the export
if (dups.length > 0) {
    dups.sort(comparePrecise);

    // NOTE(review): the export file is never closed explicitly; if the host
    // file object offers a close method, call it after the loop - confirm
    // against the host scripting API.
    var file = openFile(exportFile, "w");
    file.write("Group;File;Path;Counter;Duration sec;Bitrate Byte;Rating %;Precise %;Content %;Tags %;Year\r\n");
    log("export result to CSV file ...");

    for (var pairIdx = 0; pairIdx < dups.length; ++pairIdx) {
        var pair = dups[pairIdx];

        // skip counter-pair (1-2 and 2-1), process pair only once
        if (pair.item1.path > pair.item2.path) continue;

        // prepare the comparative data of the pair
        var i1 = pair.item1;
        var i2 = pair.item2;
        var i1rating = analysis ? i1.analysis.rating : false;
        var i2rating = analysis ? i2.analysis.rating : false;

        // myWait(100);
        file.write(buildCsvRow(pairIdx, i1, i1rating, pair));
        ++cnt;
        file.write(buildCsvRow(pairIdx, i2, i2rating, pair));
        ++cnt;
    }
}
else
    log("no results for export file ...");

// stop logging and print a summary
var time = Math.round((new Date().getTime() - start) / 60000);
log("runtime total: " + time + " minute(s), compare total: " + total + ", export files: " + cnt);