The problem is well-known and fun to play with - I will do a more formal explanation but a first attempt has been made here: Musaiking non-overlaping spectral descriptor spaces - an attempt at explaining the issue clearly - #4 by tremblap
Yesterday in the workshop I showed this bit of SC code. It is underdocumented, but might be helpful as a starting point. It also shows bundled batch processing, which is quite efficient.
This was devised with one scaler when @weefuzzy and I were working on our set for MusAI - soon online. I’ve since added a few more. The next step is instead to have the performer curate the mappings themselves using an MLP, as I’ve shown here: Latent-space mapping example code - the setup will be used in both my works in progress (violin+electro and electric guitar+electro). Obviously the player linking the granular density to the volume won’t be the same - but by that point in the patch, the hard stuff is done.
Enjoy!
//jump to reload if needs be
// --- corpus loading and slicing: evaluate these lines one at a time ---
// load the corpus sound file into a server buffer
~corpus = Buffer.read(s,"/Volumes/machins/projets/sylvain/patch/Audio/corpus.wav")
// scratch buffers: b receives the slice indices here (and is reused for MFCC features in the analysis below),
// c receives the statistics, d the flattened point
b = Buffer(s); c = Buffer(s); d = Buffer(s);
// dataset that receives one point per corpus slice
~corpusDS = FluidDataSet(s)
// slice the corpus; when the job is done, b is copied into the language as ~slices (and posted)
// NOTE(review): metric 9 and threshold 0.1 are tuned by ear for this corpus - see the FluidBufOnsetSlice help for the metric list
FluidBufOnsetSlice.process(s, ~corpus, numChans: 1,indices: b, metric: 9, threshold: 0.1, minSliceLength: 10, hopSize: 256, action: {b.loadToFloatArray(action: {|x| ~slices = x.postln;})})
// run only after ~slices has been posted: if more than 2560 frames remain after the last slice point,
// add the end of the file as a final boundary so the tail becomes a slice too
if (~corpus.numFrames - ~slices.last > 2560, {~slices = ~slices.add(~corpus.numFrames)})
// number of slice boundaries (the number of slices is one less)
~slices.size
// empty the dataset before (re)running the analysis
~corpusDS.clear
// run the MFCC analysis: one dataset point per slice, made of the mean and std of each coefficient
(
{
	var startedAt = Main.elapsedTime;
	var pending = 0;
	var total = (~slices.size-1).asInteger;
	var elapsed;
	// walk every (start, end) pair of consecutive slice boundaries
	~slices.doAdjacentPairs{|from, to, n|
		"processing slice % of %\n".postf((n+1).asInteger, total);
		// MFCCs of the slice -> b, their mean/std -> c, flattened to a single frame -> d
		FluidBufMFCC.processBlocking(s, source: ~corpus, startFrame: from, numFrames: to - from, numChans: 1, features: b, startCoeff: 1, minFreq: 40, maxFreq: 10000, windowSize: 2048, hopSize: 512);
		FluidBufStats.processBlocking(s, source: b, stats: c, select: [\mean, \std]);
		FluidBufFlatten.processBlocking(s, c, destination: d);
		// the point identifier is the slice start frame
		~corpusDS.addPoint(from.asInteger, d);
		// let the server catch up every 500 or so queued slices
		pending = pending + 1;
		if (pending > 500) { s.sync; pending = 0 };
	};
	// wait for the remaining queued jobs before reporting the duration
	s.sync;
	elapsed = Main.elapsedTime - startedAt;
	"elapsed time = %m %s\n".postf(elapsed.div(60), elapsed.mod(60));
}.fork
)
// check what the analysis produced
~corpusDS.size
~corpusDS.print
// transfer the slice (relative) positions to another DS
~idx2pos = FluidDataSet(s)
(
// one entry per slice (the last boundary is dropped since it only closes the final slice):
// identifier = start frame as a symbol, value = start frame divided by the corpus length
var entries = ~slices.drop(-1).collect{|startFrame|
	[startFrame.asInteger.asSymbol, startFrame/~corpus.numFrames]
};
var points = Dictionary.newFrom(entries.flat);
~idx2pos.load(Dictionary.newFrom([\cols, 1, \data, points]));
)
~idx2pos.print
// Three alternative normalisations of the corpus dataset, each with its own KD-tree.
// Evaluate line by line: each fitTransform must have completed before the matching tree is fitted.
//OPTION A - robust scaling the corpus
~corpusDSscaled = FluidDataSet(s)
// NOTE(review): 10 and 90 are presumably the low/high percentiles - confirm against the FluidRobustScale help
~scaler = FluidRobustScale(s, 10, 90)
// fit on the corpus and write the scaled points to ~corpusDSscaled; posts \done when finished
~scaler.fitTransform(~corpusDS, ~corpusDSscaled, {\done.postln})
~corpusDSscaled.print
// tree queried by the player when the robust scaler is selected
~treeR = FluidKDTree(s);
~treeR.fit(~corpusDSscaled);
//OPTION B - std
~corpusDSstd = FluidDataSet(s)
~stan = FluidStandardize(s)
// fit on the corpus and write the standardised points to ~corpusDSstd; posts \done when finished
~stan.fitTransform(~corpusDS, ~corpusDSstd, {\done.postln})
~corpusDSstd.print
// tree queried by the player when standardisation is selected
~treeS = FluidKDTree(s);
~treeS.fit(~corpusDSstd);
//OPTION C - PCA
~corpusDSpca = FluidDataSet(s)
// 16 output dimensions; NOTE(review): the trailing 1 is presumably the whiten flag - confirm against the FluidPCA help
~pca = FluidPCA(s,16,1)
// ~pca.numDimensions = 16 // to get 99% variance accounted for
// fit on the corpus and write the projected points to ~corpusDSpca; posts whatever the action receives
~pca.fitTransform(~corpusDS, ~corpusDSpca, {|x|x.postln})
~corpusDSpca.print
// tree queried by the player when PCA is selected
~treeP = FluidKDTree(s);
~treeP.fit(~corpusDSpca);
//could write it here if one wants
// ~tree.write("/Volumes/machins/projets/sylvain/patch/Audio/corpus-tree.json")
// ~idx2pos.write("/Volumes/machins/projets/sylvain/patch/Audio/corpus-db-pos.json")
/////////////////////
// IF READING
// ~tree = FluidKDTree(s);
// ~scaler = FluidRobustScale(s, 10, 90)
// ~idx2pos = FluidDataSet(s);
// ~corpus = Buffer.read(s,"/Volumes/machins/projets/sylvain/patch/Audio/corpus.wav")
// ~tree.read("/Volumes/machins/projets/sylvain/patch/Audio/corpus-tree.json")
// ~idx2pos.read("/Volumes/machins/projets/sylvain/patch/Audio/corpus-db-pos.json")
// ENDIF
// dataset receiving the recorded input analysis, and the control bus carrying the live 26-value analysis
~inputDS = FluidDataSet(s);
~dabus = Bus.control(s,26);
// RT analysis of the input and optional DS writer
(
~inputMUS = {arg in = 0, out = 0, rec = 0;
	var hopBlocks, writeTrig, mfcc, features, pointBuf;
	// number of control blocks in one 512-sample analysis hop
	hopBlocks = 512 / BlockSize.ir;
	// one trigger per hop, only passed on while rec is non-zero
	writeTrig = Impulse.kr(ControlRate.ir / hopBlocks) * rec;
	// same MFCC settings as the corpus analysis
	mfcc = FluidMFCC.kr(SoundIn.ar(~inputs[0]), startCoeff: 1, minFreq: 40, maxFreq: 10000, windowSize: 2048, hopSize: 512);
	// running statistics of the MFCCs, flattened to a single array
	features = FluidStats.kr(mfcc, 10*hopBlocks).flatten;
	pointBuf = LocalBuf(26);
	FluidKrToBuf.kr(features, pointBuf);
	// while recording, each trigger writes the current point under a stepped identifier
	FluidDataSetWr.kr(~inputDS, buf: pointBuf, idNumber: Stepper.kr(writeTrig, min: 0, max: 10000,resetval: -1), trig: writeTrig);
	// the live analysis is always published on the control bus
	Out.kr(~dabus,features);
}.play(~guit, addAction: \addAfter);
)
// housekeeping: clear / inspect the recorded dataset, start and stop recording, free the synth, watch the bus
~inputDS.clear
~inputDS.print
~inputMUS.set(\rec, 1);
~inputMUS.set(\rec, 0);
~inputMUS.free
~dabus.scope
// re-fit the three transforms on the recorded input dataset (instead of the corpus),
// so that the live input is transformed relative to its own statistics before the corpus trees are queried
(
~scaler.fit(~inputDS);
~stan.fit(~inputDS);
~pca.fit(~inputDS);
)
// granular player: looks up the nearest corpus slice for the live analysis and plays grains from its position
// which = 0 uses the robust scaler, 1 the standardiser, 2 the PCA
(
~playerDS = {arg in = 0, sidechain = 0, play = 0, vol = 0.1, which = 0;
	// the live point, its three transformed versions, and the single-value lookup result
	var livePoint = LocalBuf(26);
	var robustPoint = LocalBuf(26);
	var stdPoint = LocalBuf(26);
	var pcaPoint = LocalBuf(16);
	var found = LocalBuf(1);
	// one query trigger every 512 samples
	var tick = Impulse.kr(SampleRate.ir / 512);
	var pos, robustDone, stdDone, pcaDone, grainTrig, mask, outChan, grains, follower;
	FluidKrToBuf.kr(In.kr(~dabus,26),livePoint);
	// 2 to the power of which sets a single bit, so only one of the three chains below gets triggered
	mask = 2.pow(which);
	// each chain: transform the live point, then ask the matching tree for the nearest neighbour,
	// with ~idx2pos as the lookup dataset so the value written to found is the slice position
	robustDone = ~scaler.kr(tick * (mask&1), livePoint, robustPoint);
	~treeR.kr(robustDone, robustPoint, found, 1, 0, ~idx2pos);//.poll(robustDone, \scale);
	stdDone = ~stan.kr(tick * (mask&2), livePoint, stdPoint);
	~treeS.kr(stdDone, stdPoint, found, 1, 0, ~idx2pos);//.poll(stdDone, \sta);
	pcaDone = ~pca.kr(tick * (mask&4), livePoint, pcaPoint);
	~treeP.kr(pcaDone, pcaPoint, found, 1, 0, ~idx2pos);//.poll(pcaDone,\pca);
	pos = FluidBufToKr.kr(found);
	// grain trigger rate follows the smoothed absolute input signal
	grainTrig = Impulse.ar(LagUD.ar((SoundIn.ar(~inputs[0]) * 10).abs, 0.001, 0.2, 50));
	// each grain goes to a random channel among three starting at ~outbus
	outChan = ~outbus.index+TIRand.ar(0,2,grainTrig);
	grains = GrainBuf.ar(1,grainTrig,0.2,~corpus,pos: pos, interp: 1);
	// amplitude follower of the input, soft-limited with atan
	follower = LagUD.ar((SoundIn.ar(~inputs[0]) * 5).atan.abs, 0.005, 0.1, 10).atan;
	Out.ar(outChan, grains * follower * vol);
}.play(~inputMUS,addAction: \addAfter);
)
// select the transform, or free the synths
~playerDS.set(\which,0)
~playerDS.free
~inputMUS.free
// loop to leaky adjust the range:
// every second the robust scaler is re-fitted on the recorded input, and its median/range are
// replaced by a leaky average (10% new fit, 90% running value) so the scaling follows the player slowly
(
//first time round just pick up the state
~scaler.fit(~inputDS, action:{
	~scaler.dump{|state|
		~runningmedian = state["median"];
		~runningrange = state["range"];
	};
});
~leakyfollow = Routine{
	// keep the analysis synth recording into ~inputDS
	// (the synth is ~inputMUS; ~input is not defined in this script, so the call failed and stopped the routine)
	~inputMUS.set(\rec, 1);
	1.wait;
	\refit.postln;
	~scaler.fit(~inputDS, action:{
		~scaler.dump{|state|
			// if the initial fit above has not reported yet, seed the running values from this fit
			~runningmedian = (state["median"] * 0.1) + ((~runningmedian ? state["median"]) * 0.9);
			~runningrange = (state["range"] * 0.1) + ((~runningrange ? state["range"]) * 0.9);
			// write the smoothed values back into the scaler
			state["median"] = ~runningmedian;
			state["range"] = ~runningrange;
			~scaler.load(state);
		};
	});
}.loop.play;
)
// stop the loop / start it again
~leakyfollow.stop
~leakyfollow.reset.loop.play
// test attack player: sends one HPSS component of the live input straight to an output
(
~attack = {arg vol = 3;
	var hpss = FluidHPSS.ar(SoundIn.ar(~inputs[0]),harmFilterSize: 5, percFilterSize: 11, windowSize: 128,maxHarmFilterSize: 5, maxPercFilterSize: 31);
	// NOTE(review): output index 1 is presumably the percussive component - confirm against the FluidHPSS help
	var component = hpss[1];
	// scale by vol, then soft-limit with atan
	(component * vol).atan;
}.play(outbus:~outbus.index+3);
)
~attack.free
// fader control of the two synth volumes, received as OSC on port 7773
// the fader value x[1] is mapped to dB as (value - offset) / 2, then converted to linear amplitude
// Fader1 drives the granular player (~playerDS; ~player is not defined in this script), Fader2 the attack player
OSCFunc({|x|var val = ((x[1] - 100) / 2).dbamp; ~playerDS.set(\vol,val);}, "/sb3/Fader1",recvPort: 7773);
OSCFunc({|x|var val = ((x[1] - 70) / 2).dbamp; ~attack.set(\vol,val);}, "/sb3/Fader2",recvPort: 7773);
// 5 other hypotheses to match better
// 1. remove the outliers first, go through PCA with whitening (rotations and inverses possible)
// 2. umap 1d normalised (2 orders)
// 3. grid to map 2d to 2d (again flips)
// 4. cluster mapping - largest cluster to largest cluster, etc
// 5. nn mapping between spaces (arbitrary or not)