Here’s the clustering idea. The basic idea is to take many NMF components, and then try to group them via K-means using some criteria or other. Here I’ve played with
- using the windowed maxima of the activations to try and group components with similar temporal profiles
- using the spectral profile from the NMF bases
- combining the two
You can compare and play with the number of clusters below:
//NMF Clustering Experiments for separation
(
// Load the source recording we want to decompose
~reef = "/Users/owen/Downloads/190712-naturalReef_crackle.wav";
~source = Buffer.read(s, ~reef, action: { "Audio Loaded".postln });
)
// 1 Use FluidBufNNDSVD to seed NMF with lots of components, which we'll try and cluster later
(
// Allocate destination buffers, then seed with NNDSVD: ask for up to 300
// components, retaining enough to cover 95% of the variance
~bases = Buffer.new;
~activations = Buffer.new;
FluidBufNNDSVD.process(s, ~source, ~bases, ~activations,
	maxComponents: 300,
	coverage: 0.95,
	action: { "Seeding Done".postln }
);
)
// 2 Develop the seeds with NMF
//We don't resynthesise yet, because it'll take ages and we don't need or want ~290 channels of audio
//How many components?
(
// Run NMF warm-started from the NNDSVD seeds (basesMode / actMode = 1),
// using however many components the seeding step actually produced
~nComponents = ~bases.numChannels;
FluidBufNMF.process(s, ~source,
	bases: ~bases, basesMode: 1,
	activations: ~activations, actMode: 1,
	components: ~nComponents,
	iterations: 10,
	action: { "NMF Done".postln }
);
)
//3 Make some datasets to cluster on
//3a Temporal: We'll try and group components together by their activations. At ~20k points, a brute force pair-wise comparison would be slow, but quite possibly useless because for these sorts of over-decomposed NMFs the activations may well be correlated on a longer time-scale but have lots of 'holes' from frame to frame
//Let's make a 'feature' by using the normalised peak amplitude across windows of 100 frames: hopefully this will give us a basis to let K-means make sensible distinctions
(
~activationFeature = Buffer.new;
~tmpPoint = Buffer.new;
~temporal = FluidDataSet(s, \temporal);
// Windowed loudness + peak of every activation channel: 100-frame windows
// with no overlap, K-weighting and true-peak both disabled
FluidBufLoudness.process(s, ~activations,
	features: ~activationFeature,
	kWeighting: 0, truePeak: 0,
	windowSize: 100, hopSize: 100,
	action: { "Peaks Found".postln }
);
)
//This gives us 2 * nComponents channels, because FluidBufLoudness returns both a mean(ish) and peak. We'll throw each peak channel into a dataset
//Doing like this, with no syncs and lots of buffers is much (much) faster than syncing each call, but it does make the
//order of insertion into the dataset non deterministic (which doesn't matter here)
(
// Copy each component's peak channel (the odd-numbered ones) into a scratch
// buffer, normalise it, and add it as a point. A shared countdown tells us
// when the last async compose has landed so we can standardize exactly once.
~standardizer = FluidStandardize(s);
~temporal.clear;
~tmpPoints = (~activationFeature.numChannels / 2).asInteger.collect{ Buffer.new };
"Making points...".postln;
~counter = ~tmpPoints.size;
~tmpPoints.do{ |buf, idx|
	FluidBufCompose.process(
		server: s,
		source: ~activationFeature,
		startChan: (2 * idx) + 1,
		numChans: 1,
		destination: buf,
		action: {
			buf.normalize;
			~temporal.addPoint(idx, buf);
			buf.free;
			~counter = ~counter - 1;
			if(~counter == 0){
				~standardizer.fitTransform(~temporal, ~temporal, {
					"Done making temporal dataset".postln;
					~temporal.print;
					~standardizer.free;
				});
			};
		}
	);
};
)
// 3b: Spectral
(
// Dataset for the spectral profiles: one point per NMF basis
~spectral = FluidDataSet(s,\spectral);
)
(
// One point per basis: copy each basis channel into its own scratch buffer
// and add it to the dataset; standardize once the countdown reaches zero.
~standardizer = FluidStandardize(s);
~spectral.clear;
~tmpPoints = ~bases.numChannels.collect{ Buffer.new };
~counter = ~tmpPoints.size;
~tmpPoints.do{ |buf, idx|
	FluidBufCompose.process(
		server: s,
		source: ~bases,
		startChan: idx,
		numChans: 1,
		destination: buf,
		action: {
			~spectral.addPoint(idx, buf);
			buf.free;
			~counter = ~counter - 1;
			if(~counter == 0){
				~standardizer.fitTransform(~spectral, ~spectral, {
					"Done making spectral dataset".postln;
					~spectral.print;
					~standardizer.free;
				});
			};
		}
	);
}
)
//3c: Spectro-temporal
(
// Dataset to hold the concatenated spectral + temporal features, plus a
// query object to perform the join
~spectrotemporal = FluidDataSet(s,\spectrotemporal);
~joiner = FluidDataSetQuery(s);
)
(
~spectrotemporal.clear;
// Select every spectral dimension (~bases.numFrames of them, i.e. one per
// FFT bin) as the columns to carry through the join...
~joiner.addRange(
start: 0,
count: ~bases.numFrames,
action: {
// ...then append each point's temporal features to its spectral features,
// writing the concatenated points into ~spectrotemporal
~joiner.transformJoin(
source1DataSet: ~spectral,
source2DataSet:~temporal,
destDataSet:~spectrotemporal,
action:
{
"Done making spectrotemporal dataset".postln;
~spectrotemporal.print;
}
);
}
);
)
//4 clustering
// Run k Means to find a given number of clusters, and then group together the bases and activations from NMF based
// on the cluster assignments. Finally, run NMF again for a single iteration on the grouped buffers to resynthesise a k-channel
// decomposition of the sound
//We want to do this for temporal, spectral and spectrotemporal things, to compare, and to experiment a bit with k, so here's a
//function to swallow the boilerplate:
(
// Cluster the points in `data` into k groups with k-means, sum the NMF bases
// and activations within each cluster, then run a single NMF iteration with
// those grouped (fixed) bases/activations to resynthesise a k-channel
// decomposition of ~source into `rendered`.
//   k        - number of clusters (and so, output channels)
//   data     - FluidDataSet to cluster on (temporal / spectral / spectrotemporal)
//   rendered - Buffer that receives the k-channel resynthesis
~renderClusters = {|k,data,rendered|
	Routine({
		// BUG FIX: numClusters was hard-coded to 4, silently ignoring k
		var clustering = FluidKMeans(s, numClusters: k);
		var labels = FluidLabelSet(s, \labels);
		var activationsGrouped = Buffer.new;
		var basesGrouped = Buffer.new;
		s.sync;
		"Clustering...".postln;
		clustering.fitPredict(data, labels, {|x| x.postln;});
		s.sync;
		labels.dump({|d|
			// renamed from `data`, which shadowed the function argument
			var assignments = d["data"];
			assignments.postln;
			// Mix each component's basis and activation into the channel
			// belonging to its assigned cluster
			assignments.keysValuesDo{|row, cluster|
				FluidBufCompose.process(s, ~bases, startChan: row.asInteger, numChans: 1, destination: basesGrouped, destStartChan: cluster[0].asInteger, destGain: 1);
				FluidBufCompose.process(s, ~activations, startChan: row.asInteger, numChans: 1, destination: activationsGrouped, destStartChan: cluster[0].asInteger, destGain: 1);
			};
		});
		s.sync;
		"Resynthesising".postln;
		// One iteration with fixed bases/activations, just to render audio
		FluidBufNMF.process(s, ~source, resynth: rendered, bases: basesGrouped, basesMode: 1, activations: activationsGrouped, actMode: 1, components: k, iterations: 1, action: {
			clustering.free;
			labels.free;
			activationsGrouped.free;
			basesGrouped.free;
			"Rendered audio".postln;
		});
	}).play;
};
)
//Render temporal clusters (4-way)
~resynthesis_temporal = Buffer.new;
~renderClusters.value(4,~temporal,~resynthesis_temporal);
//Render spectral clusters (4-way)
~resynthesis_spectral = Buffer.new;
~renderClusters.value(4,~spectral,~resynthesis_spectral);
//Render spectrotemporal clusters (4-way)
~resynthesis_spectrotemporal = Buffer.new;
~renderClusters.value(4,~spectrotemporal,~resynthesis_spectrotemporal);
//We'll reuse this to listen to results: returns a synth function that plays
//one selectable channel of a multichannel buffer, duplicated to stereo
~synthfn = {|buf| buf.postln; {|chan = 0| Select.ar(chan,PlayBuf.ar(buf.numChannels,buf)).dup}};
//pick a decomposition (evaluate one line at a time)
~synth = ~synthfn.value(~resynthesis_temporal).play;
~synth = ~synthfn.value(~resynthesis_spectral).play;
~synth = ~synthfn.value(~resynthesis_spectrotemporal).play;
//channel surf: audition each cluster's channel in turn
~synth.set(\chan,0);
~synth.set(\chan,1);
~synth.set(\chan,2);
~synth.set(\chan,3);
~synth.free;