Iterative Decomposition Events Scatterplot
This scatterplot represents musical events from a number of short (~3 second) segments of classical pieces drawn from the
MusicNet dataset. The segments were generated by a neural network
that iteratively/incrementally decomposes the audio into a sparse set of events and times-of-occurrence.
Each event is represented by a 32-dimensional vector, which describes the attack envelope and the resonance of both
the instrument and the room in which the performance occurs. Events are projected into a 2D space via t-SNE, and
colors are chosen via a random projection into 3D color space.
You can read more about the model architecture and training procedure, and listen to reconstructions
here.
Click or tap to play individual events.
# the size, in samples of the audio segment we'll overfit
n_samples = 2 ** 16
samples_per_event = 2048
n_events = n_samples // samples_per_event
context_dim = 32
# the samplerate, in hz, of the audio signal
samplerate = 22050
# derived, the total number of seconds of audio
n_seconds = n_samples / samplerate
transform_window_size = 2048
transform_step_size = 256
n_frames = n_samples // transform_step_size
from argparse import ArgumentParser
from typing import Dict, Tuple
import numpy as np
import torch
from sklearn.manifold import TSNE
from torch import nn
from conjure import S3Collection, \
conjure_article, AudioComponent, Logger, ScatterPlotComponent
from data import AudioIterator
from iterativedecomposition import Model as IterativeDecompositionModel
from modules.eventgenerators.overfitresonance import OverfitResonanceModel
from util import count_parameters
remote_collection_name = 'iterative-decomposition-scatterplot'
def to_numpy(x: torch.Tensor):
return x.data.cpu().numpy()
def process_events(
vectors: torch.Tensor,
times: torch.Tensor,
total_seconds: float
) -> Tuple:
positions = torch.argmax(times, dim=-1, keepdim=True) / times.shape[-1]
times = [float(x) for x in (positions * total_seconds).view(-1).data.cpu().numpy()]
normalized = vectors.data.cpu().numpy().reshape((-1, context_dim))
normalized = normalized - normalized.min(axis=0, keepdims=True)
normalized = normalized / (normalized.max(axis=0, keepdims=True) + 1e-8)
tsne = TSNE(n_components=2)
points = tsne.fit_transform(normalized)
proj = np.random.uniform(0, 1, (2, 3))
colors = points @ proj
colors -= colors.min()
colors /= (colors.max() + 1e-8)
colors *= 255
colors = colors.astype(np.uint8)
colors = [f'rgb({c[0]} {c[1]} {c[2]})' for c in colors]
return points, times, colors
def load_model(wavetable_device: str = 'cpu') -> nn.Module:
hidden_channels = 512
model = IterativeDecompositionModel(
in_channels=1024,
hidden_channels=hidden_channels,
resonance_model=OverfitResonanceModel(
n_noise_filters=32,
noise_expressivity=8,
noise_filter_samples=128,
noise_deformations=16,
instr_expressivity=8,
n_events=1,
n_resonances=4096,
n_envelopes=256,
n_decays=32,
n_deformations=32,
n_samples=n_samples,
n_frames=n_frames,
samplerate=samplerate,
hidden_channels=hidden_channels,
wavetable_device=wavetable_device,
fine_positioning=True
))
with open('iterativedecomposition4.dat', 'rb') as f:
model.load_state_dict(torch.load(f, map_location=lambda storage, loc: storage))
print('Total parameters', count_parameters(model))
print('Encoder parameters', count_parameters(model.encoder))
print('Decoder parameters', count_parameters(model.resonance))
return model
def scatterplot_section(logger: Logger, total_segments: int) -> ScatterPlotComponent:
model = load_model()
ai = AudioIterator(
batch_size=1,
n_samples=n_samples,
samplerate=22050,
normalize=True,
as_torch=True)
all_vectors = []
all_events = []
all_times = []
for i in range(total_segments):
batch = next(iter(ai))
print(f'processing segment {i}')
batch = batch.view(-1, 1, n_samples).to('cpu')
with torch.no_grad():
events, vectors, times = model.iterative(batch)
all_times.append(times)
all_events.append(events)
all_vectors.append(vectors)
vectors = torch.cat(all_vectors, dim=1)
events = torch.cat(all_events, dim=1)
times = torch.cat(all_times, dim=1)
total_seconds = n_samples / samplerate
points, times, colors = process_events(vectors, times, total_seconds)
events = events.view(-1, n_samples)
events = {f'event{i}': events[i: i + 1, :] for i in range(events.shape[0])}
scatterplot_srcs = []
event_components = {}
for k, v in events.items():
_, e = logger.log_sound(k, v)
scatterplot_srcs.append(e.public_uri)
event_components[k] = AudioComponent(e.public_uri, height=35, controls=False)
scatterplot_component = ScatterPlotComponent(
scatterplot_srcs,
width=1500,
height=1500,
radius=0.5,
points=points,
times=times,
colors=colors, )
return scatterplot_component
def demo_page_dict() -> Dict[str, any]:
print(f'Generating article...')
remote = S3Collection(
remote_collection_name, is_public=True, cors_enabled=True)
logger = Logger(remote)
print('Creating large scatterplot')
large_scatterplot = scatterplot_section(logger, total_segments=32)
return dict(
large_scatterplot=large_scatterplot,
)
def generate_demo_page():
display = demo_page_dict()
conjure_article(
__file__,
'html',
title='Iterative Decomposition Events Scatterplot',
**display)
if __name__ == '__main__':
parser = ArgumentParser()
parser.add_argument('--clear', action='store_true')
parser.add_argument('--list', action='store_true')
args = parser.parse_args()
if args.list:
remote = S3Collection(
remote_collection_name, is_public=True, cors_enabled=True)
print(remote)
print('Listing stored keys')
for key in remote.iter_prefix(start_key=b'', prefix=b''):
print(key)
if args.clear:
remote = S3Collection(
remote_collection_name, is_public=True, cors_enabled=True)
remote.destroy(prefix=b'')
generate_demo_page()
Back to Top