Cleaning the codebase

This commit is contained in:
2025-09-29 15:19:11 +02:00
parent 623082ce3b
commit 709ba3a32a
63 changed files with 1479 additions and 4207 deletions

View File

@@ -23,7 +23,7 @@ const audioData = synthesizeFromImage(imageData, {
duration: 10,
minFreq: 100,
maxFreq: 10000,
maxPartials: 200
maxPartials: 200,
})
// Export as WAV
@@ -35,6 +35,7 @@ downloadWAV(audioData, 44100, 'my-audio.wav')
### Main Functions
#### `synthesizeFromImage(imageData, params?)`
- **imageData**: `ImageData` - Canvas image data
- **params**: `Partial<SynthesisParams>` - Optional parameters
- **Returns**: `Float32Array` - Audio samples
@@ -42,16 +43,17 @@ downloadWAV(audioData, 44100, 'my-audio.wav')
### Types
#### `SynthesisParams`
```typescript
interface SynthesisParams {
duration: number // Audio duration in seconds
minFreq: number // Minimum frequency in Hz
maxFreq: number // Maximum frequency in Hz
sampleRate: number // Sample rate in Hz
duration: number // Audio duration in seconds
minFreq: number // Minimum frequency in Hz
maxFreq: number // Maximum frequency in Hz
sampleRate: number // Sample rate in Hz
frequencyResolution: number // Frequency bin downsampling
timeResolution: number // Time slice downsampling
timeResolution: number // Time slice downsampling
amplitudeThreshold: number // Minimum amplitude threshold
maxPartials: number // Maximum simultaneous partials
maxPartials: number // Maximum simultaneous partials
}
```
@@ -80,6 +82,7 @@ spectral-synthesis/
## Usage Examples
### Basic Synthesis
```typescript
const canvas = document.createElement('canvas')
const ctx = canvas.getContext('2d')
@@ -89,14 +92,15 @@ const audio = synthesizeFromImage(imageData)
```
### Advanced Usage
```typescript
import { ImageToAudioSynthesizer } from './spectral-synthesis'
const synthesizer = new ImageToAudioSynthesizer({
duration: 5,
maxPartials: 150
maxPartials: 150,
})
const result = synthesizer.synthesize(imageData)
console.log(`Generated ${result.duration}s of audio`)
```
```

View File

@@ -25,7 +25,7 @@ export function createWAVBuffer(audioData: Float32Array, sampleRate: number): Ar
let offset = 44
for (let i = 0; i < length; i++) {
const sample = Math.max(-1, Math.min(1, audioData[i]))
view.setInt16(offset, sample * 0x7FFF, true)
view.setInt16(offset, sample * 0x7fff, true)
offset += 2
}
@@ -83,7 +83,9 @@ export function createAudioPlayer(audioData: Float32Array, sampleRate: number):
gainNode.connect(audioContext.destination)
if (audioContext.sampleRate !== sampleRate) {
console.warn(`Audio context sample rate (${audioContext.sampleRate}) differs from data sample rate (${sampleRate})`)
console.warn(
`Audio context sample rate (${audioContext.sampleRate}) differs from data sample rate (${sampleRate})`
)
}
}
}
@@ -102,7 +104,8 @@ export function createAudioPlayer(audioData: Float32Array, sampleRate: number):
if (isPaused) {
// Resume from pause is not supported with AudioBufferSource
// We need to restart from the beginning
// We need to restart from the beginning; the pausedAt offset is not used yet
void pausedAt // Track for future resume feature
isPaused = false
pausedAt = 0
}
@@ -163,7 +166,7 @@ export function createAudioPlayer(audioData: Float32Array, sampleRate: number):
onStateChange(callback: (isPlaying: boolean) => void) {
stateCallback = callback
}
},
}
}
@@ -174,11 +177,11 @@ export async function playAudio(audioData: Float32Array, sampleRate: number): Pr
const player = createAudioPlayer(audioData, sampleRate)
return new Promise(resolve => {
player.onStateChange((isPlaying) => {
player.onStateChange(isPlaying => {
if (!isPlaying) {
resolve()
}
})
player.play()
})
}
}

View File

@@ -9,7 +9,7 @@ import {
generateSpectralDensity,
mapFrequency,
mapFrequencyLinear,
normalizeAudioGlobal
normalizeAudioGlobal,
} from './utils'
/**
@@ -78,7 +78,7 @@ export class ImageToAudioSynthesizer {
disableNormalization: false,
disableContrast: false,
exactBinMapping: false,
...params
...params,
}
}
@@ -93,7 +93,6 @@ export class ImageToAudioSynthesizer {
}
}
/**
* Custom synthesis mode - sophisticated audio processing
*/
@@ -112,7 +111,7 @@ export class ImageToAudioSynthesizer {
spectralDensity,
usePerceptualWeighting,
frequencyMapping,
invert = false
invert = false,
} = this.params
// Calculate synthesis parameters
@@ -136,7 +135,6 @@ export class ImageToAudioSynthesizer {
const previousAmplitudes = new Float32Array(effectiveHeight)
const smoothingFactor = 0.2 // Reduced for sharper transients
// Process each time slice
for (let col = 0; col < effectiveWidth; col++) {
const sourceCol = col
@@ -160,9 +158,12 @@ export class ImageToAudioSynthesizer {
return normalizedDb
})
// Detect spectral peaks
const peaks = detectSpectralPeaks(processedSpectrum, Math.min(amplitudeThreshold, 0.01), false)
const peaks = detectSpectralPeaks(
processedSpectrum,
Math.min(amplitudeThreshold, 0.01),
false
)
// Generate partials from peaks with spectral density
const partials: SpectralPeak[] = []
@@ -176,14 +177,21 @@ export class ImageToAudioSynthesizer {
} else if (frequencyMapping === 'linear') {
frequency = mapFrequencyLinear(peakRow, effectiveHeight, minFreq, maxFreq)
} else {
frequency = mapFrequency(peakRow, effectiveHeight, minFreq, maxFreq, frequencyMapping || 'mel')
frequency = mapFrequency(
peakRow,
effectiveHeight,
minFreq,
maxFreq,
frequencyMapping || 'mel'
)
}
let amplitude = processedSpectrum[peakRow]
// Apply temporal smoothing
if (col > 0) {
amplitude = smoothingFactor * previousAmplitudes[peakRow] + (1 - smoothingFactor) * amplitude
amplitude =
smoothingFactor * previousAmplitudes[peakRow] + (1 - smoothingFactor) * amplitude
}
previousAmplitudes[peakRow] = amplitude
@@ -236,7 +244,7 @@ export class ImageToAudioSynthesizer {
return {
audio: normalizedAudio,
sampleRate,
duration
duration,
}
}
@@ -256,7 +264,7 @@ export class ImageToAudioSynthesizer {
disableNormalization = false,
disableContrast = false,
exactBinMapping = true,
invert = false
invert = false,
} = this.params
const totalSamples = Math.floor(duration * sampleRate)
@@ -282,7 +290,6 @@ export class ImageToAudioSynthesizer {
}
}
// Map image rows to these exact bins
console.log(`Ultra-precise mode: Using ${freqBins.length} exact FFT bins from ${minFreq}Hz to ${maxFreq}Hz`)
} else {
// Linear frequency mapping
freqBins = []
@@ -339,16 +346,16 @@ export class ImageToAudioSynthesizer {
const contrast = this.params.contrast || 1.0
// Fast power optimization for common cases
if (contrast === 1.0) {
amplitude = intensity // No contrast
amplitude = intensity // No contrast
} else if (contrast === 2.0) {
amplitude = intensity * intensity // Square is much faster than Math.pow
amplitude = intensity * intensity // Square is much faster than Math.pow
} else if (contrast === 0.5) {
amplitude = Math.sqrt(intensity) // Square root is faster than Math.pow
amplitude = Math.sqrt(intensity) // Square root is faster than Math.pow
} else if (contrast === 3.0) {
amplitude = intensity * intensity * intensity // Cube
amplitude = intensity * intensity * intensity // Cube
} else if (contrast === 4.0) {
const sq = intensity * intensity
amplitude = sq * sq // Fourth power
amplitude = sq * sq // Fourth power
} else {
// Fast power approximation for arbitrary values
// Uses bit manipulation + lookup for ~10x speedup over Math.pow
@@ -380,8 +387,8 @@ export class ImageToAudioSynthesizer {
// Phase increment method - mathematically identical but much faster
// Eliminates array lookups and multiplications in tight loop
let phase = freqCoeff * startSample / sampleRate // Initial phase
const phaseIncrement = freqCoeff / sampleRate // Phase per sample
let phase = (freqCoeff * startSample) / sampleRate // Initial phase
const phaseIncrement = freqCoeff / sampleRate // Phase per sample
for (let i = 0; i < frameLength; i++) {
columnSpectrum[i] += amplitude * Math.sin(phase)
phase += phaseIncrement
@@ -415,7 +422,7 @@ export class ImageToAudioSynthesizer {
return {
audio,
sampleRate,
duration
duration,
}
}
@@ -470,7 +477,7 @@ export function createDirectParams(overrides: Partial<SynthesisParams> = {}): Sy
disableNormalization: false,
disableContrast: false,
exactBinMapping: false,
...overrides
...overrides,
}
}
@@ -498,7 +505,7 @@ export function createCustomParams(overrides: Partial<SynthesisParams> = {}): Sy
disableNormalization: false,
disableContrast: false,
exactBinMapping: false,
...overrides
...overrides,
}
}
@@ -524,4 +531,4 @@ export function synthesizeCustom(
const customParams = createCustomParams(params)
const synthesizer = new ImageToAudioSynthesizer(customParams)
return synthesizer.synthesize(imageData)
}
}

View File

@@ -32,4 +32,4 @@ export interface SynthesisResult {
audio: Float32Array
sampleRate: number
duration: number
}
}

View File

@@ -27,8 +27,9 @@ export function barkToHz(bark: number): number {
let freq = 1000 // Initial guess
for (let i = 0; i < 10; i++) {
const barkEst = hzToBark(freq)
const derivative = 13 * 0.00076 / (1 + Math.pow(0.00076 * freq, 2)) +
3.5 * 2 * (freq / 7500) * (1 / 7500) / (1 + Math.pow(freq / 7500, 4))
const derivative =
(13 * 0.00076) / (1 + Math.pow(0.00076 * freq, 2)) +
(3.5 * 2 * (freq / 7500) * (1 / 7500)) / (1 + Math.pow(freq / 7500, 4))
freq = freq - (barkEst - bark) / derivative
if (Math.abs(hzToBark(freq) - bark) < 0.001) break
}
@@ -58,7 +59,11 @@ export function applyAmplitudeCurve(amplitude: number, curve: string, gamma: num
/**
* Apply soft thresholding using tanh function
*/
export function applySoftThreshold(amplitude: number, threshold: number, softness: number = 0.1): number {
export function applySoftThreshold(
amplitude: number,
threshold: number,
softness: number = 0.1
): number {
if (threshold <= 0) return amplitude
const ratio = amplitude / threshold
@@ -76,7 +81,13 @@ export function applySoftThreshold(amplitude: number, threshold: number, softnes
/**
* Map frequency using specified scale
*/
export function mapFrequency(row: number, totalRows: number, minFreq: number, maxFreq: number, scale: string): number {
export function mapFrequency(
row: number,
totalRows: number,
minFreq: number,
maxFreq: number,
scale: string
): number {
const normalizedRow = row / (totalRows - 1)
switch (scale) {
@@ -109,7 +120,11 @@ export function mapFrequency(row: number, totalRows: number, minFreq: number, ma
/**
* Detect spectral peaks in amplitude spectrum with optional smoothing
*/
export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01, useSmoothing: boolean = false): number[] {
export function detectSpectralPeaks(
spectrum: number[],
threshold: number = 0.01,
useSmoothing: boolean = false
): number[] {
if (useSmoothing) {
return detectSmoothSpectralPeaks(spectrum, threshold)
}
@@ -126,9 +141,7 @@ export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01
// Fallback: use local maxima with lower threshold if no peaks found
if (peaks.length === 0) {
for (let i = 1; i < spectrum.length - 1; i++) {
if (spectrum[i] > spectrum[i - 1] &&
spectrum[i] > spectrum[i + 1] &&
spectrum[i] > 0.001) {
if (spectrum[i] > spectrum[i - 1] && spectrum[i] > spectrum[i + 1] && spectrum[i] > 0.001) {
peaks.push(i)
}
}
@@ -148,12 +161,13 @@ export function detectSmoothSpectralPeaks(spectrum: number[], threshold: number
for (let i = 2; i < smoothedSpectrum.length - 2; i++) {
const current = smoothedSpectrum[i]
if (current > threshold &&
current > smoothedSpectrum[i - 1] &&
current > smoothedSpectrum[i + 1] &&
current > smoothedSpectrum[i - 2] &&
current > smoothedSpectrum[i + 2]) {
if (
current > threshold &&
current > smoothedSpectrum[i - 1] &&
current > smoothedSpectrum[i + 1] &&
current > smoothedSpectrum[i - 2] &&
current > smoothedSpectrum[i + 2]
) {
// Find the exact peak position with sub-bin accuracy using parabolic interpolation
const y1 = smoothedSpectrum[i - 1]
const y2 = smoothedSpectrum[i]
@@ -199,7 +213,11 @@ function smoothSpectrum(spectrum: number[], windowSize: number): number[] {
let sum = 0
let count = 0
for (let j = Math.max(0, i - halfWindow); j <= Math.min(spectrum.length - 1, i + halfWindow); j++) {
for (
let j = Math.max(0, i - halfWindow);
j <= Math.min(spectrum.length - 1, i + halfWindow);
j++
) {
sum += spectrum[j]
count++
}
@@ -213,7 +231,11 @@ function smoothSpectrum(spectrum: number[], windowSize: number): number[] {
/**
* Apply perceptual amplitude weighting with contrast control
*/
export function perceptualAmplitudeWeighting(freq: number, amplitude: number, contrast: number = 2.2): number {
export function perceptualAmplitudeWeighting(
freq: number,
amplitude: number,
contrast: number = 2.2
): number {
// Apply contrast curve first (like LeviBorodenko's approach)
const contrastedAmplitude = Math.pow(amplitude, contrast)
@@ -223,8 +245,6 @@ export function perceptualAmplitudeWeighting(freq: number, amplitude: number, co
return contrastedAmplitude * weight
}
/**
* Generate spectral density by creating multiple tones per frequency bin
* Inspired by LeviBorodenko's multi-tone approach
@@ -239,13 +259,13 @@ export function generateSpectralDensity(
const toneSpacing = bandwidth / numTones
for (let i = 0; i < numTones; i++) {
const freq = centerFreq + (i - numTones/2) * toneSpacing
const toneAmplitude = amplitude * (1 - Math.abs(i - numTones/2) / numTones * 0.3) // Slight amplitude variation
const freq = centerFreq + (i - numTones / 2) * toneSpacing
const toneAmplitude = amplitude * (1 - (Math.abs(i - numTones / 2) / numTones) * 0.3) // Slight amplitude variation
peaks.push({
frequency: freq,
amplitude: toneAmplitude,
phase: 0
phase: 0,
})
}
@@ -328,7 +348,8 @@ export function analyzeImageBrightness(imageData: ImageData): {
const avgEdgeBrightness = edgePixels > 0 ? edgeBrightness / edgePixels : meanBrightness
// Calculate contrast (standard deviation)
const variance = brightnesses.reduce((sum, b) => sum + Math.pow(b - meanBrightness, 2), 0) / brightnesses.length
const variance =
brightnesses.reduce((sum, b) => sum + Math.pow(b - meanBrightness, 2), 0) / brightnesses.length
const contrast = Math.sqrt(variance)
// Make recommendation
@@ -346,7 +367,7 @@ export function analyzeImageBrightness(imageData: ImageData): {
medianBrightness,
edgeBrightness: avgEdgeBrightness,
contrast,
recommendation
recommendation,
}
}
@@ -366,19 +387,19 @@ export function generateWindow(length: number, windowType: string): Float32Array
switch (windowType) {
case 'hann':
for (let i = 0; i < length; i++) {
window[i] = 0.5 * (1 - Math.cos(2 * Math.PI * i / (length - 1)))
window[i] = 0.5 * (1 - Math.cos((2 * Math.PI * i) / (length - 1)))
}
break
case 'hamming':
for (let i = 0; i < length; i++) {
window[i] = 0.54 - 0.46 * Math.cos(2 * Math.PI * i / (length - 1))
window[i] = 0.54 - 0.46 * Math.cos((2 * Math.PI * i) / (length - 1))
}
break
case 'blackman':
for (let i = 0; i < length; i++) {
const factor = 2 * Math.PI * i / (length - 1)
const factor = (2 * Math.PI * i) / (length - 1)
window[i] = 0.42 - 0.5 * Math.cos(factor) + 0.08 * Math.cos(2 * factor)
}
break
@@ -450,7 +471,12 @@ export function extractSpectrum(
/**
* Alternative linear frequency mapping inspired by alexadam's approach
*/
export function mapFrequencyLinear(row: number, totalRows: number, minFreq: number, maxFreq: number): number {
export function mapFrequencyLinear(
row: number,
totalRows: number,
minFreq: number,
maxFreq: number
): number {
// Direct linear mapping from top to bottom (high freq at top)
const normalizedRow = row / (totalRows - 1)
return maxFreq - normalizedRow * (maxFreq - minFreq)
@@ -481,4 +507,4 @@ export function normalizeAudioGlobal(audio: Float32Array, targetLevel: number =
}
return normalized
}
}

View File

@@ -5,7 +5,7 @@ export {
createDirectParams,
createCustomParams,
synthesizeDirect,
synthesizeCustom
synthesizeCustom,
} from './core/synthesizer'
export type { SynthesisParams, SpectralPeak, SynthesisResult, WindowType } from './core/types'
@@ -25,14 +25,9 @@ export {
mapFrequency,
mapFrequencyLinear,
normalizeAudioGlobal,
generateSpectralDensity
generateSpectralDensity,
} from './core/utils'
// Audio export
export {
createWAVBuffer,
downloadWAV,
playAudio,
createAudioPlayer
} from './audio/export'
export type { AudioPlayer } from './audio/export'
export { createWAVBuffer, downloadWAV, playAudio, createAudioPlayer } from './audio/export'
export type { AudioPlayer } from './audio/export'