Cleaning the codebase
This commit is contained in:
@@ -23,7 +23,7 @@ const audioData = synthesizeFromImage(imageData, {
|
||||
duration: 10,
|
||||
minFreq: 100,
|
||||
maxFreq: 10000,
|
||||
maxPartials: 200
|
||||
maxPartials: 200,
|
||||
})
|
||||
|
||||
// Export as WAV
|
||||
@@ -35,6 +35,7 @@ downloadWAV(audioData, 44100, 'my-audio.wav')
|
||||
### Main Functions
|
||||
|
||||
#### `synthesizeFromImage(imageData, params?)`
|
||||
|
||||
- **imageData**: `ImageData` - Canvas image data
|
||||
- **params**: `Partial<SynthesisParams>` - Optional parameters
|
||||
- **Returns**: `Float32Array` - Audio samples
|
||||
@@ -42,16 +43,17 @@ downloadWAV(audioData, 44100, 'my-audio.wav')
|
||||
### Types
|
||||
|
||||
#### `SynthesisParams`
|
||||
|
||||
```typescript
|
||||
interface SynthesisParams {
|
||||
duration: number // Audio duration in seconds
|
||||
minFreq: number // Minimum frequency in Hz
|
||||
maxFreq: number // Maximum frequency in Hz
|
||||
sampleRate: number // Sample rate in Hz
|
||||
duration: number // Audio duration in seconds
|
||||
minFreq: number // Minimum frequency in Hz
|
||||
maxFreq: number // Maximum frequency in Hz
|
||||
sampleRate: number // Sample rate in Hz
|
||||
frequencyResolution: number // Frequency bin downsampling
|
||||
timeResolution: number // Time slice downsampling
|
||||
timeResolution: number // Time slice downsampling
|
||||
amplitudeThreshold: number // Minimum amplitude threshold
|
||||
maxPartials: number // Maximum simultaneous partials
|
||||
maxPartials: number // Maximum simultaneous partials
|
||||
}
|
||||
```
|
||||
|
||||
@@ -80,6 +82,7 @@ spectral-synthesis/
|
||||
## Usage Examples
|
||||
|
||||
### Basic Synthesis
|
||||
|
||||
```typescript
|
||||
const canvas = document.createElement('canvas')
|
||||
const ctx = canvas.getContext('2d')
|
||||
@@ -89,14 +92,15 @@ const audio = synthesizeFromImage(imageData)
|
||||
```
|
||||
|
||||
### Advanced Usage
|
||||
|
||||
```typescript
|
||||
import { ImageToAudioSynthesizer } from './spectral-synthesis'
|
||||
|
||||
const synthesizer = new ImageToAudioSynthesizer({
|
||||
duration: 5,
|
||||
maxPartials: 150
|
||||
maxPartials: 150,
|
||||
})
|
||||
|
||||
const result = synthesizer.synthesize(imageData)
|
||||
console.log(`Generated ${result.duration}s of audio`)
|
||||
```
|
||||
```
|
||||
|
||||
@@ -25,7 +25,7 @@ export function createWAVBuffer(audioData: Float32Array, sampleRate: number): Ar
|
||||
let offset = 44
|
||||
for (let i = 0; i < length; i++) {
|
||||
const sample = Math.max(-1, Math.min(1, audioData[i]))
|
||||
view.setInt16(offset, sample * 0x7FFF, true)
|
||||
view.setInt16(offset, sample * 0x7fff, true)
|
||||
offset += 2
|
||||
}
|
||||
|
||||
@@ -83,7 +83,9 @@ export function createAudioPlayer(audioData: Float32Array, sampleRate: number):
|
||||
gainNode.connect(audioContext.destination)
|
||||
|
||||
if (audioContext.sampleRate !== sampleRate) {
|
||||
console.warn(`Audio context sample rate (${audioContext.sampleRate}) differs from data sample rate (${sampleRate})`)
|
||||
console.warn(
|
||||
`Audio context sample rate (${audioContext.sampleRate}) differs from data sample rate (${sampleRate})`
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -102,7 +104,8 @@ export function createAudioPlayer(audioData: Float32Array, sampleRate: number):
|
||||
|
||||
if (isPaused) {
|
||||
// Resume from pause is not supported with AudioBufferSource
|
||||
// We need to restart from the beginning
|
||||
// We need to restart from the beginning (pausedAt was ${pausedAt}s)
|
||||
void pausedAt // Track for future resume feature
|
||||
isPaused = false
|
||||
pausedAt = 0
|
||||
}
|
||||
@@ -163,7 +166,7 @@ export function createAudioPlayer(audioData: Float32Array, sampleRate: number):
|
||||
|
||||
onStateChange(callback: (isPlaying: boolean) => void) {
|
||||
stateCallback = callback
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -174,11 +177,11 @@ export async function playAudio(audioData: Float32Array, sampleRate: number): Pr
|
||||
const player = createAudioPlayer(audioData, sampleRate)
|
||||
|
||||
return new Promise(resolve => {
|
||||
player.onStateChange((isPlaying) => {
|
||||
player.onStateChange(isPlaying => {
|
||||
if (!isPlaying) {
|
||||
resolve()
|
||||
}
|
||||
})
|
||||
player.play()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,7 +9,7 @@ import {
|
||||
generateSpectralDensity,
|
||||
mapFrequency,
|
||||
mapFrequencyLinear,
|
||||
normalizeAudioGlobal
|
||||
normalizeAudioGlobal,
|
||||
} from './utils'
|
||||
|
||||
/**
|
||||
@@ -78,7 +78,7 @@ export class ImageToAudioSynthesizer {
|
||||
disableNormalization: false,
|
||||
disableContrast: false,
|
||||
exactBinMapping: false,
|
||||
...params
|
||||
...params,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,7 +93,6 @@ export class ImageToAudioSynthesizer {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Custom synthesis mode - sophisticated audio processing
|
||||
*/
|
||||
@@ -112,7 +111,7 @@ export class ImageToAudioSynthesizer {
|
||||
spectralDensity,
|
||||
usePerceptualWeighting,
|
||||
frequencyMapping,
|
||||
invert = false
|
||||
invert = false,
|
||||
} = this.params
|
||||
|
||||
// Calculate synthesis parameters
|
||||
@@ -136,7 +135,6 @@ export class ImageToAudioSynthesizer {
|
||||
const previousAmplitudes = new Float32Array(effectiveHeight)
|
||||
const smoothingFactor = 0.2 // Reduced for sharper transients
|
||||
|
||||
|
||||
// Process each time slice
|
||||
for (let col = 0; col < effectiveWidth; col++) {
|
||||
const sourceCol = col
|
||||
@@ -160,9 +158,12 @@ export class ImageToAudioSynthesizer {
|
||||
return normalizedDb
|
||||
})
|
||||
|
||||
|
||||
// Detect spectral peaks
|
||||
const peaks = detectSpectralPeaks(processedSpectrum, Math.min(amplitudeThreshold, 0.01), false)
|
||||
const peaks = detectSpectralPeaks(
|
||||
processedSpectrum,
|
||||
Math.min(amplitudeThreshold, 0.01),
|
||||
false
|
||||
)
|
||||
|
||||
// Generate partials from peaks with spectral density
|
||||
const partials: SpectralPeak[] = []
|
||||
@@ -176,14 +177,21 @@ export class ImageToAudioSynthesizer {
|
||||
} else if (frequencyMapping === 'linear') {
|
||||
frequency = mapFrequencyLinear(peakRow, effectiveHeight, minFreq, maxFreq)
|
||||
} else {
|
||||
frequency = mapFrequency(peakRow, effectiveHeight, minFreq, maxFreq, frequencyMapping || 'mel')
|
||||
frequency = mapFrequency(
|
||||
peakRow,
|
||||
effectiveHeight,
|
||||
minFreq,
|
||||
maxFreq,
|
||||
frequencyMapping || 'mel'
|
||||
)
|
||||
}
|
||||
|
||||
let amplitude = processedSpectrum[peakRow]
|
||||
|
||||
// Apply temporal smoothing
|
||||
if (col > 0) {
|
||||
amplitude = smoothingFactor * previousAmplitudes[peakRow] + (1 - smoothingFactor) * amplitude
|
||||
amplitude =
|
||||
smoothingFactor * previousAmplitudes[peakRow] + (1 - smoothingFactor) * amplitude
|
||||
}
|
||||
previousAmplitudes[peakRow] = amplitude
|
||||
|
||||
@@ -236,7 +244,7 @@ export class ImageToAudioSynthesizer {
|
||||
return {
|
||||
audio: normalizedAudio,
|
||||
sampleRate,
|
||||
duration
|
||||
duration,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -256,7 +264,7 @@ export class ImageToAudioSynthesizer {
|
||||
disableNormalization = false,
|
||||
disableContrast = false,
|
||||
exactBinMapping = true,
|
||||
invert = false
|
||||
invert = false,
|
||||
} = this.params
|
||||
|
||||
const totalSamples = Math.floor(duration * sampleRate)
|
||||
@@ -282,7 +290,6 @@ export class ImageToAudioSynthesizer {
|
||||
}
|
||||
}
|
||||
// Map image rows to these exact bins
|
||||
console.log(`Ultra-precise mode: Using ${freqBins.length} exact FFT bins from ${minFreq}Hz to ${maxFreq}Hz`)
|
||||
} else {
|
||||
// Linear frequency mapping
|
||||
freqBins = []
|
||||
@@ -339,16 +346,16 @@ export class ImageToAudioSynthesizer {
|
||||
const contrast = this.params.contrast || 1.0
|
||||
// Fast power optimization for common cases
|
||||
if (contrast === 1.0) {
|
||||
amplitude = intensity // No contrast
|
||||
amplitude = intensity // No contrast
|
||||
} else if (contrast === 2.0) {
|
||||
amplitude = intensity * intensity // Square is much faster than Math.pow
|
||||
amplitude = intensity * intensity // Square is much faster than Math.pow
|
||||
} else if (contrast === 0.5) {
|
||||
amplitude = Math.sqrt(intensity) // Square root is faster than Math.pow
|
||||
amplitude = Math.sqrt(intensity) // Square root is faster than Math.pow
|
||||
} else if (contrast === 3.0) {
|
||||
amplitude = intensity * intensity * intensity // Cube
|
||||
amplitude = intensity * intensity * intensity // Cube
|
||||
} else if (contrast === 4.0) {
|
||||
const sq = intensity * intensity
|
||||
amplitude = sq * sq // Fourth power
|
||||
amplitude = sq * sq // Fourth power
|
||||
} else {
|
||||
// Fast power approximation for arbitrary values
|
||||
// Uses bit manipulation + lookup for ~10x speedup over Math.pow
|
||||
@@ -380,8 +387,8 @@ export class ImageToAudioSynthesizer {
|
||||
|
||||
// Phase increment method - mathematically identical but much faster
|
||||
// Eliminates array lookups and multiplications in tight loop
|
||||
let phase = freqCoeff * startSample / sampleRate // Initial phase
|
||||
const phaseIncrement = freqCoeff / sampleRate // Phase per sample
|
||||
let phase = (freqCoeff * startSample) / sampleRate // Initial phase
|
||||
const phaseIncrement = freqCoeff / sampleRate // Phase per sample
|
||||
for (let i = 0; i < frameLength; i++) {
|
||||
columnSpectrum[i] += amplitude * Math.sin(phase)
|
||||
phase += phaseIncrement
|
||||
@@ -415,7 +422,7 @@ export class ImageToAudioSynthesizer {
|
||||
return {
|
||||
audio,
|
||||
sampleRate,
|
||||
duration
|
||||
duration,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -470,7 +477,7 @@ export function createDirectParams(overrides: Partial<SynthesisParams> = {}): Sy
|
||||
disableNormalization: false,
|
||||
disableContrast: false,
|
||||
exactBinMapping: false,
|
||||
...overrides
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -498,7 +505,7 @@ export function createCustomParams(overrides: Partial<SynthesisParams> = {}): Sy
|
||||
disableNormalization: false,
|
||||
disableContrast: false,
|
||||
exactBinMapping: false,
|
||||
...overrides
|
||||
...overrides,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -524,4 +531,4 @@ export function synthesizeCustom(
|
||||
const customParams = createCustomParams(params)
|
||||
const synthesizer = new ImageToAudioSynthesizer(customParams)
|
||||
return synthesizer.synthesize(imageData)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,4 +32,4 @@ export interface SynthesisResult {
|
||||
audio: Float32Array
|
||||
sampleRate: number
|
||||
duration: number
|
||||
}
|
||||
}
|
||||
|
||||
@@ -27,8 +27,9 @@ export function barkToHz(bark: number): number {
|
||||
let freq = 1000 // Initial guess
|
||||
for (let i = 0; i < 10; i++) {
|
||||
const barkEst = hzToBark(freq)
|
||||
const derivative = 13 * 0.00076 / (1 + Math.pow(0.00076 * freq, 2)) +
|
||||
3.5 * 2 * (freq / 7500) * (1 / 7500) / (1 + Math.pow(freq / 7500, 4))
|
||||
const derivative =
|
||||
(13 * 0.00076) / (1 + Math.pow(0.00076 * freq, 2)) +
|
||||
(3.5 * 2 * (freq / 7500) * (1 / 7500)) / (1 + Math.pow(freq / 7500, 4))
|
||||
freq = freq - (barkEst - bark) / derivative
|
||||
if (Math.abs(hzToBark(freq) - bark) < 0.001) break
|
||||
}
|
||||
@@ -58,7 +59,11 @@ export function applyAmplitudeCurve(amplitude: number, curve: string, gamma: num
|
||||
/**
|
||||
* Apply soft thresholding using tanh function
|
||||
*/
|
||||
export function applySoftThreshold(amplitude: number, threshold: number, softness: number = 0.1): number {
|
||||
export function applySoftThreshold(
|
||||
amplitude: number,
|
||||
threshold: number,
|
||||
softness: number = 0.1
|
||||
): number {
|
||||
if (threshold <= 0) return amplitude
|
||||
|
||||
const ratio = amplitude / threshold
|
||||
@@ -76,7 +81,13 @@ export function applySoftThreshold(amplitude: number, threshold: number, softnes
|
||||
/**
|
||||
* Map frequency using specified scale
|
||||
*/
|
||||
export function mapFrequency(row: number, totalRows: number, minFreq: number, maxFreq: number, scale: string): number {
|
||||
export function mapFrequency(
|
||||
row: number,
|
||||
totalRows: number,
|
||||
minFreq: number,
|
||||
maxFreq: number,
|
||||
scale: string
|
||||
): number {
|
||||
const normalizedRow = row / (totalRows - 1)
|
||||
|
||||
switch (scale) {
|
||||
@@ -109,7 +120,11 @@ export function mapFrequency(row: number, totalRows: number, minFreq: number, ma
|
||||
/**
|
||||
* Detect spectral peaks in amplitude spectrum with optional smoothing
|
||||
*/
|
||||
export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01, useSmoothing: boolean = false): number[] {
|
||||
export function detectSpectralPeaks(
|
||||
spectrum: number[],
|
||||
threshold: number = 0.01,
|
||||
useSmoothing: boolean = false
|
||||
): number[] {
|
||||
if (useSmoothing) {
|
||||
return detectSmoothSpectralPeaks(spectrum, threshold)
|
||||
}
|
||||
@@ -126,9 +141,7 @@ export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01
|
||||
// Fallback: use local maxima with lower threshold if no peaks found
|
||||
if (peaks.length === 0) {
|
||||
for (let i = 1; i < spectrum.length - 1; i++) {
|
||||
if (spectrum[i] > spectrum[i - 1] &&
|
||||
spectrum[i] > spectrum[i + 1] &&
|
||||
spectrum[i] > 0.001) {
|
||||
if (spectrum[i] > spectrum[i - 1] && spectrum[i] > spectrum[i + 1] && spectrum[i] > 0.001) {
|
||||
peaks.push(i)
|
||||
}
|
||||
}
|
||||
@@ -148,12 +161,13 @@ export function detectSmoothSpectralPeaks(spectrum: number[], threshold: number
|
||||
for (let i = 2; i < smoothedSpectrum.length - 2; i++) {
|
||||
const current = smoothedSpectrum[i]
|
||||
|
||||
if (current > threshold &&
|
||||
current > smoothedSpectrum[i - 1] &&
|
||||
current > smoothedSpectrum[i + 1] &&
|
||||
current > smoothedSpectrum[i - 2] &&
|
||||
current > smoothedSpectrum[i + 2]) {
|
||||
|
||||
if (
|
||||
current > threshold &&
|
||||
current > smoothedSpectrum[i - 1] &&
|
||||
current > smoothedSpectrum[i + 1] &&
|
||||
current > smoothedSpectrum[i - 2] &&
|
||||
current > smoothedSpectrum[i + 2]
|
||||
) {
|
||||
// Find the exact peak position with sub-bin accuracy using parabolic interpolation
|
||||
const y1 = smoothedSpectrum[i - 1]
|
||||
const y2 = smoothedSpectrum[i]
|
||||
@@ -199,7 +213,11 @@ function smoothSpectrum(spectrum: number[], windowSize: number): number[] {
|
||||
let sum = 0
|
||||
let count = 0
|
||||
|
||||
for (let j = Math.max(0, i - halfWindow); j <= Math.min(spectrum.length - 1, i + halfWindow); j++) {
|
||||
for (
|
||||
let j = Math.max(0, i - halfWindow);
|
||||
j <= Math.min(spectrum.length - 1, i + halfWindow);
|
||||
j++
|
||||
) {
|
||||
sum += spectrum[j]
|
||||
count++
|
||||
}
|
||||
@@ -213,7 +231,11 @@ function smoothSpectrum(spectrum: number[], windowSize: number): number[] {
|
||||
/**
|
||||
* Apply perceptual amplitude weighting with contrast control
|
||||
*/
|
||||
export function perceptualAmplitudeWeighting(freq: number, amplitude: number, contrast: number = 2.2): number {
|
||||
export function perceptualAmplitudeWeighting(
|
||||
freq: number,
|
||||
amplitude: number,
|
||||
contrast: number = 2.2
|
||||
): number {
|
||||
// Apply contrast curve first (like LeviBorodenko's approach)
|
||||
const contrastedAmplitude = Math.pow(amplitude, contrast)
|
||||
|
||||
@@ -223,8 +245,6 @@ export function perceptualAmplitudeWeighting(freq: number, amplitude: number, co
|
||||
return contrastedAmplitude * weight
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Generate spectral density by creating multiple tones per frequency bin
|
||||
* Inspired by LeviBorodenko's multi-tone approach
|
||||
@@ -239,13 +259,13 @@ export function generateSpectralDensity(
|
||||
const toneSpacing = bandwidth / numTones
|
||||
|
||||
for (let i = 0; i < numTones; i++) {
|
||||
const freq = centerFreq + (i - numTones/2) * toneSpacing
|
||||
const toneAmplitude = amplitude * (1 - Math.abs(i - numTones/2) / numTones * 0.3) // Slight amplitude variation
|
||||
const freq = centerFreq + (i - numTones / 2) * toneSpacing
|
||||
const toneAmplitude = amplitude * (1 - (Math.abs(i - numTones / 2) / numTones) * 0.3) // Slight amplitude variation
|
||||
|
||||
peaks.push({
|
||||
frequency: freq,
|
||||
amplitude: toneAmplitude,
|
||||
phase: 0
|
||||
phase: 0,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -328,7 +348,8 @@ export function analyzeImageBrightness(imageData: ImageData): {
|
||||
const avgEdgeBrightness = edgePixels > 0 ? edgeBrightness / edgePixels : meanBrightness
|
||||
|
||||
// Calculate contrast (standard deviation)
|
||||
const variance = brightnesses.reduce((sum, b) => sum + Math.pow(b - meanBrightness, 2), 0) / brightnesses.length
|
||||
const variance =
|
||||
brightnesses.reduce((sum, b) => sum + Math.pow(b - meanBrightness, 2), 0) / brightnesses.length
|
||||
const contrast = Math.sqrt(variance)
|
||||
|
||||
// Make recommendation
|
||||
@@ -346,7 +367,7 @@ export function analyzeImageBrightness(imageData: ImageData): {
|
||||
medianBrightness,
|
||||
edgeBrightness: avgEdgeBrightness,
|
||||
contrast,
|
||||
recommendation
|
||||
recommendation,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -366,19 +387,19 @@ export function generateWindow(length: number, windowType: string): Float32Array
|
||||
switch (windowType) {
|
||||
case 'hann':
|
||||
for (let i = 0; i < length; i++) {
|
||||
window[i] = 0.5 * (1 - Math.cos(2 * Math.PI * i / (length - 1)))
|
||||
window[i] = 0.5 * (1 - Math.cos((2 * Math.PI * i) / (length - 1)))
|
||||
}
|
||||
break
|
||||
|
||||
case 'hamming':
|
||||
for (let i = 0; i < length; i++) {
|
||||
window[i] = 0.54 - 0.46 * Math.cos(2 * Math.PI * i / (length - 1))
|
||||
window[i] = 0.54 - 0.46 * Math.cos((2 * Math.PI * i) / (length - 1))
|
||||
}
|
||||
break
|
||||
|
||||
case 'blackman':
|
||||
for (let i = 0; i < length; i++) {
|
||||
const factor = 2 * Math.PI * i / (length - 1)
|
||||
const factor = (2 * Math.PI * i) / (length - 1)
|
||||
window[i] = 0.42 - 0.5 * Math.cos(factor) + 0.08 * Math.cos(2 * factor)
|
||||
}
|
||||
break
|
||||
@@ -450,7 +471,12 @@ export function extractSpectrum(
|
||||
/**
|
||||
* Alternative linear frequency mapping inspired by alexadam's approach
|
||||
*/
|
||||
export function mapFrequencyLinear(row: number, totalRows: number, minFreq: number, maxFreq: number): number {
|
||||
export function mapFrequencyLinear(
|
||||
row: number,
|
||||
totalRows: number,
|
||||
minFreq: number,
|
||||
maxFreq: number
|
||||
): number {
|
||||
// Direct linear mapping from top to bottom (high freq at top)
|
||||
const normalizedRow = row / (totalRows - 1)
|
||||
return maxFreq - normalizedRow * (maxFreq - minFreq)
|
||||
@@ -481,4 +507,4 @@ export function normalizeAudioGlobal(audio: Float32Array, targetLevel: number =
|
||||
}
|
||||
|
||||
return normalized
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ export {
|
||||
createDirectParams,
|
||||
createCustomParams,
|
||||
synthesizeDirect,
|
||||
synthesizeCustom
|
||||
synthesizeCustom,
|
||||
} from './core/synthesizer'
|
||||
export type { SynthesisParams, SpectralPeak, SynthesisResult, WindowType } from './core/types'
|
||||
|
||||
@@ -25,14 +25,9 @@ export {
|
||||
mapFrequency,
|
||||
mapFrequencyLinear,
|
||||
normalizeAudioGlobal,
|
||||
generateSpectralDensity
|
||||
generateSpectralDensity,
|
||||
} from './core/utils'
|
||||
|
||||
// Audio export
|
||||
export {
|
||||
createWAVBuffer,
|
||||
downloadWAV,
|
||||
playAudio,
|
||||
createAudioPlayer
|
||||
} from './audio/export'
|
||||
export type { AudioPlayer } from './audio/export'
|
||||
export { createWAVBuffer, downloadWAV, playAudio, createAudioPlayer } from './audio/export'
|
||||
export type { AudioPlayer } from './audio/export'
|
||||
|
||||
Reference in New Issue
Block a user