Initial CoolSoup implementation

CoolSoup is a React + TypeScript + Vite application that generates visual patterns and converts them to audio through spectral synthesis. Features multiple image generators (Tixy expressions, geometric tiles, external APIs) and an advanced audio synthesis engine that treats images as spectrograms.
This commit is contained in:
2025-09-29 14:44:48 +02:00
parent b564e41820
commit 623082ce3b
79 changed files with 6247 additions and 951 deletions

View File

@@ -54,25 +54,131 @@ export function downloadWAV(audioData: Float32Array, sampleRate: number, filenam
URL.revokeObjectURL(url)
}
/**
* Play audio in browser
*/
export async function playAudio(audioData: Float32Array, sampleRate: number): Promise<void> {
const audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
/**
 * Playback controller returned by createAudioPlayer.
 *
 * Wraps one in-memory audio buffer behind transport-style controls.
 * NOTE(review): judging from createAudioPlayer in this file, pause() stops the
 * AudioBufferSourceNode and a later play() restarts from the beginning
 * (buffer sources cannot resume) — confirm before relying on true
 * pause/resume semantics.
 */
export interface AudioPlayer {
  /** Start playback (restarts from the beginning of the buffer). */
  play(): void
  /** Halt playback; a subsequent play() starts over from the beginning. */
  pause(): void
  /** Halt playback and reset all position state. */
  stop(): void
  /** Set output gain; the value is clamped to [0, 1]. */
  setVolume(volume: number): void
  /** True while the buffer is currently playing. */
  isPlaying(): boolean
  /** Register a callback fired on start/stop; replaces any earlier callback. */
  onStateChange(callback: (isPlaying: boolean) => void): void
}
if (audioContext.sampleRate !== sampleRate) {
console.warn(`Audio context sample rate (${audioContext.sampleRate}) differs from data sample rate (${sampleRate})`)
/**
* Create an audio player with playback controls
*/
export function createAudioPlayer(audioData: Float32Array, sampleRate: number): AudioPlayer {
let audioContext: AudioContext | null = null
let source: AudioBufferSourceNode | null = null
let gainNode: GainNode | null = null
let isCurrentlyPlaying = false
let isPaused = false
let pausedAt = 0
let startedAt = 0
let stateCallback: ((isPlaying: boolean) => void) | null = null
const initAudioContext = () => {
if (!audioContext) {
audioContext = new (window.AudioContext || (window as any).webkitAudioContext)()
gainNode = audioContext.createGain()
gainNode.connect(audioContext.destination)
if (audioContext.sampleRate !== sampleRate) {
console.warn(`Audio context sample rate (${audioContext.sampleRate}) differs from data sample rate (${sampleRate})`)
}
}
}
const buffer = audioContext.createBuffer(1, audioData.length, sampleRate)
buffer.copyToChannel(audioData, 0)
const updateState = (playing: boolean) => {
isCurrentlyPlaying = playing
if (stateCallback) {
stateCallback(playing)
}
}
const source = audioContext.createBufferSource()
source.buffer = buffer
source.connect(audioContext.destination)
source.start()
return {
play() {
initAudioContext()
if (!audioContext || !gainNode) return
if (isPaused) {
// Resume from pause is not supported with AudioBufferSource
// We need to restart from the beginning
isPaused = false
pausedAt = 0
}
if (source) {
source.stop()
}
const buffer = audioContext.createBuffer(1, audioData.length, sampleRate)
buffer.copyToChannel(audioData, 0)
source = audioContext.createBufferSource()
source.buffer = buffer
source.connect(gainNode)
source.onended = () => {
updateState(false)
isPaused = false
pausedAt = 0
startedAt = 0
}
source.start()
startedAt = audioContext.currentTime
updateState(true)
},
pause() {
if (source && isCurrentlyPlaying) {
source.stop()
source = null
isPaused = true
pausedAt = audioContext ? audioContext.currentTime - startedAt : 0
updateState(false)
}
},
stop() {
if (source) {
source.stop()
source = null
}
isPaused = false
pausedAt = 0
startedAt = 0
updateState(false)
},
setVolume(volume: number) {
if (gainNode) {
gainNode.gain.value = Math.max(0, Math.min(1, volume))
}
},
isPlaying() {
return isCurrentlyPlaying
},
onStateChange(callback: (isPlaying: boolean) => void) {
stateCallback = callback
}
}
}
/**
* Play audio in browser (legacy function for backward compatibility)
*/
export async function playAudio(audioData: Float32Array, sampleRate: number): Promise<void> {
const player = createAudioPlayer(audioData, sampleRate)
return new Promise(resolve => {
source.onended = () => resolve()
player.onStateChange((isPlaying) => {
if (!isPlaying) {
resolve()
}
})
player.play()
})
}

View File

@@ -4,25 +4,80 @@ import {
melToHz,
detectSpectralPeaks,
perceptualAmplitudeWeighting,
shouldInvertImage,
extractSpectrum,
applyWindow
applyWindow,
generateSpectralDensity,
mapFrequency,
mapFrequencyLinear,
normalizeAudioGlobal
} from './utils'
/**
 * Fast power approximation tuned for contrast operations.
 *
 * Splits the exponent into an integer part (exponentiation by squaring) and a
 * fractional part (exp/log identity). Exponents outside [0.1, 5.0] — including
 * negative ones — fall back to Math.pow.
 *
 * Fix: the exponent-based early returns now run BEFORE the base checks, so
 * fastPower(x, 0) === 1 for every x (matching Math.pow, including 0 ** 0 === 1)
 * instead of returning 0 for non-positive bases.
 *
 * @param base     value to raise; non-positive bases (with nonzero exponent) map to 0
 * @param exponent power to raise to, typically a contrast value in [0.1, 5.0]
 * @returns approximation of base ** exponent
 */
function fastPower(base: number, exponent: number): number {
  // Exponent early exits first so x^0 === 1 and x^1 === x match Math.pow.
  if (exponent === 0) return 1
  if (exponent === 1) return base
  // Intensities are non-negative in this codebase; treat <= 0 as silence.
  if (base <= 0) return 0
  if (base === 1) return 1
  // Outside the tuned range (also covers negative exponents), defer to Math.pow.
  if (exponent < 0.1 || exponent > 5.0) {
    return Math.pow(base, exponent)
  }
  // Split exponent into integer and fractional parts.
  const intExp = Math.floor(exponent)
  const fracExp = exponent - intExp
  // Integer part: exponentiation by squaring.
  let intResult = 1
  let squared = base
  let e = intExp
  while (e > 0) {
    if (e & 1) intResult *= squared
    squared *= squared
    e >>= 1
  }
  // Fractional part: base^f = e^(f * ln(base)).
  let fracResult = 1
  if (fracExp > 0) {
    fracResult = Math.exp(fracExp * Math.log(base))
  }
  return intResult * fracResult
}
export class ImageToAudioSynthesizer {
private params: SynthesisParams
constructor(params: Partial<SynthesisParams> = {}) {
this.params = {
duration: 5,
minFreq: 20,
minFreq: 200,
maxFreq: 20000,
sampleRate: 44100,
frequencyResolution: 1,
timeResolution: 1,
amplitudeThreshold: 0.01,
maxPartials: 100,
windowType: 'hann',
contrast: 2.2,
spectralDensity: 3,
usePerceptualWeighting: true,
frequencyMapping: 'linear',
synthesisMode: 'direct',
invert: false,
fftSize: 2048,
frameOverlap: 0.5,
disableNormalization: false,
disableContrast: false,
exactBinMapping: false,
...params
}
}
@@ -31,58 +86,100 @@ export class ImageToAudioSynthesizer {
* Synthesize audio from image data
*/
synthesize(imageData: ImageData): SynthesisResult {
const { width, height, data } = imageData
if (this.params.synthesisMode === 'direct') {
return this.synthesizeDirect(imageData)
} else {
return this.synthesizeCustom(imageData)
}
}
/**
* Custom synthesis mode - sophisticated audio processing
*/
private synthesizeCustom(imageData: ImageData): SynthesisResult {
const { width, height } = imageData
const {
duration,
minFreq,
maxFreq,
sampleRate,
frequencyResolution,
timeResolution,
amplitudeThreshold,
maxPartials,
windowType
windowType,
contrast,
spectralDensity,
usePerceptualWeighting,
frequencyMapping,
invert = false
} = this.params
// Detect image type
const invert = shouldInvertImage(imageData)
// Calculate synthesis parameters
const totalSamples = Math.floor(duration * sampleRate)
const effectiveWidth = Math.floor(width / timeResolution)
const effectiveWidth = width
const effectiveHeight = Math.floor(height / frequencyResolution)
const samplesPerColumn = totalSamples / effectiveWidth
const audio = new Float32Array(totalSamples)
// Pre-calculate mel-scale frequency mapping
const minMel = hzToMel(minFreq)
const maxMel = hzToMel(maxFreq)
// Pre-calculate frequency mapping based on selected mode
let minMapped: number, maxMapped: number
if (frequencyMapping === 'mel') {
minMapped = hzToMel(minFreq)
maxMapped = hzToMel(maxFreq)
} else {
minMapped = minFreq
maxMapped = maxFreq
}
// Storage for temporal smoothing
const previousAmplitudes = new Float32Array(effectiveHeight)
const smoothingFactor = 0.3
const smoothingFactor = 0.2 // Reduced for sharper transients
// Process each time slice
for (let col = 0; col < effectiveWidth; col++) {
const sourceCol = col * timeResolution
const sourceCol = col
const startSample = Math.floor(col * samplesPerColumn)
const endSample = Math.floor((col + 1) * samplesPerColumn)
// Extract spectrum for this time slice
const spectrum = extractSpectrum(imageData, sourceCol, effectiveHeight, frequencyResolution, invert)
// Extract spectrum for this time slice with improved amplitude mapping
const spectrum = extractSpectrum(
imageData,
sourceCol,
effectiveHeight,
frequencyResolution,
invert,
usePerceptualWeighting || false
)
// Advanced mode: convert to dB scale for more accurate spectrogram interpretation
const processedSpectrum = spectrum.map(amp => {
const db = 20 * Math.log10(Math.max(amp, 0.001))
const normalizedDb = Math.max(0, (db + 60) / 60)
return normalizedDb
})
// Detect spectral peaks
const peaks = detectSpectralPeaks(spectrum, Math.min(amplitudeThreshold, 0.01))
const peaks = detectSpectralPeaks(processedSpectrum, Math.min(amplitudeThreshold, 0.01), false)
// Generate partials from peaks
// Generate partials from peaks with spectral density
const partials: SpectralPeak[] = []
for (const peakRow of peaks) {
// Mel-scale frequency mapping (high freq at top)
const melValue = maxMel - (peakRow / (effectiveHeight - 1)) * (maxMel - minMel)
const frequency = melToHz(melValue)
// Frequency mapping based on selected mode
let frequency: number
if (frequencyMapping === 'mel') {
const melValue = maxMapped - (peakRow / (effectiveHeight - 1)) * (maxMapped - minMapped)
frequency = melToHz(melValue)
} else if (frequencyMapping === 'linear') {
frequency = mapFrequencyLinear(peakRow, effectiveHeight, minFreq, maxFreq)
} else {
frequency = mapFrequency(peakRow, effectiveHeight, minFreq, maxFreq, frequencyMapping || 'mel')
}
let amplitude = spectrum[peakRow]
let amplitude = processedSpectrum[peakRow]
// Apply temporal smoothing
if (col > 0) {
@@ -90,14 +187,19 @@ export class ImageToAudioSynthesizer {
}
previousAmplitudes[peakRow] = amplitude
// Apply perceptual weighting
amplitude = perceptualAmplitudeWeighting(frequency, amplitude)
// Use zero phase for simplicity
const phase = 0
// Apply perceptual weighting with contrast
amplitude = perceptualAmplitudeWeighting(frequency, amplitude, contrast || 2.2)
// Check final amplitude threshold
if (amplitude > Math.min(amplitudeThreshold, 0.005)) {
partials.push({ frequency, amplitude, phase })
// Advanced mode: Generate spectral density (multiple tones per peak)
const denseTones = generateSpectralDensity(
frequency,
amplitude,
spectralDensity || 3,
Math.max(20, frequency * 0.02)
)
partials.push(...denseTones)
}
}
@@ -112,6 +214,7 @@ export class ImageToAudioSynthesizer {
for (const { frequency, amplitude, phase } of limitedPartials) {
for (let i = 0; i < chunkLength; i++) {
const t = (startSample + i) / sampleRate
// Use sine waves for our advanced synthesis (more flexible for complex timbres)
audioChunk[i] += amplitude * Math.sin(2 * Math.PI * frequency * t + phase)
}
}
@@ -125,18 +228,187 @@ export class ImageToAudioSynthesizer {
}
}
// Normalize to prevent clipping
let maxAmplitude = 0
for (let i = 0; i < audio.length; i++) {
const absValue = Math.abs(audio[i])
if (absValue > maxAmplitude) {
maxAmplitude = absValue
// Griffin-Lim removed due to crashes and incomplete implementation
// Use improved global normalization (alexadam style)
const normalizedAudio = normalizeAudioGlobal(audio, 0.8)
return {
audio: normalizedAudio,
sampleRate,
duration
}
}
/**
* Direct synthesis mode - high fidelity spectrogram synthesis
* Maps image pixels directly to FFT-aligned frequencies for maximum accuracy
*/
private synthesizeDirect(imageData: ImageData): SynthesisResult {
const { width, height } = imageData
const {
duration,
minFreq,
maxFreq,
sampleRate,
fftSize = 2048,
frameOverlap = 0.5,
disableNormalization = false,
disableContrast = false,
exactBinMapping = true,
invert = false
} = this.params
const totalSamples = Math.floor(duration * sampleRate)
const audio = new Float32Array(totalSamples)
// FFT analysis parameters - exactly matching what spectrograms use
const hopSize = Math.floor(fftSize * (1 - frameOverlap))
const numFrames = Math.floor((totalSamples - fftSize) / hopSize) + 1
const nyquist = sampleRate / 2
const binWidth = nyquist / (fftSize / 2)
// Map image dimensions to FFT parameters
const framesPerColumn = numFrames / width
// Calculate exact frequency bins if using exact mapping
let freqBins: number[]
if (exactBinMapping) {
freqBins = []
for (let bin = 0; bin < fftSize / 2; bin++) {
const freq = bin * binWidth
if (freq >= minFreq && freq <= maxFreq) {
freqBins.push(freq)
}
}
// Map image rows to these exact bins
console.log(`Ultra-precise mode: Using ${freqBins.length} exact FFT bins from ${minFreq}Hz to ${maxFreq}Hz`)
} else {
// Linear frequency mapping
freqBins = []
for (let row = 0; row < height; row++) {
const freq = maxFreq - (row / (height - 1)) * (maxFreq - minFreq)
freqBins.push(freq)
}
}
if (maxAmplitude > 1) {
// Pre-calculate optimization arrays to avoid redundant calculations
const precomputedFreqs = new Float32Array(freqBins.length)
for (let i = 0; i < freqBins.length; i++) {
precomputedFreqs[i] = 2 * Math.PI * freqBins[i]
}
// Reusable buffers to avoid memory allocations
let columnSpectrum = new Float32Array(fftSize) // Max possible size
let columnAmplitudes = new Float32Array(height) // Cache amplitudes per column
// Synthesize each frame exactly
for (let col = 0; col < width; col++) {
// Calculate exact frame timing
const frameIndex = col * framesPerColumn
const startSample = Math.floor(frameIndex * hopSize)
const endSample = Math.min(startSample + fftSize, totalSamples)
const frameLength = endSample - startSample
if (frameLength <= 0) continue
// Clear the reused buffer
columnSpectrum.fill(0, 0, frameLength)
// Pre-calculate intensities and amplitudes for this column to eliminate redundant calculations
const effectiveHeight = exactBinMapping ? Math.min(height, freqBins.length) : height
columnAmplitudes.fill(0, 0, effectiveHeight) // Clear amplitude cache
let hasVisiblePixels = false
for (let row = 0; row < effectiveHeight; row++) {
const pixelIndex = (row * width + col) * 4
const r = imageData.data[pixelIndex]
const g = imageData.data[pixelIndex + 1]
const b = imageData.data[pixelIndex + 2]
// Raw pixel intensity - no perceptual weighting
let intensity = (r + g + b) / (3 * 255)
if (invert) intensity = 1 - intensity
if (intensity >= 0.001) {
// Apply contrast only if not disabled
let amplitude: number
if (disableContrast) {
amplitude = intensity
} else {
const contrast = this.params.contrast || 1.0
// Fast power optimization for common cases
if (contrast === 1.0) {
amplitude = intensity // No contrast
} else if (contrast === 2.0) {
amplitude = intensity * intensity // Square is much faster than Math.pow
} else if (contrast === 0.5) {
amplitude = Math.sqrt(intensity) // Square root is faster than Math.pow
} else if (contrast === 3.0) {
amplitude = intensity * intensity * intensity // Cube
} else if (contrast === 4.0) {
const sq = intensity * intensity
amplitude = sq * sq // Fourth power
} else {
// Fast power approximation for arbitrary values
// Uses bit manipulation + lookup for ~10x speedup over Math.pow
amplitude = fastPower(intensity, contrast)
}
}
if (amplitude >= 0.001) {
columnAmplitudes[row] = amplitude
hasVisiblePixels = true
} else {
columnAmplitudes[row] = 0
}
} else {
columnAmplitudes[row] = 0
}
}
// Skip entirely black columns
if (!hasVisiblePixels) continue
// Process each frequency bin using cached amplitudes
for (let row = 0; row < effectiveHeight; row++) {
const amplitude = columnAmplitudes[row]
if (amplitude < 0.001) continue
// Use pre-calculated frequency coefficient
const freqCoeff = precomputedFreqs[row]
// Phase increment method - mathematically identical but much faster
// Eliminates array lookups and multiplications in tight loop
let phase = freqCoeff * startSample / sampleRate // Initial phase
const phaseIncrement = freqCoeff / sampleRate // Phase per sample
for (let i = 0; i < frameLength; i++) {
columnSpectrum[i] += amplitude * Math.sin(phase)
phase += phaseIncrement
}
}
// Add frame to audio with NO windowing (preserves exact amplitudes)
for (let i = 0; i < frameLength; i++) {
if (startSample + i < totalSamples) {
audio[startSample + i] += columnSpectrum[i]
}
}
}
// Apply normalization only if not disabled
if (!disableNormalization) {
let maxAmp = 0
for (let i = 0; i < audio.length; i++) {
audio[i] /= maxAmplitude
const absAmp = Math.abs(audio[i])
if (absAmp > maxAmp) maxAmp = absAmp
}
if (maxAmp > 0) {
const scale = 0.95 / maxAmp // Slightly higher than 0.8 to preserve dynamics
for (let i = 0; i < audio.length; i++) {
audio[i] *= scale
}
}
}
@@ -172,4 +444,84 @@ export function synthesizeFromImage(
const synthesizer = new ImageToAudioSynthesizer(params)
const result = synthesizer.synthesize(imageData)
return result.audio
}
/**
 * Build a complete SynthesisParams set tuned for direct (high-fidelity) mode.
 *
 * Caller-supplied overrides always win over the defaults below.
 * NOTE(review): exactBinMapping defaults to false here while synthesizeDirect's
 * internal destructuring default is true — confirm which is intended.
 *
 * @param overrides partial parameter set merged over the direct-mode defaults
 * @returns fully populated direct-mode synthesis parameters
 */
export function createDirectParams(overrides: Partial<SynthesisParams> = {}): SynthesisParams {
  const defaults: SynthesisParams = {
    duration: 5,
    minFreq: 200,
    maxFreq: 20000,
    sampleRate: 44100,
    frequencyResolution: 1,
    amplitudeThreshold: 0,
    maxPartials: 0,
    windowType: 'rectangular',
    contrast: 2.2,
    spectralDensity: 0,
    usePerceptualWeighting: false,
    frequencyMapping: 'linear',
    synthesisMode: 'direct',
    invert: false,
    fftSize: 2048,
    frameOverlap: 0.75,
    disableNormalization: false,
    disableContrast: false,
    exactBinMapping: false
  }
  return { ...defaults, ...overrides }
}
/**
 * Build a complete SynthesisParams set for the custom (advanced-processing)
 * synthesis mode: mel-frequency mapping, perceptual weighting, Hann windowing
 * and spectral-density expansion enabled.
 *
 * @param overrides partial parameter set merged over the custom-mode defaults
 * @returns fully populated custom-mode synthesis parameters
 */
export function createCustomParams(overrides: Partial<SynthesisParams> = {}): SynthesisParams {
  const defaults: SynthesisParams = {
    duration: 5,
    minFreq: 200,
    maxFreq: 20000,
    sampleRate: 44100,
    frequencyResolution: 1,
    amplitudeThreshold: 0.01,
    maxPartials: 100,
    windowType: 'hann',
    contrast: 2.2,
    spectralDensity: 3,
    usePerceptualWeighting: true,
    frequencyMapping: 'mel',
    synthesisMode: 'custom',
    invert: false,
    fftSize: 2048,
    frameOverlap: 0.5,
    disableNormalization: false,
    disableContrast: false,
    exactBinMapping: false
  }
  return { ...defaults, ...overrides }
}
/**
* Direct synthesis for high fidelity spectrogram reconstruction
*/
export function synthesizeDirect(
imageData: ImageData,
params: Partial<SynthesisParams> = {}
): SynthesisResult {
const directParams = createDirectParams(params)
const synthesizer = new ImageToAudioSynthesizer(directParams)
return synthesizer.synthesize(imageData)
}
/**
* Custom synthesis with advanced audio processing features
*/
export function synthesizeCustom(
imageData: ImageData,
params: Partial<SynthesisParams> = {}
): SynthesisResult {
const customParams = createCustomParams(params)
const synthesizer = new ImageToAudioSynthesizer(customParams)
return synthesizer.synthesize(imageData)
}

View File

@@ -6,10 +6,20 @@ export interface SynthesisParams {
maxFreq: number
sampleRate: number
frequencyResolution: number
timeResolution: number
amplitudeThreshold: number
maxPartials: number
windowType: WindowType
contrast?: number
spectralDensity?: number
usePerceptualWeighting?: boolean
frequencyMapping?: 'mel' | 'linear' | 'bark' | 'log'
synthesisMode?: 'direct' | 'custom'
invert?: boolean
fftSize?: number
frameOverlap?: number
disableNormalization?: boolean
disableContrast?: boolean
exactBinMapping?: boolean
}
export interface SpectralPeak {

View File

@@ -13,9 +13,107 @@ export function melToHz(mel: number): number {
}
/**
* Detect spectral peaks in amplitude spectrum
* Convert frequency from Hz to Bark scale
*/
export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01): number[] {
/** Convert a frequency in Hz to the psychoacoustic Bark scale. */
export function hzToBark(freq: number): number {
  const warped = freq / 7500
  return 13 * Math.atan(0.00076 * freq) + 3.5 * Math.atan(warped * warped)
}
/**
 * Convert a Bark-scale value back to frequency in Hz.
 *
 * hzToBark has no closed-form inverse, so this runs up to 10 Newton-Raphson
 * iterations against hzToBark with an analytic derivative, stopping early once
 * the estimate is within 0.001 Bark of the target. The result is clamped to
 * the audible range [20, 20000] Hz.
 *
 * @param bark Bark-scale value to invert
 * @returns frequency in Hz, clamped to [20, 20000]
 */
export function barkToHz(bark: number): number {
  // Approximate inverse using Newton's method for better accuracy
  let freq = 1000 // Initial guess; mid-range keeps the iteration well-behaved
  for (let i = 0; i < 10; i++) {
    const barkEst = hzToBark(freq)
    // Analytic d(bark)/d(freq): derivative of the two atan terms in hzToBark.
    const derivative = 13 * 0.00076 / (1 + Math.pow(0.00076 * freq, 2)) +
      3.5 * 2 * (freq / 7500) * (1 / 7500) / (1 + Math.pow(freq / 7500, 4))
    freq = freq - (barkEst - bark) / derivative
    // Early exit once within 0.001 Bark of the requested value.
    if (Math.abs(hzToBark(freq) - bark) < 0.001) break
  }
  return Math.max(20, Math.min(20000, freq))
}
/**
 * Shape a normalized amplitude with a named transfer curve.
 *
 * The input is clamped to [0, 1] before shaping.
 *
 * @param amplitude raw amplitude, clamped to [0, 1]
 * @param curve one of 'linear' | 'logarithmic' | 'power' | 'sqrt'; any other
 *              value behaves like 'linear' (clamped passthrough)
 * @param gamma exponent used by the 'power' curve (default 2.2)
 * @returns the shaped amplitude in [0, 1]
 */
export function applyAmplitudeCurve(amplitude: number, curve: string, gamma: number = 2.2): number {
  const a = Math.min(1, Math.max(0, amplitude))
  if (curve === 'logarithmic') {
    return a === 0 ? 0 : Math.log10(1 + a * 9) / Math.log10(10)
  }
  if (curve === 'power') {
    return Math.pow(a, gamma)
  }
  if (curve === 'sqrt') {
    return Math.sqrt(a)
  }
  // 'linear' and any unrecognized curve name: clamped passthrough.
  return a
}
/**
 * Gate an amplitude against a threshold with a smooth tanh knee.
 *
 * Below half the threshold the signal is muted; above twice the threshold it
 * passes untouched; in between it is faded with a tanh transition centred on
 * the threshold itself.
 *
 * @param amplitude value to gate
 * @param threshold gate level; <= 0 disables gating entirely
 * @param softness width of the tanh transition (default 0.1; smaller = harder knee)
 * @returns the gated amplitude
 */
export function applySoftThreshold(amplitude: number, threshold: number, softness: number = 0.1): number {
  if (threshold <= 0) return amplitude
  const ratio = amplitude / threshold
  if (ratio < 0.5) return 0
  if (ratio > 2.0) return amplitude
  // Smooth transition region: fade with tanh centred on ratio === 1.
  const knee = Math.tanh((ratio - 1) / softness)
  return amplitude * (0.5 + 0.5 * knee)
}
/**
 * Map an image row index to a frequency in Hz on the chosen scale.
 *
 * Row 0 (top of the image) maps to maxFreq; the bottom row maps to minFreq,
 * with interpolation performed in the chosen scale's domain.
 *
 * @param row image row index, 0-based from the top
 * @param totalRows number of rows in the image
 * @param minFreq lowest output frequency in Hz
 * @param maxFreq highest output frequency in Hz
 * @param scale 'mel' | 'bark' | 'log' | 'linear'; anything else falls back to linear
 * @returns frequency in Hz for this row
 */
export function mapFrequency(row: number, totalRows: number, minFreq: number, maxFreq: number, scale: string): number {
  const t = row / (totalRows - 1)
  switch (scale) {
    case 'mel': {
      const lo = hzToMel(minFreq)
      const hi = hzToMel(maxFreq)
      return melToHz(hi - t * (hi - lo))
    }
    case 'bark': {
      const lo = hzToBark(minFreq)
      const hi = hzToBark(maxFreq)
      return barkToHz(hi - t * (hi - lo))
    }
    case 'log': {
      const lo = Math.log10(minFreq)
      const hi = Math.log10(maxFreq)
      return Math.pow(10, hi - t * (hi - lo))
    }
    case 'linear':
    default:
      return maxFreq - t * (maxFreq - minFreq)
  }
}
/**
* Detect spectral peaks in amplitude spectrum with optional smoothing
*/
export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01, useSmoothing: boolean = false): number[] {
if (useSmoothing) {
return detectSmoothSpectralPeaks(spectrum, threshold)
}
const peaks: number[] = []
// Find significant components above threshold
@@ -40,29 +138,223 @@ export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01
}
/**
* Apply perceptual amplitude weighting
* Detect spectral peaks with local maxima and smoothing
*/
export function perceptualAmplitudeWeighting(freq: number, amplitude: number): number {
/**
 * Detect spectral peaks as strict local maxima of a smoothed spectrum.
 *
 * The spectrum is first box-filtered (smoothSpectrum, window 2). A bin is a
 * peak when it exceeds `threshold` and is strictly greater than its four
 * nearest neighbours (i±1, i±2); the first/last two bins are never peaks.
 * Parabolic interpolation refines each peak to sub-bin accuracy, but the
 * result is rounded back to an integer bin index.
 * NOTE(review): Math.round discards most of the sub-bin refinement — confirm
 * whether fractional peak positions were intended downstream.
 *
 * If no local maxima are found, falls back to returning every bin above
 * `threshold`. The result is de-duplicated and sorted ascending.
 *
 * @param spectrum amplitude spectrum (one value per bin)
 * @param threshold minimum smoothed amplitude for a peak (default 0.01)
 * @returns sorted, unique peak bin indices
 */
export function detectSmoothSpectralPeaks(spectrum: number[], threshold: number = 0.01): number[] {
  const smoothedSpectrum = smoothSpectrum(spectrum, 2)
  const peaks: number[] = []
  // Find local maxima in smoothed spectrum (needs 2 bins of margin each side)
  for (let i = 2; i < smoothedSpectrum.length - 2; i++) {
    const current = smoothedSpectrum[i]
    if (current > threshold &&
      current > smoothedSpectrum[i - 1] &&
      current > smoothedSpectrum[i + 1] &&
      current > smoothedSpectrum[i - 2] &&
      current > smoothedSpectrum[i + 2]) {
      // Find the exact peak position with sub-bin accuracy using parabolic interpolation
      const y1 = smoothedSpectrum[i - 1]
      const y2 = smoothedSpectrum[i]
      const y3 = smoothedSpectrum[i + 1]
      // Parabola through (i-1, y1), (i, y2), (i+1, y3): vertex offset = -b / (2a)
      const a = (y1 - 2 * y2 + y3) / 2
      const b = (y3 - y1) / 2
      let peakOffset = 0
      if (Math.abs(a) > 1e-10) {
        peakOffset = -b / (2 * a)
        // Clamp to the half-bin neighbourhood so the vertex stays near bin i
        peakOffset = Math.max(-0.5, Math.min(0.5, peakOffset))
      }
      const exactPeak = i + peakOffset
      if (exactPeak >= 0 && exactPeak < spectrum.length) {
        peaks.push(Math.round(exactPeak))
      }
    }
  }
  // Fallback: use simple threshold detection if no peaks found
  if (peaks.length === 0) {
    for (let i = 0; i < spectrum.length; i++) {
      if (spectrum[i] > threshold) {
        peaks.push(i)
      }
    }
  }
  // Remove duplicates and sort
  return [...new Set(peaks)].sort((a, b) => a - b)
}
/**
 * Smooth a spectrum with a centred moving-average (box) filter.
 *
 * The effective reach is floor(windowSize / 2) bins on each side, shrinking at
 * the array edges (no zero padding). Results are accumulated through a
 * Float32Array, so each output value is rounded to single precision.
 *
 * @param spectrum amplitude spectrum to smooth
 * @param windowSize nominal window width; reach = floor(windowSize / 2)
 * @returns a new, smoothed array of the same length
 */
function smoothSpectrum(spectrum: number[], windowSize: number): number[] {
  const filtered = new Float32Array(spectrum.length)
  const reach = Math.floor(windowSize / 2)
  for (let center = 0; center < spectrum.length; center++) {
    const first = Math.max(0, center - reach)
    const last = Math.min(spectrum.length - 1, center + reach)
    let total = 0
    for (let j = first; j <= last; j++) {
      total += spectrum[j]
    }
    filtered[center] = total / (last - first + 1)
  }
  return Array.from(filtered)
}
/**
 * Perceptually weight an amplitude for a given frequency.
 *
 * The amplitude is first shaped by a contrast power curve, then scaled by a
 * bell-shaped weight centred at 1 kHz (the weight ranges over [0.5, 1] in
 * log-frequency, gently boosting the mid band).
 *
 * @param freq frequency of the partial in Hz
 * @param amplitude raw amplitude in [0, 1]
 * @param contrast power-curve exponent (default 2.2)
 * @returns the weighted amplitude
 */
export function perceptualAmplitudeWeighting(freq: number, amplitude: number, contrast: number = 2.2): number {
  const shaped = Math.pow(amplitude, contrast)
  const octavesFrom1k = Math.log10(freq / 1000)
  const bell = Math.exp(-octavesFrom1k * octavesFrom1k * 0.5) * 0.5 + 0.5
  return shaped * bell
}
/**
 * Expand one spectral peak into a small cluster of closely spaced tones.
 *
 * Tones are distributed across `bandwidth` Hz around `centerFreq`, with a
 * slight amplitude roll-off (up to 30%) for tones further from the centre.
 * All tones use zero phase.
 *
 * @param centerFreq centre frequency of the cluster in Hz
 * @param amplitude amplitude of the source peak
 * @param numTones number of tones to generate (default 3)
 * @param bandwidth total spread of the cluster in Hz (default 50)
 * @returns the generated tone cluster
 */
export function generateSpectralDensity(
  centerFreq: number,
  amplitude: number,
  numTones: number = 3,
  bandwidth: number = 50
): Array<{ frequency: number; amplitude: number; phase: number }> {
  const spacing = bandwidth / numTones
  const half = numTones / 2
  const cluster: Array<{ frequency: number; amplitude: number; phase: number }> = []
  for (let i = 0; i < numTones; i++) {
    const offset = i - half
    cluster.push({
      frequency: centerFreq + offset * spacing,
      // Slight roll-off: up to 30% quieter at the cluster edges.
      amplitude: amplitude * (1 - Math.abs(offset) / numTones * 0.3),
      phase: 0
    })
  }
  return cluster
}
/**
* Auto-detect if image colors should be inverted
* Enhanced detection with edge analysis and histogram consideration
*/
export function shouldInvertImage(imageData: ImageData): boolean {
const { width, height, data } = imageData
let totalBrightness = 0
let edgePixels = 0
let edgeBrightness = 0
for (let i = 0; i < data.length; i += 4) {
const gray = 0.299 * data[i] + 0.587 * data[i + 1] + 0.114 * data[i + 2]
totalBrightness += gray / 255
// Sample edge pixels (first/last rows and columns)
for (let y = 0; y < height; y++) {
for (let x = 0; x < width; x++) {
const idx = (y * width + x) * 4
const gray = 0.299 * data[idx] + 0.587 * data[idx + 1] + 0.114 * data[idx + 2]
const brightness = gray / 255
totalBrightness += brightness
// Check if pixel is on edge
if (y === 0 || y === height - 1 || x === 0 || x === width - 1) {
edgeBrightness += brightness
edgePixels++
}
}
}
const meanBrightness = totalBrightness / (width * height)
return meanBrightness > 0.5 // Bright background = diagram
const meanEdgeBrightness = edgePixels > 0 ? edgeBrightness / edgePixels : meanBrightness
// If edges are significantly brighter than average, likely a diagram with bright background
const edgeWeight = Math.abs(meanEdgeBrightness - meanBrightness) > 0.2 ? 0.7 : 0.3
const finalBrightness = edgeWeight * meanEdgeBrightness + (1 - edgeWeight) * meanBrightness
return finalBrightness > 0.5
}
/**
 * Analyze the brightness distribution of an image.
 *
 * Per-pixel brightness is Rec. 601 luminance normalized to [0, 1]; pixels on
 * the outer border are also averaged separately. Contrast is the standard
 * deviation of all brightness values. The recommendation flags images with a
 * bright interior AND bright edges as 'invert' (likely a diagram on a light
 * background), dark-on-dark as 'normal', everything else as 'ambiguous'.
 *
 * @param imageData image to analyze (RGBA; alpha is ignored)
 * @returns brightness statistics plus an invert/normal/ambiguous recommendation
 */
export function analyzeImageBrightness(imageData: ImageData): {
  meanBrightness: number
  medianBrightness: number
  edgeBrightness: number
  contrast: number
  recommendation: 'invert' | 'normal' | 'ambiguous'
} {
  const { width, height, data } = imageData
  const values: number[] = []
  let edgeSum = 0
  let edgeCount = 0
  const pixelCount = data.length / 4
  for (let p = 0; p < pixelCount; p++) {
    const base = p * 4
    // Rec. 601 luminance, normalized to [0, 1].
    const luma = 0.299 * data[base] + 0.587 * data[base + 1] + 0.114 * data[base + 2]
    const v = luma / 255
    values.push(v)
    // Border pixels contribute to the edge-brightness average.
    const y = Math.floor(p / width)
    const x = p % width
    if (y === 0 || y === height - 1 || x === 0 || x === width - 1) {
      edgeSum += v
      edgeCount++
    }
  }
  // Sort ascending for the median (mean/variance are summed over the sorted array).
  values.sort((a, b) => a - b)
  let total = 0
  for (const v of values) total += v
  const meanBrightness = total / values.length
  const medianBrightness = values[Math.floor(values.length / 2)]
  const avgEdgeBrightness = edgeCount > 0 ? edgeSum / edgeCount : meanBrightness
  // Contrast = standard deviation of the brightness distribution.
  let squaredError = 0
  for (const v of values) squaredError += Math.pow(v - meanBrightness, 2)
  const contrast = Math.sqrt(squaredError / values.length)
  let recommendation: 'invert' | 'normal' | 'ambiguous'
  if (meanBrightness > 0.7 && avgEdgeBrightness > 0.6) {
    recommendation = 'invert'
  } else if (meanBrightness < 0.3 && avgEdgeBrightness < 0.4) {
    recommendation = 'normal'
  } else {
    recommendation = 'ambiguous'
  }
  return {
    meanBrightness,
    medianBrightness,
    edgeBrightness: avgEdgeBrightness,
    contrast,
    recommendation
  }
}
/**
 * Invert a normalized spectrum (bright <-> dark): each amplitude a becomes 1 - a.
 * Returns a new array; the input is not mutated.
 *
 * @param spectrum normalized amplitudes in [0, 1]
 * @returns the inverted spectrum
 */
export function forceInvertSpectrum(spectrum: number[]): number[] {
  const inverted: number[] = []
  for (const amp of spectrum) {
    inverted.push(1 - amp)
  }
  return inverted
}
/**
@@ -115,14 +407,16 @@ export function applyWindow(audioChunk: Float32Array, windowType: string): Float
}
/**
* Extract grayscale spectrum from image column
* Extract grayscale spectrum from image column with improved amplitude mapping
* Incorporates alexadam's perceptual weighting approach
*/
export function extractSpectrum(
imageData: ImageData,
col: number,
height: number,
frequencyResolution: number,
invert: boolean
invert: boolean,
usePerceptualWeighting: boolean = true
): number[] {
const { width, data } = imageData
const spectrum: number[] = []
@@ -134,10 +428,57 @@ export function extractSpectrum(
const g = data[idx + 1]
const b = data[idx + 2]
let amplitude = (0.299 * r + 0.587 * g + 0.114 * b) / 255
let amplitude: number
if (usePerceptualWeighting) {
// Use alexadam's approach: sum RGB and square for perceptual weighting
const rgbSum = r + g + b
amplitude = Math.pow(rgbSum / 765, 2) // 765 = 255 * 3 (max RGB sum)
} else {
// Original luminance-based approach
amplitude = (0.299 * r + 0.587 * g + 0.114 * b) / 255
}
if (invert) amplitude = 1 - amplitude
spectrum.push(amplitude)
}
return spectrum
}
/**
 * Linearly map an image row to a frequency in Hz.
 * Row 0 (top of the image) maps to maxFreq; the bottom row maps to minFreq.
 *
 * @param row image row index, 0-based from the top
 * @param totalRows number of rows in the image
 * @param minFreq lowest output frequency in Hz
 * @param maxFreq highest output frequency in Hz
 * @returns interpolated frequency in Hz
 */
export function mapFrequencyLinear(row: number, totalRows: number, minFreq: number, maxFreq: number): number {
  const fraction = row / (totalRows - 1)
  return maxFreq - fraction * (maxFreq - minFreq)
}
/**
 * Peak-normalize an audio buffer so its absolute maximum equals targetLevel.
 *
 * A silent (all-zero) buffer is returned as an identical copy. The input is
 * never mutated; a new Float32Array is always returned.
 *
 * @param audio audio samples to normalize
 * @param targetLevel desired peak magnitude after scaling (default 0.8)
 * @returns a new, normalized buffer of the same length
 */
export function normalizeAudioGlobal(audio: Float32Array, targetLevel: number = 0.8): Float32Array {
  // Single pass to find the global peak magnitude.
  let peak = 0
  for (let i = 0; i < audio.length; i++) {
    const magnitude = Math.abs(audio[i])
    if (magnitude > peak) peak = magnitude
  }
  const normalized = new Float32Array(audio.length)
  if (peak > 0) {
    const gain = targetLevel / peak
    for (let i = 0; i < audio.length; i++) {
      normalized[i] = audio[i] * gain
    }
  } else {
    // Silence: return an unchanged copy.
    normalized.set(audio)
  }
  return normalized
}

View File

@@ -1,5 +1,12 @@
// Core synthesis
export { ImageToAudioSynthesizer, synthesizeFromImage } from './core/synthesizer'
export {
ImageToAudioSynthesizer,
synthesizeFromImage,
createDirectParams,
createCustomParams,
synthesizeDirect,
synthesizeCustom
} from './core/synthesizer'
export type { SynthesisParams, SpectralPeak, SynthesisResult, WindowType } from './core/types'
// Utilities
@@ -9,14 +16,23 @@ export {
detectSpectralPeaks,
perceptualAmplitudeWeighting,
shouldInvertImage,
analyzeImageBrightness,
forceInvertSpectrum,
extractSpectrum,
generateWindow,
applyWindow
applyWindow,
applySoftThreshold,
mapFrequency,
mapFrequencyLinear,
normalizeAudioGlobal,
generateSpectralDensity
} from './core/utils'
// Audio export
export {
createWAVBuffer,
downloadWAV,
playAudio
} from './audio/export'
playAudio,
createAudioPlayer
} from './audio/export'
export type { AudioPlayer } from './audio/export'