This commit is contained in:
2025-09-27 12:00:17 +02:00
commit b564e41820
60 changed files with 7925 additions and 0 deletions

View File

@@ -0,0 +1,102 @@
# Spectral Synthesis Library
A library for synthesizing audio from images using additive synthesis and perceptual frequency mapping.
## Features
- **Image-to-Audio Synthesis**: Convert any image to audio by treating it as a spectrogram
- **Perceptual Accuracy**: Uses Mel-scale frequency mapping for better perceptual results
- **Spectral Peak Detection**: Only synthesizes significant frequency components
- **Temporal Smoothing**: Maintains coherent trajectories between time frames
- **Auto-Detection**: Automatically handles different image types (spectrograms vs diagrams)
## Quick Start
```typescript
import { synthesizeFromImage, downloadWAV } from './spectral-synthesis'
// Simple usage
const audioData = synthesizeFromImage(imageData)
// With custom parameters (note: a distinct binding — `audioData` is already declared above)
const customAudio = synthesizeFromImage(imageData, {
  duration: 10,
  minFreq: 100,
  maxFreq: 10000,
  maxPartials: 200
})
// Export as WAV
downloadWAV(audioData, 44100, 'my-audio.wav')
```
## API Reference
### Main Functions
#### `synthesizeFromImage(imageData, params?)`
- **imageData**: `ImageData` - Canvas image data
- **params**: `Partial<SynthesisParams>` - Optional parameters
- **Returns**: `Float32Array` - Audio samples
### Types
#### `SynthesisParams`
```typescript
interface SynthesisParams {
duration: number // Audio duration in seconds
minFreq: number // Minimum frequency in Hz
maxFreq: number // Maximum frequency in Hz
sampleRate: number // Sample rate in Hz
frequencyResolution: number // Frequency bin downsampling
timeResolution: number // Time slice downsampling
amplitudeThreshold: number // Minimum amplitude threshold
maxPartials: number // Maximum simultaneous partials
}
```
## Project Structure
```
spectral-synthesis/
├── core/
│ ├── types.ts # Type definitions
│ ├── utils.ts # Helper functions
│ └── synthesizer.ts # Main synthesis logic
├── audio/
│ └── export.ts # Audio export utilities
└── index.ts # Main exports
```
## Algorithm
1. **Image Analysis**: Auto-detect if colors should be inverted
2. **Frequency Mapping**: Convert image rows to Mel-scale frequencies
3. **Peak Detection**: Find significant spectral components
4. **Temporal Smoothing**: Apply continuity between time frames
5. **Perceptual Weighting**: Apply psychoacoustic amplitude scaling
6. **Additive Synthesis**: Generate and sum sine waves
## Usage Examples
### Basic Synthesis
```typescript
const canvas = document.createElement('canvas')
const ctx = canvas.getContext('2d')
// ... load image to canvas
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height)
const audio = synthesizeFromImage(imageData)
```
### Advanced Usage
```typescript
import { ImageToAudioSynthesizer } from './spectral-synthesis'
const synthesizer = new ImageToAudioSynthesizer({
duration: 5,
maxPartials: 150
})
const result = synthesizer.synthesize(imageData)
console.log(`Generated ${result.duration}s of audio`)
```

View File

@@ -0,0 +1,78 @@
/**
* Create WAV buffer from audio data
*/
/**
 * Serialize mono float audio into a complete 16-bit PCM WAV file image.
 *
 * Layout: canonical 44-byte RIFF/WAVE header followed by little-endian
 * int16 samples. Input samples are clamped to [-1, 1] before scaling.
 *
 * @param audioData - Mono samples, nominally in the range [-1, 1]
 * @param sampleRate - Sample rate in Hz to record in the header
 * @returns ArrayBuffer containing the whole WAV file
 */
export function createWAVBuffer(audioData: Float32Array, sampleRate: number): ArrayBuffer {
  const sampleCount = audioData.length
  const dataBytes = sampleCount * 2 // 16-bit mono => 2 bytes per sample
  const buffer = new ArrayBuffer(44 + dataBytes)
  const view = new DataView(buffer)

  // RIFF container header
  writeString(view, 0, 'RIFF')
  view.setUint32(4, 36 + dataBytes, true) // remaining size = total - 8
  writeString(view, 8, 'WAVE')

  // "fmt " chunk: uncompressed linear PCM, mono, 16 bits per sample
  writeString(view, 12, 'fmt ')
  view.setUint32(16, 16, true) // fmt chunk payload size
  view.setUint16(20, 1, true) // audio format 1 = PCM
  view.setUint16(22, 1, true) // channel count (mono)
  view.setUint32(24, sampleRate, true)
  view.setUint32(28, sampleRate * 2, true) // byte rate = rate * block align
  view.setUint16(32, 2, true) // block align (bytes per frame)
  view.setUint16(34, 16, true) // bits per sample

  // "data" chunk carrying the PCM payload
  writeString(view, 36, 'data')
  view.setUint32(40, dataBytes, true)
  for (let i = 0; i < sampleCount; i++) {
    const clamped = Math.min(1, Math.max(-1, audioData[i]))
    view.setInt16(44 + i * 2, clamped * 0x7FFF, true)
  }
  return buffer
}

/** Write an ASCII tag into the view byte-by-byte at the given offset. */
function writeString(view: DataView, offset: number, string: string) {
  for (let i = 0; i < string.length; i++) {
    view.setUint8(offset + i, string.charCodeAt(i))
  }
}
/**
* Download audio as WAV file
*/
/**
 * Trigger a browser download of the audio as a WAV file.
 *
 * Encodes the samples, wraps them in a Blob, and clicks a temporary
 * anchor pointing at an object URL (revoked immediately afterwards).
 *
 * @param audioData - Mono samples in the range [-1, 1]
 * @param sampleRate - Sample rate in Hz
 * @param filename - Suggested file name for the download
 */
export function downloadWAV(audioData: Float32Array, sampleRate: number, filename: string) {
  const wavBytes = createWAVBuffer(audioData, sampleRate)
  const objectUrl = URL.createObjectURL(new Blob([wavBytes], { type: 'audio/wav' }))
  const anchor = document.createElement('a')
  anchor.href = objectUrl
  anchor.download = filename
  anchor.click()
  URL.revokeObjectURL(objectUrl)
}
/**
* Play audio in browser
*/
/**
 * Play the audio through the Web Audio API.
 *
 * Copies the samples into a mono AudioBuffer and starts a one-shot
 * buffer source wired to the default output. Warns (but still plays)
 * when the context's sample rate differs from the data's.
 *
 * @param audioData - Mono samples in the range [-1, 1]
 * @param sampleRate - Sample rate the data was generated at
 * @returns Promise that resolves when playback ends
 */
export async function playAudio(audioData: Float32Array, sampleRate: number): Promise<void> {
  const ContextCtor = window.AudioContext || (window as any).webkitAudioContext
  const context = new ContextCtor()
  if (context.sampleRate !== sampleRate) {
    console.warn(`Audio context sample rate (${context.sampleRate}) differs from data sample rate (${sampleRate})`)
  }
  const playbackBuffer = context.createBuffer(1, audioData.length, sampleRate)
  playbackBuffer.copyToChannel(audioData, 0)
  const source = context.createBufferSource()
  source.buffer = playbackBuffer
  source.connect(context.destination)
  const finished = new Promise<void>(resolve => {
    source.onended = () => resolve()
  })
  source.start()
  return finished
}

View File

@@ -0,0 +1,175 @@
import type { SynthesisParams, SpectralPeak, SynthesisResult } from './types'
import {
hzToMel,
melToHz,
detectSpectralPeaks,
perceptualAmplitudeWeighting,
shouldInvertImage,
extractSpectrum,
applyWindow
} from './utils'
/**
 * Additive synthesizer that interprets an image as a spectrogram:
 * each column is a time slice, each row a frequency bin (mapped to Hz
 * on the Mel scale, high frequencies at the top), and pixel brightness
 * the partial's amplitude.
 */
export class ImageToAudioSynthesizer {
  // Fully-resolved parameter set (defaults merged with caller overrides)
  private params: SynthesisParams

  /**
   * @param params - Partial overrides; any omitted field falls back to
   *   the defaults below (5 s, 20 Hz-20 kHz, 44.1 kHz, Hann window).
   */
  constructor(params: Partial<SynthesisParams> = {}) {
    this.params = {
      duration: 5,
      minFreq: 20,
      maxFreq: 20000,
      sampleRate: 44100,
      frequencyResolution: 1,
      timeResolution: 1,
      amplitudeThreshold: 0.01,
      maxPartials: 100,
      windowType: 'hann',
      ...params
    }
  }

  /**
   * Synthesize audio from image data.
   *
   * Per image column: extract a brightness spectrum, detect peaks, map
   * peak rows to frequencies on the Mel scale, smooth amplitudes
   * against the previous column, apply perceptual weighting, then sum
   * sine partials into a windowed chunk that is accumulated into the
   * output. The final signal is peak-normalized only if it would clip.
   *
   * @param imageData - Canvas ImageData treated as a spectrogram
   * @returns Audio samples plus the sample rate and duration used
   */
  synthesize(imageData: ImageData): SynthesisResult {
    // NOTE(review): 'data' is destructured but unused here; pixel reads
    // happen inside extractSpectrum.
    const { width, height, data } = imageData
    const {
      duration,
      minFreq,
      maxFreq,
      sampleRate,
      frequencyResolution,
      timeResolution,
      amplitudeThreshold,
      maxPartials,
      windowType
    } = this.params

    // Detect image type (bright background => invert so ink = energy)
    const invert = shouldInvertImage(imageData)

    // Calculate synthesis parameters
    const totalSamples = Math.floor(duration * sampleRate)
    const effectiveWidth = Math.floor(width / timeResolution)
    const effectiveHeight = Math.floor(height / frequencyResolution)
    const samplesPerColumn = totalSamples / effectiveWidth

    const audio = new Float32Array(totalSamples)

    // Pre-calculate mel-scale frequency mapping endpoints
    const minMel = hzToMel(minFreq)
    const maxMel = hzToMel(maxFreq)

    // Storage for temporal smoothing: amplitude per bin carried over
    // from the previous column (one-pole smoothing below)
    const previousAmplitudes = new Float32Array(effectiveHeight)
    const smoothingFactor = 0.3

    // Process each time slice
    for (let col = 0; col < effectiveWidth; col++) {
      const sourceCol = col * timeResolution
      const startSample = Math.floor(col * samplesPerColumn)
      const endSample = Math.floor((col + 1) * samplesPerColumn)

      // Extract spectrum for this time slice
      const spectrum = extractSpectrum(imageData, sourceCol, effectiveHeight, frequencyResolution, invert)

      // Detect spectral peaks.
      // NOTE(review): the threshold is capped at 0.01, so an
      // amplitudeThreshold above 0.01 has no effect here — confirm intent.
      const peaks = detectSpectralPeaks(spectrum, Math.min(amplitudeThreshold, 0.01))

      // Generate partials from peaks
      const partials: SpectralPeak[] = []
      for (const peakRow of peaks) {
        // Mel-scale frequency mapping (row 0 = top of image = maxFreq).
        // NOTE(review): divides by (effectiveHeight - 1); an image that
        // downsamples to a single row would divide by zero — confirm inputs.
        const melValue = maxMel - (peakRow / (effectiveHeight - 1)) * (maxMel - minMel)
        const frequency = melToHz(melValue)
        let amplitude = spectrum[peakRow]

        // Apply temporal smoothing against the previous column's value
        if (col > 0) {
          amplitude = smoothingFactor * previousAmplitudes[peakRow] + (1 - smoothingFactor) * amplitude
        }
        previousAmplitudes[peakRow] = amplitude

        // Apply perceptual weighting (gentle boost around 1 kHz)
        amplitude = perceptualAmplitudeWeighting(frequency, amplitude)

        // Use zero phase for simplicity; partials stay phase-coherent
        // across chunks because t below is absolute time, not chunk-local.
        const phase = 0

        // Second, tighter cull after smoothing and weighting
        if (amplitude > Math.min(amplitudeThreshold, 0.005)) {
          partials.push({ frequency, amplitude, phase })
        }
      }

      // Sort by amplitude (loudest first) and keep at most maxPartials
      partials.sort((a, b) => b.amplitude - a.amplitude)
      const limitedPartials = partials.slice(0, maxPartials)

      // Generate audio for this time slice by additive synthesis
      const chunkLength = endSample - startSample
      const audioChunk = new Float32Array(chunkLength)
      for (const { frequency, amplitude, phase } of limitedPartials) {
        for (let i = 0; i < chunkLength; i++) {
          const t = (startSample + i) / sampleRate // absolute time in seconds
          audioChunk[i] += amplitude * Math.sin(2 * Math.PI * frequency * t + phase)
        }
      }

      // Apply windowing to reduce boundary artifacts between chunks
      const windowedChunk = applyWindow(audioChunk, windowType)

      // Add windowed chunk to final audio (bounds-checked against rounding)
      for (let i = 0; i < chunkLength && startSample + i < totalSamples; i++) {
        audio[startSample + i] += windowedChunk[i]
      }
    }

    // Normalize to prevent clipping (only when the peak exceeds full scale)
    let maxAmplitude = 0
    for (let i = 0; i < audio.length; i++) {
      const absValue = Math.abs(audio[i])
      if (absValue > maxAmplitude) {
        maxAmplitude = absValue
      }
    }
    if (maxAmplitude > 1) {
      for (let i = 0; i < audio.length; i++) {
        audio[i] /= maxAmplitude
      }
    }

    return {
      audio,
      sampleRate,
      duration
    }
  }

  /**
   * Merge new parameter overrides into the current set.
   */
  updateParams(newParams: Partial<SynthesisParams>): void {
    this.params = { ...this.params, ...newParams }
  }

  /**
   * Get a defensive copy of the current parameters.
   */
  getParams(): SynthesisParams {
    return { ...this.params }
  }
}
/**
* Convenience function for quick synthesis
*/
/**
 * One-shot convenience wrapper: construct a synthesizer, run it, and
 * return just the audio samples.
 *
 * @param imageData - Canvas image data to sonify
 * @param params - Optional synthesis parameter overrides
 * @returns Synthesized audio samples
 */
export function synthesizeFromImage(
  imageData: ImageData,
  params: Partial<SynthesisParams> = {}
): Float32Array {
  return new ImageToAudioSynthesizer(params).synthesize(imageData).audio
}

View File

@@ -0,0 +1,25 @@
/** Window shape applied to each synthesized time-slice chunk. */
export type WindowType = 'rectangular' | 'hann' | 'hamming' | 'blackman'

/** Tunable parameters for image-to-audio synthesis. */
export interface SynthesisParams {
  duration: number // output length in seconds
  minFreq: number // lowest synthesized frequency in Hz
  maxFreq: number // highest synthesized frequency in Hz
  sampleRate: number // output sample rate in Hz
  frequencyResolution: number // row stride when sampling the image (frequency downsampling)
  timeResolution: number // column stride when sampling the image (time downsampling)
  amplitudeThreshold: number // minimum amplitude for a partial to be kept
  maxPartials: number // cap on simultaneous partials per time slice
  windowType: WindowType // window applied to each synthesized chunk
}

/** A single sinusoidal component to synthesize. */
export interface SpectralPeak {
  frequency: number // Hz
  amplitude: number // linear amplitude
  phase: number // radians
}

/** Output of ImageToAudioSynthesizer.synthesize. */
export interface SynthesisResult {
  audio: Float32Array // mono samples
  sampleRate: number // Hz
  duration: number // seconds
}

View File

@@ -0,0 +1,143 @@
/**
* Convert frequency from Hz to Mel scale
*/
/**
 * Convert a frequency in Hz to the Mel scale (O'Shaughnessy formula).
 *
 * @param freq - Frequency in Hz
 * @returns Corresponding Mel value
 */
export function hzToMel(freq: number): number {
  const ratio = 1 + freq / 700
  return 2595 * Math.log10(ratio)
}
/**
* Convert frequency from Mel scale to Hz
*/
/**
 * Convert a Mel-scale value back to a frequency in Hz (inverse of hzToMel).
 *
 * @param mel - Mel value
 * @returns Frequency in Hz
 */
export function melToHz(mel: number): number {
  return 700 * (Math.pow(10, mel / 2595) - 1)
}
/**
* Detect spectral peaks in amplitude spectrum
*/
/**
 * Find indices of significant components in an amplitude spectrum.
 *
 * Primary pass: every bin strictly above `threshold`. If none qualify,
 * falls back to strict local maxima above a fixed floor of 0.001 so a
 * faint image still yields some output.
 *
 * @param spectrum - Amplitude per frequency bin
 * @param threshold - Primary amplitude cutoff (default 0.01)
 * @returns Bin indices of detected peaks, in ascending order
 */
export function detectSpectralPeaks(spectrum: number[], threshold: number = 0.01): number[] {
  const aboveThreshold: number[] = []
  spectrum.forEach((value, index) => {
    if (value > threshold) aboveThreshold.push(index)
  })
  if (aboveThreshold.length > 0) return aboveThreshold

  // Fallback: interior bins that strictly exceed both neighbors and the floor
  const localMaxima: number[] = []
  for (let i = 1; i < spectrum.length - 1; i++) {
    const isPeak = spectrum[i] > spectrum[i - 1] && spectrum[i] > spectrum[i + 1]
    if (isPeak && spectrum[i] > 0.001) localMaxima.push(i)
  }
  return localMaxima
}
/**
* Apply perceptual amplitude weighting
*/
/**
 * Scale an amplitude by a gentle perceptual weight.
 *
 * The weight is a Gaussian bump in log-frequency centered at 1 kHz,
 * ranging from about 0.5 far from the center up to exactly 1.0 at 1 kHz.
 *
 * @param freq - Partial frequency in Hz
 * @param amplitude - Raw amplitude
 * @returns Weighted amplitude
 */
export function perceptualAmplitudeWeighting(freq: number, amplitude: number): number {
  const logOffset = Math.log10(freq / 1000) // 0 at exactly 1 kHz
  const gaussian = Math.exp(-logOffset * logOffset * 0.5)
  return amplitude * (gaussian * 0.5 + 0.5)
}
/**
* Auto-detect if image colors should be inverted
*/
/**
 * Decide whether the image's colors should be inverted before synthesis.
 *
 * Computes the mean luma (Rec. 601 weights) over all pixels; a bright
 * mean (> 0.5) suggests a dark-on-light diagram rather than a
 * spectrogram, so brightness should be flipped by the caller.
 *
 * @param imageData - Canvas image data (RGBA)
 * @returns true when the image is predominantly bright
 */
export function shouldInvertImage(imageData: ImageData): boolean {
  const { width, height, data } = imageData
  let brightnessSum = 0
  for (let offset = 0; offset < data.length; offset += 4) {
    const luma = 0.299 * data[offset] + 0.587 * data[offset + 1] + 0.114 * data[offset + 2]
    brightnessSum += luma / 255
  }
  return brightnessSum / (width * height) > 0.5 // Bright background = diagram
}
/**
* Generate windowing function
*/
/**
 * Generate an amplitude windowing function of the given length.
 *
 * Supported types: 'hann', 'hamming', 'blackman'; anything else
 * (including 'rectangular') yields an all-ones window.
 *
 * @param length - Number of samples in the window
 * @param windowType - Window shape name
 * @returns Float32Array of per-sample gains in [0, 1]
 */
export function generateWindow(length: number, windowType: string): Float32Array {
  const window = new Float32Array(length)
  // Bug fix: the periodic terms below divide by (length - 1). For a
  // 0- or 1-sample window that is 0/0 = NaN, which would silently
  // poison the audio chunk it is applied to. A degenerate window is
  // simply unity gain.
  if (length <= 1) {
    window.fill(1.0)
    return window
  }
  switch (windowType) {
    case 'hann':
      for (let i = 0; i < length; i++) {
        window[i] = 0.5 * (1 - Math.cos(2 * Math.PI * i / (length - 1)))
      }
      break
    case 'hamming':
      for (let i = 0; i < length; i++) {
        window[i] = 0.54 - 0.46 * Math.cos(2 * Math.PI * i / (length - 1))
      }
      break
    case 'blackman':
      for (let i = 0; i < length; i++) {
        const factor = 2 * Math.PI * i / (length - 1)
        window[i] = 0.42 - 0.5 * Math.cos(factor) + 0.08 * Math.cos(2 * factor)
      }
      break
    case 'rectangular':
    default:
      window.fill(1.0)
      break
  }
  return window
}
/**
* Apply windowing function to audio chunk
*/
/**
 * Multiply an audio chunk element-wise by a window of the same length.
 *
 * @param audioChunk - Samples to shape (not mutated)
 * @param windowType - Window name forwarded to generateWindow
 * @returns New Float32Array with the window applied
 */
export function applyWindow(audioChunk: Float32Array, windowType: string): Float32Array {
  const gains = generateWindow(audioChunk.length, windowType)
  return audioChunk.map((sample, i) => sample * gains[i])
}
/**
* Extract grayscale spectrum from image column
*/
/**
 * Read one image column as a grayscale amplitude spectrum.
 *
 * Bin 0 of the result corresponds to image row 0 (top). Rows are
 * sampled with a stride of `frequencyResolution`; brightness uses
 * Rec. 601 luma weights normalized to [0, 1], optionally inverted.
 *
 * @param imageData - Canvas image data (RGBA)
 * @param col - Source column index
 * @param height - Number of spectrum bins to produce
 * @param frequencyResolution - Row stride when sampling the image
 * @param invert - Whether to flip brightness (1 - value)
 * @returns One amplitude per bin
 */
export function extractSpectrum(
  imageData: ImageData,
  col: number,
  height: number,
  frequencyResolution: number,
  invert: boolean
): number[] {
  const { width, data } = imageData
  const spectrum: number[] = []
  for (let bin = 0; bin < height; bin++) {
    const pixelBase = (bin * frequencyResolution * width + col) * 4
    const r = data[pixelBase]
    const g = data[pixelBase + 1]
    const b = data[pixelBase + 2]
    const normalized = (0.299 * r + 0.587 * g + 0.114 * b) / 255
    spectrum.push(invert ? 1 - normalized : normalized)
  }
  return spectrum
}

View File

@@ -0,0 +1,22 @@
// Public entry points for the spectral-synthesis library.

// Core synthesis: the synthesizer class and its one-call convenience wrapper
export { ImageToAudioSynthesizer, synthesizeFromImage } from './core/synthesizer'
export type { SynthesisParams, SpectralPeak, SynthesisResult, WindowType } from './core/types'

// Low-level utilities: Mel conversion, peak detection, perceptual
// weighting, image sampling, and windowing
export {
  hzToMel,
  melToHz,
  detectSpectralPeaks,
  perceptualAmplitudeWeighting,
  shouldInvertImage,
  extractSpectrum,
  generateWindow,
  applyWindow
} from './core/utils'

// Audio export / playback: WAV encoding, browser download, Web Audio playback
export {
  createWAVBuffer,
  downloadWAV,
  playAudio
} from './audio/export'