import * as fs from 'fs';
import * as path from 'path';
import type { ParsedOpcode, CsoundReference } from './types';

/** Metadata fields read from the front-matter block at the top of a manual page. */
interface FrontMatter {
  id?: string;
  category?: string;
}

/** A documented opcode argument: its name and its prose description. */
type ParameterDoc = { name: string; description: string };

/**
 * Front matter is an HTML comment at the very top of the page:
 *
 *     <!--
 *     id:oscil
 *     category:Signal Generators:Basic Oscillators
 *     -->
 *
 * Group 1 captures the lines between the comment delimiters. Leading
 * whitespace (including a BOM) is tolerated, and the line break following the
 * closing delimiter is consumed so the same pattern can strip the block.
 * Deliberately not global: it is shared, and must stay stateless.
 */
const FRONT_MATTER_PATTERN = /^\s*<!--[ \t]*\r?\n([\s\S]*?)\r?\n[ \t]*-->[ \t]*(?:\r?\n)?/;

/** First fenced code block following the `=== "Modern"` tab marker. */
const MODERN_SYNTAX_PATTERN = /===\s*"Modern"[\s\S]*?```[\s\S]*?\n([\s\S]*?)```/;

/** First fenced code block following the `=== "Classic"` tab marker. */
const CLASSIC_SYNTAX_PATTERN = /===\s*"Classic"[\s\S]*?```[\s\S]*?\n([\s\S]*?)```/;

/** First fenced code block in a piece of text; group 1 is the fence body. */
const FIRST_CODE_FENCE_PATTERN = /```[\s\S]*?\n([\s\S]*?)```/;

/** Escapes every regex metacharacter so `value` is matched literally. */
function escapeRegExp(value: string): string {
  return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Parses Csound manual pages written in Markdown into `ParsedOpcode` records
 * and converts them into `CsoundReference` entries.
 *
 * Parsed opcodes are kept in memory, keyed by their front-matter `id`; parsing
 * a second page with the same id replaces the earlier record.
 */
export class CsoundManualParser {
  private opcodes: Map<string, ParsedOpcode> = new Map();

  /**
   * Reads the `id` and `category` fields from the page's front-matter block.
   *
   * Each front-matter line is split at its first colon, so values may
   * themselves contain colons (e.g. `category:A:B`). Unknown keys are ignored.
   *
   * @param content Full Markdown text of a manual page.
   * @returns The fields that were found; an empty object when the page has no
   *   front-matter block.
   */
  parseFrontMatter(content: string): FrontMatter {
    const block = FRONT_MATTER_PATTERN.exec(content)?.[1];
    if (block === undefined) return {};

    const frontMatter: FrontMatter = {};
    for (const line of block.split(/\r?\n/)) {
      const separator = line.indexOf(':');
      // Needs a non-empty key before the colon.
      if (separator <= 0) continue;

      const key = line.slice(0, separator).trim();
      const value = line.slice(separator + 1).trim();
      if (key === 'id') frontMatter.id = value;
      else if (key === 'category') frontMatter.category = value;
    }
    return frontMatter;
  }

  /**
   * Returns the first prose paragraph of the page, joined onto one line.
   *
   * The front-matter block and the leading `# Title` heading are skipped. The
   * paragraph ends at the first blank line after it started, or at the next
   * heading.
   *
   * @param content Full Markdown text of a manual page.
   * @returns The paragraph text, or `''` when there is none.
   */
  extractFirstParagraph(content: string): string {
    const body = content.replace(FRONT_MATTER_PATTERN, '').replace(/^\s+/, '');
    const withoutTitle = body.replace(/^#[ \t]+.*(?:\r?\n|$)/, '');

    const paragraphLines: string[] = [];
    for (const line of withoutTitle.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (trimmed === '') {
        if (paragraphLines.length > 0) break;
        continue; // still skipping blank lines ahead of the paragraph
      }
      if (trimmed.startsWith('#')) break;
      paragraphLines.push(trimmed);
    }
    return paragraphLines.join(' ').trim();
  }

  /**
   * Returns the body of a level-2 (`## Title`) section.
   *
   * The title is matched literally and case-insensitively, and the heading
   * must start a line, so `### Title` is not mistaken for `## Title`. The body
   * runs up to the next level-2 heading or the end of the text; nested `###`
   * subsections are part of the body.
   *
   * @param content Markdown text to search.
   * @param sectionTitle Heading text without the leading `##`.
   * @returns The trimmed section body, or `''` when the section is absent or
   *   empty.
   */
  extractSection(content: string, sectionTitle: string): string {
    // The heading's own line break is only looked ahead at, never consumed, so
    // an empty section that is immediately followed by another `##` heading
    // ends right there instead of swallowing the next section.
    const sectionPattern = new RegExp(
      `(?:^|\\n)##[ \\t]+${escapeRegExp(sectionTitle)}[ \\t]*(?=\\r?\\n|$)` +
        `([\\s\\S]*?)(?=\\r?\\n##(?!#)|$)`,
      'i',
    );
    return sectionPattern.exec(content)?.[1]?.trim() ?? '';
  }

  /**
   * Extracts the opcode syntax from the `=== "Modern"` and `=== "Classic"`
   * tabs of the `## Syntax` section.
   *
   * @param content Full Markdown text of a manual page.
   * @returns The trimmed body of the first code fence after each tab marker
   *   (`undefined` for a tab that is missing), or an empty object when the
   *   page has no Syntax section.
   */
  extractSyntax(content: string): { modern?: string; classic?: string } {
    const syntaxSection = this.extractSection(content, 'Syntax');
    if (!syntaxSection) return {};

    return {
      modern: MODERN_SYNTAX_PATTERN.exec(syntaxSection)?.[1]?.trim(),
      classic: CLASSIC_SYNTAX_PATTERN.exec(syntaxSection)?.[1]?.trim(),
    };
  }

  /**
   * Extracts parameter descriptions from a `### Title` subsection of the
   * `## Syntax` section.
   *
   * A parameter entry looks like `_name_ -- description`. Several names may
   * share one description (`_kamp_, _xamp_ -- ...` or `_a/b_ -- ...`); each
   * name then gets its own entry. A description continues over following
   * non-blank lines that do not start with `_`, and is collapsed onto a single
   * line.
   *
   * @param content Full Markdown text of a manual page.
   * @param subsectionTitle Subsection heading text, matched literally and
   *   case-insensitively (e.g. `Initialization`).
   * @returns One entry per parameter name, in document order; empty when the
   *   subsection is missing.
   */
  extractParameters(content: string, subsectionTitle: string): ParameterDoc[] {
    const syntaxSection = this.extractSection(content, 'Syntax');
    const subsectionPattern = new RegExp(
      `(?:^|\\n)###[ \\t]+${escapeRegExp(subsectionTitle)}[ \\t]*(?=\\r?\\n|$)` +
        `([\\s\\S]*?)(?=\\r?\\n##|$)`,
      'i',
    );
    const subsectionContent = subsectionPattern.exec(syntaxSection)?.[1];
    if (subsectionContent === undefined) return [];

    const params: ParameterDoc[] = [];
    const entries = subsectionContent.matchAll(
      /_([a-zA-Z0-9_,\s/]+)_\s*[-–—]+\s*([^\n]+(?:\n(?!_)[^\n]+)*)/g,
    );
    for (const entry of entries) {
      const description = entry[2].trim().replace(/\s+/g, ' ');
      const names = entry[1]
        .split(/[,/]/)
        // Grouped names keep their own emphasis underscores; drop them.
        .map((raw) => raw.trim().replace(/^_+|_+$/g, ''))
        .filter((name) => name.length > 0);
      for (const name of names) {
        params.push({ name, description });
      }
    }
    return params;
  }

  /**
   * Returns the body of the first fenced code block in the `## Examples`
   * section.
   *
   * @param content Full Markdown text of a manual page.
   * @returns The trimmed code, or `undefined` when there is no Examples
   *   section or it holds no code fence.
   */
  extractExample(content: string): string | undefined {
    const exampleSection = this.extractSection(content, 'Examples');
    if (!exampleSection) return undefined;
    return FIRST_CODE_FENCE_PATTERN.exec(exampleSection)?.[1]?.trim();
  }

  /**
   * Collects the link texts (the part in square brackets) from the
   * `## See also` section.
   *
   * @param content Full Markdown text of a manual page.
   * @returns Link texts in document order; empty when the section is missing.
   */
  extractSeeAlso(content: string): string[] {
    const seeAlsoSection = this.extractSection(content, 'See also');
    if (!seeAlsoSection) return [];
    return Array.from(seeAlsoSection.matchAll(/\[([^\]]+)\]/g), (link) => link[1]);
  }

  /**
   * Parses one manual page into a `ParsedOpcode`.
   *
   * @param filePath Path or file name, used only in log messages.
   * @param content Full Markdown text of the page.
   * @returns The parsed record, or `null` (after logging) when the front
   *   matter lacks `id` or `category`, or when parsing throws.
   */
  parseMarkdownFile(filePath: string, content: string): ParsedOpcode | null {
    try {
      const frontMatter = this.parseFrontMatter(content);
      if (!frontMatter.id || !frontMatter.category) {
        console.warn(`Skipping ${filePath}: missing id or category`);
        return null;
      }

      const syntax = this.extractSyntax(content);
      return {
        id: frontMatter.id,
        category: frontMatter.category,
        title: frontMatter.id,
        description: this.extractFirstParagraph(content),
        syntaxModern: syntax.modern,
        syntaxClassic: syntax.classic,
        initParams: this.extractParameters(content, 'Initialization'),
        perfParams: this.extractParameters(content, 'Performance'),
        example: this.extractExample(content),
        seeAlso: this.extractSeeAlso(content),
        rawContent: content,
      };
    } catch (error) {
      console.error(`Error parsing ${filePath}:`, error);
      return null;
    }
  }

  /**
   * Parses every `.md` file directly inside `dirPath` (no recursion) and
   * stores the resulting opcodes, then logs how many were parsed and skipped.
   *
   * @param dirPath Directory holding the manual's Markdown pages.
   * @throws Error when `dirPath` does not exist.
   */
  parseDirectory(dirPath: string): void {
    if (!fs.existsSync(dirPath)) {
      throw new Error(`Directory not found: ${dirPath}`);
    }

    let parsed = 0;
    let skipped = 0;
    for (const file of fs.readdirSync(dirPath)) {
      if (!file.endsWith('.md')) continue;

      const content = fs.readFileSync(path.join(dirPath, file), 'utf-8');
      // Only the bare file name is passed on; it is used for log messages.
      const opcode = this.parseMarkdownFile(file, content);
      if (opcode) {
        this.opcodes.set(opcode.id, opcode);
        parsed++;
      } else {
        skipped++;
      }
    }
    console.log(`Parsed ${parsed} opcodes, skipped ${skipped}`);
  }

  /**
   * Converts a parsed opcode into a `CsoundReference`.
   *
   * The modern syntax is preferred over the classic one. Initialization and
   * performance parameters are merged into one list, each tagged with its
   * `type`. An empty parameter or see-also list becomes `undefined`.
   *
   * @param opcode A record produced by `parseMarkdownFile`.
   */
  convertToReference(opcode: ParsedOpcode): CsoundReference {
    const syntax = opcode.syntaxModern || opcode.syntaxClassic;
    const parameters = [
      ...opcode.initParams.map((p) => ({ ...p, type: 'initialization' as const })),
      ...opcode.perfParams.map((p) => ({ ...p, type: 'performance' as const })),
    ];

    return {
      name: opcode.id,
      type: 'opcode',
      category: opcode.category,
      description: opcode.description,
      syntax,
      example: opcode.example,
      rates: this.extractRates(syntax || ''),
      parameters: parameters.length > 0 ? parameters : undefined,
      seeAlso: opcode.seeAlso.length > 0 ? opcode.seeAlso : undefined,
    };
  }

  /**
   * Guesses which rates an opcode works at by looking for a fixed set of
   * conventional argument names in its syntax line (`ares`/`asin`/`asig`,
   * `kres`/`kamp`/`kcps`/`kin`, `ires`/`ifn`/`iphs`).
   *
   * @returns The matching labels among `a-rate`, `k-rate`, `i-rate`, in that
   *   order.
   */
  private extractRates(syntax: string): string[] {
    const rates: Set<string> = new Set();
    if (/\bares\b|\basin\b|\basig\b/.test(syntax)) rates.add('a-rate');
    if (/\bkres\b|\bkamp\b|\bkcps\b|\bkin\b/.test(syntax)) rates.add('k-rate');
    if (/\bires\b|\bifn\b|\biphs\b/.test(syntax)) rates.add('i-rate');
    return Array.from(rates);
  }

  /** Returns every parsed opcode, in insertion order. */
  getOpcodes(): ParsedOpcode[] {
    return Array.from(this.opcodes.values());
  }

  /** Groups the parsed opcodes by their `category`. */
  getOpcodesByCategory(): Map<string, ParsedOpcode[]> {
    return this.groupByCategory((opcode) => opcode);
  }

  /** Returns every parsed opcode converted to a `CsoundReference`. */
  getReferences(): CsoundReference[] {
    return this.getOpcodes().map((op) => this.convertToReference(op));
  }

  /** Groups the converted references by their opcode's `category`. */
  getReferencesByCategory(): Map<string, CsoundReference[]> {
    return this.groupByCategory((opcode) => this.convertToReference(opcode));
  }

  /** Buckets `project(opcode)` under each opcode's category, keeping insertion order. */
  private groupByCategory<T>(project: (opcode: ParsedOpcode) => T): Map<string, T[]> {
    const categories = new Map<string, T[]>();
    for (const opcode of this.opcodes.values()) {
      const item = project(opcode);
      const bucket = categories.get(opcode.category);
      if (bucket) {
        bucket.push(item);
      } else {
        categories.set(opcode.category, [item]);
      }
    }
    return categories;
  }
}