Files
oldboy/scripts/csound-parser/parser.ts
2025-10-15 15:05:23 +02:00

246 lines
7.4 KiB
TypeScript

import * as fs from 'fs';
import * as path from 'path';
import type { ParsedOpcode, CsoundReference } from './types';
/**
 * Metadata read from the leading `<!-- ... -->` comment block of a manual page.
 * Both fields are optional because a page may omit either line.
 */
interface FrontMatter {
  /** Value of the `category:` line, if present. */
  category?: string;
  /** Value of the `id:` line, if present. */
  id?: string;
}
/**
 * Parses Markdown manual pages into `ParsedOpcode` records and converts them
 * into `CsoundReference` entries.
 *
 * A page is expected to start with an HTML comment holding `id:` and
 * `category:` lines, followed by a `# title`, a first descriptive paragraph and
 * `## Syntax` / `## Examples` / `## See also` sections.
 */
export class CsoundManualParser {
  /** Parsed opcodes keyed by their front-matter id; a later page with the same id replaces the earlier one. */
  private readonly opcodes = new Map<string, ParsedOpcode>();

  /** Escapes regular-expression metacharacters so `text` is matched literally. */
  private static escapeRegExp(text: string): string {
    return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Reads the `id` and `category` entries from the leading `<!-- ... -->` block.
   *
   * @param content Full text of the page.
   * @returns The recognised entries; an empty object when there is no front matter.
   */
  parseFrontMatter(content: string): FrontMatter {
    const block = content.match(/^<!--\s*\n([\s\S]*?)\n-->/);
    if (!block) return {};

    const result: FrontMatter = {};
    for (const line of block[1].split('\n')) {
      // Only the first colon separates key from value, so values may contain colons.
      const separator = line.indexOf(':');
      if (separator <= 0) continue;

      const key = line.slice(0, separator).trim();
      const value = line.slice(separator + 1).trim();
      if (key === 'id') result.id = value;
      if (key === 'category') result.category = value;
    }
    return result;
  }

  /**
   * Returns the first paragraph after the front matter and the `# title` line,
   * joined into a single line.
   *
   * Blank lines between the front matter, the title and the paragraph are
   * tolerated, as are CRLF line endings.
   *
   * @param content Full text of the page.
   * @returns The paragraph text, or `''` when a heading is reached before any text.
   */
  extractFirstParagraph(content: string): string {
    const body = content
      .replace(/^<!--[\s\S]*?-->[ \t]*(?:\r?\n|$)/, '')
      // `#[ \t]+` only matches a level-1 heading, so `## Section` lines are left alone.
      .replace(/^\s*#[ \t]+.*(?:\r?\n|$)/, '');

    const paragraph: string[] = [];
    for (const rawLine of body.split('\n')) {
      const line = rawLine.trim();
      if (line === '') {
        // A blank line ends the paragraph once it has started; leading blanks are skipped.
        if (paragraph.length > 0) break;
        continue;
      }
      if (line.startsWith('#')) break;
      paragraph.push(line);
    }
    return paragraph.join(' ').trim();
  }

  /**
   * Returns the body of the level-2 section with the given title.
   *
   * The heading must start a line (so `### Title` is not mistaken for
   * `## Title`), the title is matched literally and case-insensitively, and the
   * body runs until the next level-2 heading or the end of the text. Level-3
   * subsections are kept inside the body.
   *
   * @param content Text to search.
   * @param sectionTitle Heading text without the leading `##`.
   * @returns The trimmed section body, or `''` when the section is absent or empty.
   */
  extractSection(content: string, sectionTitle: string): string {
    const title = CsoundManualParser.escapeRegExp(sectionTitle);
    // The heading's newline is only looked at, not consumed, so an empty section
    // immediately followed by another heading still terminates correctly.
    const pattern = new RegExp(
      `(?:^|\\n) {0,3}##[ \\t]+${title}[ \\t]*\\r?(?=\\n|$)([\\s\\S]*?)(?=\\n##(?!#)|$)`,
      'i',
    );
    const match = content.match(pattern);
    return match ? match[1].trim() : '';
  }

  /**
   * Extracts the code shown under the `=== "Modern"` and `=== "Classic"` tabs of
   * the `## Syntax` section.
   *
   * @param content Full text of the page.
   * @returns The trimmed code of each tab; a field is `undefined` when that tab
   * has no fenced code block, and the object is empty when there is no Syntax section.
   */
  extractSyntax(content: string): { modern?: string; classic?: string } {
    const section = this.extractSection(content, 'Syntax');
    if (!section) return {};

    // `(?:(?!===)[\s\S])*?` stops the search for the opening fence at the next
    // tab marker, so a tab without code cannot capture the following tab's code.
    const modern = section.match(/===\s*"Modern"(?:(?!===)[\s\S])*?```[\s\S]*?\n([\s\S]*?)```/);
    const classic = section.match(/===\s*"Classic"(?:(?!===)[\s\S])*?```[\s\S]*?\n([\s\S]*?)```/);

    return {
      modern: modern ? modern[1].trim() : undefined,
      classic: classic ? classic[1].trim() : undefined,
    };
  }

  /**
   * Extracts `_name_ -- description` entries from a level-3 subsection of the
   * `## Syntax` section.
   *
   * Several names may share one entry (`_kamp, xamp_` or `_a/b_`); each name is
   * returned as its own parameter with the shared description. A description
   * continues over following non-blank lines that do not start with `_`.
   *
   * @param content Full text of the page.
   * @param subsectionTitle Heading text without the leading `###`, matched literally and case-insensitively.
   * @returns The parameters in document order; empty when the subsection is absent.
   */
  extractParameters(content: string, subsectionTitle: string): { name: string; description: string }[] {
    const section = this.extractSection(content, 'Syntax');
    const title = CsoundManualParser.escapeRegExp(subsectionTitle);
    const pattern = new RegExp(
      `(?:^|\\n) {0,3}###[ \\t]+${title}[ \\t]*\\r?(?=\\n|$)([\\s\\S]*?)(?=\\n##|$)`,
      'i',
    );
    const match = section.match(pattern);
    if (!match) return [];

    const params: { name: string; description: string }[] = [];
    const entries = match[1].matchAll(/_([a-zA-Z0-9_,\s/]+)_\s*[-–—]+\s*([^\n]+(?:\n(?!_)[^\n]+)*)/g);
    for (const entry of entries) {
      // Collapse line breaks and runs of whitespace so the description is one line.
      const description = entry[2].trim().replace(/\s+/g, ' ');
      const names = entry[1]
        .split(/[,/]/)
        .map(name => name.trim().replace(/^_+|_+$/g, ''))
        .filter(name => name.length > 0);
      for (const name of names) {
        params.push({ name, description });
      }
    }
    return params;
  }

  /**
   * Returns the contents of the first fenced code block in the `## Examples` section.
   *
   * @param content Full text of the page.
   * @returns The trimmed code, or `undefined` when the section or a code block is missing.
   */
  extractExample(content: string): string | undefined {
    const section = this.extractSection(content, 'Examples');
    if (!section) return undefined;

    const code = section.match(/```[\s\S]*?\n([\s\S]*?)```/);
    return code ? code[1].trim() : undefined;
  }

  /**
   * Returns the text of every `[...]` link label in the `## See also` section.
   *
   * @param content Full text of the page.
   * @returns Link labels in document order; empty when the section is missing.
   */
  extractSeeAlso(content: string): string[] {
    const section = this.extractSection(content, 'See also');
    if (!section) return [];

    return Array.from(section.matchAll(/\[([^\]]+)\]/g), link => link[1]);
  }

  /**
   * Parses one page into a `ParsedOpcode`.
   *
   * @param filePath Name used only in log messages.
   * @param content Full text of the page.
   * @returns The parsed record, or `null` when `id` or `category` is missing
   * from the front matter or when parsing throws (both cases are logged).
   */
  parseMarkdownFile(filePath: string, content: string): ParsedOpcode | null {
    try {
      // Extraction works on LF-normalised text; `rawContent` keeps the original.
      const text = content.replace(/\r\n?/g, '\n');

      const frontMatter = this.parseFrontMatter(text);
      if (!frontMatter.id || !frontMatter.category) {
        console.warn(`Skipping ${filePath}: missing id or category`);
        return null;
      }

      const syntax = this.extractSyntax(text);
      return {
        id: frontMatter.id,
        category: frontMatter.category,
        title: frontMatter.id,
        description: this.extractFirstParagraph(text),
        syntaxModern: syntax.modern,
        syntaxClassic: syntax.classic,
        initParams: this.extractParameters(text, 'Initialization'),
        perfParams: this.extractParameters(text, 'Performance'),
        example: this.extractExample(text),
        seeAlso: this.extractSeeAlso(text),
        rawContent: content,
      };
    } catch (error) {
      console.error(`Error parsing ${filePath}:`, error);
      return null;
    }
  }

  /**
   * Parses every `.md` file directly inside `dirPath` and stores the results,
   * then logs how many pages were parsed and how many were skipped.
   *
   * @param dirPath Directory to read (not recursive).
   * @throws Error when `dirPath` does not exist.
   */
  parseDirectory(dirPath: string): void {
    if (!fs.existsSync(dirPath)) {
      throw new Error(`Directory not found: ${dirPath}`);
    }

    let parsed = 0;
    let skipped = 0;
    for (const file of fs.readdirSync(dirPath)) {
      if (!file.endsWith('.md')) continue;

      const content = fs.readFileSync(path.join(dirPath, file), 'utf-8');
      const opcode = this.parseMarkdownFile(file, content);
      if (opcode) {
        this.opcodes.set(opcode.id, opcode);
        parsed++;
      } else {
        skipped++;
      }
    }
    console.log(`Parsed ${parsed} opcodes, skipped ${skipped}`);
  }

  /**
   * Converts a parsed opcode into a `CsoundReference`.
   *
   * The modern syntax is preferred and the classic syntax is the fallback.
   * Empty `parameters` and `seeAlso` lists are reported as `undefined`.
   *
   * @param opcode Record produced by {@link parseMarkdownFile}.
   */
  convertToReference(opcode: ParsedOpcode): CsoundReference {
    // `||` rather than `??`: an empty modern syntax string also falls back to classic.
    const syntax = opcode.syntaxModern || opcode.syntaxClassic;
    const parameters = [
      ...opcode.initParams.map(p => ({ ...p, type: 'initialization' as const })),
      ...opcode.perfParams.map(p => ({ ...p, type: 'performance' as const })),
    ];

    return {
      name: opcode.id,
      type: 'opcode',
      category: opcode.category,
      description: opcode.description,
      syntax,
      example: opcode.example,
      rates: this.extractRates(syntax || ''),
      parameters: parameters.length > 0 ? parameters : undefined,
      seeAlso: opcode.seeAlso.length > 0 ? opcode.seeAlso : undefined,
    };
  }

  /**
   * Guesses the rates an opcode supports from well-known argument names that
   * appear as whole words in its syntax line.
   *
   * @returns Any of `'a-rate'`, `'k-rate'`, `'i-rate'`, in that order.
   */
  private extractRates(syntax: string): string[] {
    const rates = new Set<string>();
    if (/\b(?:ares|asin|asig)\b/.test(syntax)) rates.add('a-rate');
    if (/\b(?:kres|kamp|kcps|kin)\b/.test(syntax)) rates.add('k-rate');
    if (/\b(?:ires|ifn|iphs)\b/.test(syntax)) rates.add('i-rate');
    return Array.from(rates);
  }

  /** Groups the stored opcodes by category, mapping each one through `project`. */
  private groupByCategory<T>(project: (opcode: ParsedOpcode) => T): Map<string, T[]> {
    const groups = new Map<string, T[]>();
    for (const opcode of this.opcodes.values()) {
      const item = project(opcode);
      const bucket = groups.get(opcode.category);
      if (bucket) {
        bucket.push(item);
      } else {
        groups.set(opcode.category, [item]);
      }
    }
    return groups;
  }

  /** Returns all stored opcodes in insertion order. */
  getOpcodes(): ParsedOpcode[] {
    return Array.from(this.opcodes.values());
  }

  /** Returns the stored opcodes grouped by their category. */
  getOpcodesByCategory(): Map<string, ParsedOpcode[]> {
    return this.groupByCategory(opcode => opcode);
  }

  /** Returns every stored opcode converted with {@link convertToReference}. */
  getReferences(): CsoundReference[] {
    return this.getOpcodes().map(opcode => this.convertToReference(opcode));
  }

  /** Returns the converted references grouped by category. */
  getReferencesByCategory(): Map<string, CsoundReference[]> {
    return this.groupByCategory(opcode => this.convertToReference(opcode));
  }
}