From 6f6cffbc67aa9dc057f4c3524a8447015c33ded7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=89=8B=E7=93=9C=E4=B8=80=E5=8D=81=E9=9B=AA?=
Date: Fri, 4 Apr 2025 13:28:28 +0800
Subject: [PATCH] feat: codec wav

Add an in-house WAV encoder/decoder and use it in audio_pure.ts in place
of node-wav.

---
 src/common/audio-enhance/codec/wav.ts | 173 ++++++++++++++++++++++++++
 src/common/audio_pure.ts              |  61 ++++-------
 2 files changed, 194 insertions(+), 40 deletions(-)
 create mode 100644 src/common/audio-enhance/codec/wav.ts

diff --git a/src/common/audio-enhance/codec/wav.ts b/src/common/audio-enhance/codec/wav.ts
new file mode 100644
index 00000000..dde6adce
--- /dev/null
+++ b/src/common/audio-enhance/codec/wav.ts
@@ -0,0 +1,173 @@
+// WAV file header layout (canonical 44-byte RIFF/WAVE header)
+interface WavHeader {
+    riffChunkId: string; // "RIFF"
+    riffChunkSize: number; // file size - 8
+    riffFormat: string; // "WAVE"
+    fmtChunkId: string; // "fmt "
+    fmtChunkSize: number; // 16
+    audioFormat: number; // 1 = PCM
+    numChannels: number; // number of channels
+    sampleRate: number; // sample rate
+    byteRate: number; // byte rate (SampleRate * NumChannels * BitsPerSample / 8)
+    blockAlign: number; // block align (NumChannels * BitsPerSample / 8)
+    bitsPerSample: number; // bits per sample
+    dataChunkId: string; // "data"
+    dataChunkSize: number; // size of the audio data
+}
+
+/** Accumulates raw PCM bytes and serializes them behind a 44-byte WAV header. */
+export class WavEncoder {
+    private header: WavHeader;
+    private data: Buffer;
+    private dataOffset: number;
+    public bitsPerSample: number;
+
+    /** @throws Error if bitsPerSample is not 8, 16, 24 or 32. */
+    constructor(sampleRate: number, numChannels: number, bitsPerSample: number) {
+        if (![8, 16, 24, 32].includes(bitsPerSample)) {
+            throw new Error("Unsupported bitsPerSample value. Must be 8, 16, 24, or 32.");
+        }
+
+        this.bitsPerSample = bitsPerSample;
+        this.header = {
+            riffChunkId: "RIFF",
+            riffChunkSize: 0, // filled in by encode()
+            riffFormat: "WAVE",
+            fmtChunkId: "fmt ",
+            fmtChunkSize: 16,
+            audioFormat: 1, // PCM
+            numChannels: numChannels,
+            sampleRate: sampleRate,
+            byteRate: sampleRate * numChannels * bitsPerSample / 8,
+            blockAlign: numChannels * bitsPerSample / 8,
+            bitsPerSample: bitsPerSample,
+            dataChunkId: "data",
+            dataChunkSize: 0 // filled in by encode()
+        };
+        this.data = Buffer.alloc(0);
+        this.dataOffset = 0;
+    }
+
+    /** Appends PCM bytes to the pending data chunk. */
+    public write(buffer: Buffer): void {
+        this.data = Buffer.concat([this.data, buffer]);
+        this.dataOffset += buffer.length;
+    }
+
+    /** Returns the 44-byte header followed by every byte written so far. */
+    public encode(): Buffer {
+        this.header.dataChunkSize = this.dataOffset;
+        this.header.riffChunkSize = 36 + this.dataOffset;
+
+        const headerBuffer = Buffer.alloc(44);
+
+        headerBuffer.write(this.header.riffChunkId, 0, 4, 'ascii');
+        headerBuffer.writeUInt32LE(this.header.riffChunkSize, 4);
+        headerBuffer.write(this.header.riffFormat, 8, 4, 'ascii');
+        headerBuffer.write(this.header.fmtChunkId, 12, 4, 'ascii');
+        headerBuffer.writeUInt32LE(this.header.fmtChunkSize, 16);
+        headerBuffer.writeUInt16LE(this.header.audioFormat, 20);
+        headerBuffer.writeUInt16LE(this.header.numChannels, 22);
+        headerBuffer.writeUInt32LE(this.header.sampleRate, 24);
+        headerBuffer.writeUInt32LE(this.header.byteRate, 28);
+        headerBuffer.writeUInt16LE(this.header.blockAlign, 32);
+        headerBuffer.writeUInt16LE(this.header.bitsPerSample, 34);
+        headerBuffer.write(this.header.dataChunkId, 36, 4, 'ascii');
+        headerBuffer.writeUInt32LE(this.header.dataChunkSize, 40);
+
+        return Buffer.concat([headerBuffer, this.data]);
+    }
+}
+
+/**
+ * Parses a PCM WAV buffer laid out as a 44-byte header immediately followed by the data chunk.
+ * Construction throws for any other layout so that callers can fall back to another decoder.
+ */
+export class WavDecoder {
+    private header: WavHeader;
+    private data: Buffer;
+    private dataOffset: number;
+    public bitsPerSample: number;
+
+    /** @throws Error if the buffer is not a PCM WAV with the canonical 44-byte header. */
+    constructor(private buffer: Buffer) {
+        this.header = {
+            riffChunkId: "",
+            riffChunkSize: 0,
+            riffFormat: "",
+            fmtChunkId: "",
+            fmtChunkSize: 0,
+            audioFormat: 0,
+            numChannels: 0,
+            sampleRate: 0,
+            byteRate: 0,
+            blockAlign: 0,
+            bitsPerSample: 0,
+            dataChunkId: "",
+            dataChunkSize: 0
+        };
+        this.data = Buffer.alloc(0);
+        this.dataOffset = 0;
+        this.decodeHeader();
+        this.decodeData();
+        this.bitsPerSample = this.header.bitsPerSample;
+    }
+
+    /** Reads the fixed-offset header fields and rejects anything this decoder cannot handle. */
+    private decodeHeader(): void {
+        // Every read below assumes the full 44-byte header is present.
+        if (this.buffer.length < 44) {
+            throw new Error("Invalid WAV file format.");
+        }
+
+        // Buffer.toString(encoding, start, end): `end` is an exclusive offset, not a length.
+        this.header.riffChunkId = this.buffer.toString('ascii', 0, 4);
+        this.header.riffChunkSize = this.buffer.readUInt32LE(4);
+        this.header.riffFormat = this.buffer.toString('ascii', 8, 12);
+        this.header.fmtChunkId = this.buffer.toString('ascii', 12, 16);
+        this.header.fmtChunkSize = this.buffer.readUInt32LE(16);
+        this.header.audioFormat = this.buffer.readUInt16LE(20);
+        this.header.numChannels = this.buffer.readUInt16LE(22);
+        this.header.sampleRate = this.buffer.readUInt32LE(24);
+        this.header.byteRate = this.buffer.readUInt32LE(28);
+        this.header.blockAlign = this.buffer.readUInt16LE(32);
+        this.header.bitsPerSample = this.buffer.readUInt16LE(34);
+        this.header.dataChunkId = this.buffer.toString('ascii', 36, 40);
+        this.header.dataChunkSize = this.buffer.readUInt32LE(40);
+
+        this.dataOffset = 44;
+
+        if (this.header.riffChunkId !== "RIFF" || this.header.riffFormat !== "WAVE") {
+            throw new Error("Invalid WAV file format.");
+        }
+
+        // Extra chunks or an extended fmt chunk move the data chunk away from offset 36.
+        if (this.header.fmtChunkId !== "fmt " || this.header.dataChunkId !== "data") {
+            throw new Error("Unsupported WAV layout: expected a canonical 44-byte header.");
+        }
+
+        // Only integer PCM (audioFormat 1) is produced and consumed by this codec.
+        if (this.header.audioFormat !== 1) {
+            throw new Error(`Unsupported audioFormat: ${this.header.audioFormat}`);
+        }
+
+        if (![8, 16, 24, 32].includes(this.header.bitsPerSample)) {
+            throw new Error(`Unsupported bitsPerSample: ${this.header.bitsPerSample}`);
+        }
+    }
+
+    /** Takes the data chunk as a view; the range is clamped to the end of the buffer. */
+    private decodeData(): void {
+        this.data = this.buffer.subarray(this.dataOffset, this.dataOffset + this.header.dataChunkSize);
+    }
+
+    /** Returns the parsed header fields. */
+    public getHeader(): WavHeader {
+        return this.header;
+    }
+
+    /** Returns the raw PCM bytes of the data chunk (a view into the input buffer). */
+    public getData(): Buffer {
+        return this.data;
+    }
+}
diff --git a/src/common/audio_pure.ts b/src/common/audio_pure.ts
index 625910a1..39ef7721 100644
--- a/src/common/audio_pure.ts
+++ b/src/common/audio_pure.ts
@@ -14,7 +14,7 @@ import { readFile, writeFile } from 'fs/promises';
 import path from 'path';
 import audioDecode from 'audio-decode'; // 解码 WAV MP3 OGG FLAC
 import { Mp3Encoder } from '@breezystack/lamejs'; // 编码 MP3
-import * as wav from 'node-wav';
+import { WavEncoder, WavDecoder } from './audio-enhance/codec/wav'; // in-house WAV codec
 // import { Encoder as FlacEncoder } from 'libflacjs/lib/encoder'; // 编码 FLAC
 // import * as Flac from 'libflacjs'; // 编码 FLAC
 
@@ -263,6 +263,7 @@ class AudioProcessor {
         let processedSamples = pcmData.samples;
 
         // 如果需要重采样
         if (pcmData.sampleRate !== targetSampleRate) {
+            console.log(`重采样: ${pcmData.sampleRate}Hz → ${targetSampleRate}Hz`);
             processedSamples = this.resample(
                 processedSamples,
@@ -288,7 +289,6 @@ class AudioProcessor {
             metadata: options?.preserveMetadata ? pcmData.metadata : undefined
         };
     }
-
     /**
      * 从Buffer中提取音频元数据
      */
@@ -357,9 +357,7 @@ class GenericDecoder {
         const audioData = await audioDecode(buffer);
 
         return {
-            samples: audioData.getChannelData(0).length === audioData.length
-                ? audioData.getChannelData(0)
-                : this.interleaveSamples(audioData),
+            samples: this.interleaveSamples(audioData),
             sampleRate: audioData.sampleRate,
             channels: audioData.numberOfChannels,
             metadata: AudioProcessor.extractMetadata({})
@@ -477,31 +475,27 @@ class WAVCodec extends BaseCodec {
 
     override async decode(buffer: Buffer, options?: ConvertOptions): Promise {
         try {
-            // 使用node-wav解析WAV文件
-            const decoded = wav.decode(buffer);
+            const decoder = new WavDecoder(buffer);
+            const header = decoder.getHeader();
+            const data = decoder.getData();
 
-            // node-wav 返回的格式: { sampleRate, channelData }
-            // channelData是一个包含每个声道Float32Array数据的数组
+            const sampleRate = header.sampleRate;
+            const channels = header.numChannels;
+            const bitsPerSample = header.bitsPerSample;
 
-            // 获取基本参数
-            const sampleRate = decoded.sampleRate;
-            const channels = decoded.channelData.length;
-
-            // 将多声道数据合并为单个交织的Float32Array
-            const samples = new Float32Array(decoded.channelData[0]!.length * channels);
-
-            for (let c = 0; c < channels; c++) {
-                const channelData = decoded.channelData[c]!;
-                for (let i = 0; i < channelData.length; i++) {
-                    samples[i * channels + c] = channelData[i]!;
-                }
+            // Convert the raw PCM bytes into a Float32Array
+            let samples: Float32Array;
+            if (bitsPerSample === 8 || bitsPerSample === 16 || bitsPerSample === 32) {
+                samples = AudioProcessor.pcmToFloat(data, bitsPerSample);
+            } else {
+                throw new AudioError(`不支持的WAV位深: ${bitsPerSample}`, 'decode', 'wav');
             }
 
             return {
                 samples,
                 sampleRate,
                 channels,
-                metadata: undefined // node-wav不提取元数据
+                metadata: undefined
             };
         } catch (error: any) {
             // WAV解析失败,尝试使用通用解码器
@@ -514,26 +508,13 @@ class WAVCodec extends BaseCodec {
             const processed = AudioProcessor.processPCM(pcmData, options);
             const bitDepth = options?.bitDepth ?? 16;
 
-            // 将交织的PCM数据拆分为各声道数据
-            const channelData = [];
-            const samplesPerChannel = processed.samples.length / processed.channels;
+            const encoder = new WavEncoder(processed.sampleRate, processed.channels, bitDepth);
 
-            for (let c = 0; c < processed.channels; c++) {
-                const channelSamples = new Float32Array(samplesPerChannel);
-                for (let i = 0; i < samplesPerChannel; i++) {
-                    channelSamples[i] = processed.samples[i * processed.channels + c]!;
-                }
-                channelData.push(channelSamples);
-            }
+            // Convert the Float32Array into a Buffer at the requested bit depth
+            const pcmBuffer = AudioProcessor.floatToPCM(processed.samples, bitDepth);
+            encoder.write(pcmBuffer);
 
-            // 使用node-wav编码
-            const wavBuffer = wav.encode(channelData, {
-                sampleRate: processed.sampleRate,
-                float: false, // 使用整数PCM
-                bitDepth: bitDepth as 8 | 16 | 32
-            });
-
-            return Buffer.from(wavBuffer);
+            return encoder.encode();
         } catch (error: any) {
             throw new AudioError(`WAV编码错误: ${error.message}`, 'encode', 'wav', error);
         }