diff --git a/src/common/utils/audio.ts b/src/common/utils/audio.ts index ea31221..7328a30 100644 --- a/src/common/utils/audio.ts +++ b/src/common/utils/audio.ts @@ -1,124 +1,93 @@ -import fs from 'fs' -import fsPromise from 'fs/promises' -import { decode, encode, getDuration, getWavFileInfo, isWav, isSilk } from 'silk-wasm' -import { log } from './log' import path from 'node:path' +import ffmpeg from 'fluent-ffmpeg' +import fsPromise from 'node:fs/promises' +import { decode, encode, getDuration, getWavFileInfo, isWav, isSilk, EncodeResult } from 'silk-wasm' +import { log } from './log' import { TEMP_DIR } from './index' import { getConfigUtil } from '../config' -import { spawn } from 'node:child_process' import { randomUUID } from 'node:crypto' +import { Readable } from 'node:stream' + +interface FFmpegOptions { + input?: string[] + output?: string[] +} + +type Input = string | Readable + +function convert(input: Input, options: FFmpegOptions): Promise +function convert(input: Input, options: FFmpegOptions, outputPath: string): Promise +function convert(input: Input, options: FFmpegOptions, outputPath?: string): Promise | Promise { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = [] + let command = ffmpeg(input) + .on('error', err => { + log(`FFmpeg处理转换出错: `, err.message) + reject(err) + }) + .on('end', () => { + if (!outputPath) { + resolve(Buffer.concat(chunks)) + } else { + resolve(outputPath) + } + }) + if (options.input) { + command = command.inputOptions(options.input) + } + if (options.output) { + command = command.outputOptions(options.output) + } + const ffmpegPath = getConfigUtil().getConfig().ffmpeg + if (ffmpegPath) { + command = command.setFfmpegPath(ffmpegPath) + } + if (!outputPath) { + const stream = command.pipe() + stream.on('data', chunk => { + chunks.push(chunk) + }) + } else { + command.save(outputPath) + } + }) +} export async function encodeSilk(filePath: string) { - function getFileHeader(filePath: string) { - // 定义要读取的字节数 - const bytesToRead = 7 - try { - const buffer = fs.readFileSync(filePath, { - encoding: null, - flag: 'r', - }) - - const fileHeader = buffer.toString('hex', 0, bytesToRead) - return fileHeader - } catch (err) { - console.error('读取文件错误:', err) - return - } - } - - async function isWavFile(filePath: string) { - return isWav(fs.readFileSync(filePath)) - } - - async function guessDuration(pttPath: string) { - const pttFileInfo = await fsPromise.stat(pttPath) - let duration = pttFileInfo.size / 1024 / 3 // 3kb/s - duration = Math.floor(duration) - duration = Math.max(1, duration) - log(`通过文件大小估算语音的时长:`, duration) - return duration - } - - // function verifyDuration(oriDuration: number, guessDuration: number) { - // // 单位都是秒 - // if (oriDuration - guessDuration > 10) { - // return guessDuration - // } - // oriDuration = Math.max(1, oriDuration) - // return oriDuration - // } - // async function getAudioSampleRate(filePath: string) { - // try { - // const mm = await import('music-metadata'); - // const metadata = await mm.parseFile(filePath); - // log(`${filePath}采样率`, metadata.format.sampleRate); - // return metadata.format.sampleRate; - // } catch (error) { - // log(`${filePath}采样率获取失败`, error.stack); - // // console.error(error); - // } - // } - try { const file = await fsPromise.readFile(filePath) - const pttPath = path.join(TEMP_DIR, randomUUID()) if (!isSilk(file)) { log(`语音文件${filePath}需要转换成silk`) - const _isWav = isWav(file) - const pcmPath = pttPath + '.pcm' - let sampleRate = 0 - const convert = () => { - return new Promise((resolve, reject) => { - const ffmpegPath = getConfigUtil().getConfig().ffmpeg || process.env.FFMPEG_PATH || 'ffmpeg' - const cp = spawn(ffmpegPath, ['-y', '-i', filePath, '-ar', '24000', '-ac', '1', '-f', 's16le', pcmPath]) - cp.on('error', (err) => { - log(`FFmpeg处理转换出错: `, err.message) - return reject(err) - }) - cp.on('exit', (code, signal) => { - const EXIT_CODES = [0, 255] - if (code == null || EXIT_CODES.includes(code)) { - sampleRate = 24000 - const data = fs.readFileSync(pcmPath) - fs.unlink(pcmPath, (err) => { - }) - return resolve(data) - } - log(`FFmpeg exit: code=${code ?? 'unknown'} sig=${signal ?? 'unknown'}`) - reject(Error(`FFmpeg处理转换失败`)) - }) - }) - } - let input: Buffer - if (!_isWav) { - input = await convert() + let result: EncodeResult + const allowSampleRate = [8000, 12000, 16000, 24000, 32000, 44100, 48000] + if (isWav(file) && allowSampleRate.includes(getWavFileInfo(file).fmt.sampleRate)) { + result = await encode(file, 0) } else { - input = file - const allowSampleRate = [8000, 12000, 16000, 24000, 32000, 44100, 48000] - const { fmt } = getWavFileInfo(input) - // log(`wav文件信息`, fmt) - if (!allowSampleRate.includes(fmt.sampleRate)) { - input = await convert() - } + const input = await convert(filePath, { + output: [ + '-ar 24000', + '-ac 1', + '-f s16le' + ] + }) + result = await encode(input, 24000) } - const silk = await encode(input, sampleRate) - fs.writeFileSync(pttPath, silk.data) - log(`语音文件${filePath}转换成功!`, pttPath, `时长:`, silk.duration) + const pttPath = path.join(TEMP_DIR, randomUUID()) + await fsPromise.writeFile(pttPath, result.data) + log(`语音文件${filePath}转换成功!`, pttPath, `时长:`, result.duration) return { converted: true, path: pttPath, - duration: silk.duration / 1000, + duration: result.duration / 1000, } } else { const silk = file - let duration = 0 + let duration = 1 try { duration = getDuration(silk) / 1000 } catch (e: any) { - log('获取语音文件时长失败, 使用文件大小推测时长', filePath, e.stack) - duration = await guessDuration(filePath) + log('获取语音文件时长失败, 默认为1秒', filePath, e.stack) } - return { converted: false, path: filePath, @@ -131,40 +100,17 @@ export async function encodeSilk(filePath: string) { } } -export async function decodeSilk(inputFilePath: string, outFormat: 'mp3' | 'amr' | 'wma' | 'm4a' | 'spx' | 'ogg' | 'wav' | 'flac' = 'mp3') { - const silkArrayBuffer = await fsPromise.readFile(inputFilePath) - const data = (await decode(silkArrayBuffer, 24000)).data - const fileName = path.join(TEMP_DIR, path.basename(inputFilePath)) - const outPCMPath = fileName + '.pcm' - const outFilePath = fileName + '.' + outFormat - await fsPromise.writeFile(outPCMPath, data) - const convert = () => { - return new Promise((resolve, reject) => { - const ffmpegPath = getConfigUtil().getConfig().ffmpeg || process.env.FFMPEG_PATH || 'ffmpeg' - const cp = spawn(ffmpegPath, [ - '-y', - '-f', 's16le', // PCM format - '-ar', '24000', // Sample rate - '-ac', '1', // Number of audio channels - '-i', outPCMPath, - outFilePath, - ]) - cp.on('error', (err) => { - log(`FFmpeg处理转换出错: `, err.message) - return reject(err) - }) - cp.on('exit', (code, signal) => { - const EXIT_CODES = [0, 255] - if (code == null || EXIT_CODES.includes(code)) { - fs.unlink(outPCMPath, (err) => { - }) - return resolve(outFilePath) - } - const exitErr = `FFmpeg exit: code=${code ?? 'unknown'} sig=${signal ?? 'unknown'}` - log(exitErr) - reject(Error(`FFmpeg处理转换失败,${exitErr}`)) - }) - }) - } - return convert() +type OutFormat = 'mp3' | 'amr' | 'wma' | 'm4a' | 'spx' | 'ogg' | 'wav' | 'flac' + +export async function decodeSilk(inputFilePath: string, outFormat: OutFormat = 'mp3') { + const silk = await fsPromise.readFile(inputFilePath) + const { data } = await decode(silk, 24000) + const outFilePath = path.join(TEMP_DIR, path.basename(inputFilePath)) + `.${outFormat}` + return convert(Readable.from(data), { + input: [ + '-f s16le', + '-ar 24000', + '-ac 1' + ] + }, outFilePath) } \ No newline at end of file