opt: audio encoding and decoding

2024-11-22 01:56:33 +00:00 · 2024-08-07 04:22:51 +08:00
parent d7e40e488c
commit 5005d83ce0
1 changed files with 81 additions and 135 deletions
--- a/src/common/utils/audio.ts
+++ b/src/common/utils/audio.ts
@@ -1,124 +1,93 @@
 import fs from 'fs'
 import fsPromise from 'fs/promises'
 import { decode, encode, getDuration, getWavFileInfo, isWav, isSilk } from 'silk-wasm'
 import { log } from './log'
 import path from 'node:path'
 import ffmpeg from 'fluent-ffmpeg'
 import fsPromise from 'node:fs/promises'
 import { decode, encode, getDuration, getWavFileInfo, isWav, isSilk, EncodeResult } from 'silk-wasm'
 import { log } from './log'
 import { TEMP_DIR } from './index'
 import { getConfigUtil } from '../config'
 import { spawn } from 'node:child_process'
 import { randomUUID } from 'node:crypto'
 import { Readable } from 'node:stream'
 /**
  * Extra FFmpeg arguments accepted by `convert`.
  * Both lists are optional; a missing list means no extra options are applied.
  */
 interface FFmpegOptions {
  /** Forwarded to the command's `inputOptions(...)` when present. */
  input?: string[]
  /** Forwarded to the command's `outputOptions(...)` when present. */
  output?: string[]
 }
 /** Source handed to `ffmpeg(...)` by `convert`: either a string or a readable stream. */
 type Input = string | Readable
 /**
  * Runs an FFmpeg conversion through fluent-ffmpeg.
  *
  * Without `outputPath`, the piped output is collected chunk by chunk and the
  * promise resolves with a single concatenated Buffer. With `outputPath`, the
  * result is saved to that path and the promise resolves with the path itself.
  * An 'error' event from the command is logged and rejects the promise.
  *
  * @param input Source passed straight to `ffmpeg(...)`.
  * @param options Optional extra input/output options for the command.
  * @param outputPath Destination file; omit it to receive the data as a Buffer.
  */
 function convert(input: Input, options: FFmpegOptions): Promise<Buffer>
 function convert(input: Input, options: FFmpegOptions, outputPath: string): Promise<string>
 function convert(input: Input, options: FFmpegOptions, outputPath?: string): Promise<Buffer> | Promise<string> {
  return new Promise<any>((resolve, reject) => {
    // Filled only in the "no output path" mode.
    const received: Buffer[] = []

    let cmd = ffmpeg(input)
      .on('error', err => {
        log(`FFmpeg处理转换出错: `, err.message)
        reject(err)
      })
      .on('end', () => {
        resolve(outputPath ? outputPath : Buffer.concat(received))
      })

    const { input: inputArgs, output: outputArgs } = options
    if (inputArgs) {
      cmd = cmd.inputOptions(inputArgs)
    }
    if (outputArgs) {
      cmd = cmd.outputOptions(outputArgs)
    }

    // A configured binary path takes effect only when it is non-empty.
    const configuredBinary = getConfigUtil().getConfig().ffmpeg
    if (configuredBinary) {
      cmd = cmd.setFfmpegPath(configuredBinary)
    }

    if (!outputPath) {
      // Buffer mode: start the command by piping and gather everything it emits.
      const piped = cmd.pipe()
      piped.on('data', chunk => {
        received.push(chunk)
      })
      return
    }

    // File mode: start the command and let it write to the destination.
    cmd.save(outputPath)
  })
 }
 /**
  * Makes sure the audio file at `filePath` is available as silk data.
  *
  * The file is read into memory. When `isSilk` rejects the content, the audio
  * is encoded to silk, written to a randomly named file under `TEMP_DIR`, and
  * reported with `converted: true` plus the encoder's duration divided by 1000.
  * When the content is already silk, the original path is reported with
  * `converted: false`.
  *
  * NOTE(review): this listing is a diff rendering. Lines starting with `-`
  * belong to the removed implementation, lines starting with `+` to its
  * replacement, and part of the body is elided at the hunk boundary near the
  * end, so the tail of the function (remaining return fields, error handling)
  * is not visible here.
  *
  * @param filePath Path of the audio file to inspect and, if needed, convert.
  */
 export async function encodeSilk(filePath: string) {
  // Returns the first 7 bytes of the file as a hex string, or undefined when
  // reading fails (the error is printed to the console). The whole file is read
  // synchronously. NOTE(review): no caller is visible in this listing.
  function getFileHeader(filePath: string) {
    // Number of bytes to read
    const bytesToRead = 7
    try {
      const buffer = fs.readFileSync(filePath, {
        encoding: null,
        flag: 'r',
      })
      const fileHeader = buffer.toString('hex', 0, bytesToRead)
      return fileHeader
    } catch (err) {
      console.error('读取文件错误:', err)
      return
    }
  }
  // Synchronously reads the file and returns what `isWav` reports for it.
  // NOTE(review): no caller is visible in this listing.
  async function isWavFile(filePath: string) {
    return isWav(fs.readFileSync(filePath))
  }
  // Estimates a duration in whole seconds from the file size, assuming about
  // 3 KB of data per second; the result is never less than 1.
  async function guessDuration(pttPath: string) {
    const pttFileInfo = await fsPromise.stat(pttPath)
    let duration = pttFileInfo.size / 1024 / 3 // 3kb/s
    duration = Math.floor(duration)
    duration = Math.max(1, duration)
    log(`通过文件大小估算语音的时长:`, duration)
    return duration
  }
  // Disabled helpers, kept as commented-out code.
  // function verifyDuration(oriDuration: number, guessDuration: number) {
  //     // Both values are in seconds
  //     if (oriDuration - guessDuration > 10) {
  //         return guessDuration
  //     }
  //     oriDuration = Math.max(1, oriDuration)
  //     return oriDuration
  // }
  // async function getAudioSampleRate(filePath: string) {
  //     try {
  //         const mm = await import('music-metadata');
  //         const metadata = await mm.parseFile(filePath);
  //         log(`${filePath}采样率`, metadata.format.sampleRate);
  //         return metadata.format.sampleRate;
  //     } catch (error) {
  //         log(`${filePath}采样率获取失败`, error.stack);
  //         // console.error(error);
  //     }
  // }
  try {
    const file = await fsPromise.readFile(filePath)
    const pttPath = path.join(TEMP_DIR, randomUUID())
    if (!isSilk(file)) {
      log(`语音文件${filePath}需要转换成silk`)
      // New flow (+ lines): WAV data whose sample rate is in the allow-list is
      // handed to encode() directly; anything else is first converted by FFmpeg
      // to 24000 Hz, mono, s16le and then encoded with a 24000 sample rate.
-      const _isWav = isWav(file)
+      let result: EncodeResult
-      const pcmPath = pttPath + '.pcm'
+      const allowSampleRate = [8000, 12000, 16000, 24000, 32000, 44100, 48000]
-      let sampleRate = 0
+      if (isWav(file) && allowSampleRate.includes(getWavFileInfo(file).fmt.sampleRate)) {
-      const convert = () => {
+        result = await encode(file, 0)
        return new Promise<Buffer>((resolve, reject) => {
          const ffmpegPath = getConfigUtil().getConfig().ffmpeg || process.env.FFMPEG_PATH || 'ffmpeg'
          const cp = spawn(ffmpegPath, ['-y', '-i', filePath, '-ar', '24000', '-ac', '1', '-f', 's16le', pcmPath])
          cp.on('error', (err) => {
            log(`FFmpeg处理转换出错: `, err.message)
            return reject(err)
          })
          cp.on('exit', (code, signal) => {
            const EXIT_CODES = [0, 255]
            if (code == null || EXIT_CODES.includes(code)) {
              sampleRate = 24000
              const data = fs.readFileSync(pcmPath)
              fs.unlink(pcmPath, (err) => {
              })
              return resolve(data)
            }
            log(`FFmpeg exit: code=${code ?? 'unknown'} sig=${signal ?? 'unknown'}`)
            reject(Error(`FFmpeg处理转换失败`))
          })
        })
      }
      let input: Buffer
      if (!_isWav) {
        input = await convert()
      } else {
-        input = file
+        const input = await convert(filePath, {
-        const allowSampleRate = [8000, 12000, 16000, 24000, 32000, 44100, 48000]
+          output: [
-        const { fmt } = getWavFileInfo(input)
+            '-ar 24000',
-        // log(`wav文件信息`, fmt)
+            '-ac 1',
-        if (!allowSampleRate.includes(fmt.sampleRate)) {
+            '-f s16le'
-          input = await convert()
+          ]
-        }
+        })
        result = await encode(input, 24000)
      }
-      const silk = await encode(input, sampleRate)
+      const pttPath = path.join(TEMP_DIR, randomUUID())
-      fs.writeFileSync(pttPath, silk.data)
+      await fsPromise.writeFile(pttPath, result.data)
-      log(`语音文件${filePath}转换成功!`, pttPath, `时长:`, silk.duration)
+      log(`语音文件${filePath}转换成功!`, pttPath, `时长:`, result.duration)
      return {
        converted: true,
        path: pttPath,
-        duration: silk.duration / 1000,
+        duration: result.duration / 1000,
      }
    } else {
      // Already silk: keep the original file and only work out its duration.
      const silk = file
-      let duration = 0
+      let duration = 1
      try {
        duration = getDuration(silk) / 1000
      } catch (e: any) {
        // NOTE(review): the replacement log message says the duration defaults
        // to 1 second, yet the following line still estimates it from the file
        // size via guessDuration — confirm which behaviour is intended.
-        log('获取语音文件时长失败, 使用文件大小推测时长', filePath, e.stack)
+        log('获取语音文件时长失败, 默认为1秒', filePath, e.stack)
        duration = await guessDuration(filePath)
      }
      return {
        converted: false,
        path: filePath,
@@ -131,40 +100,17 @@ export async function encodeSilk(filePath: string) {
  }
 }
-export async function decodeSilk(inputFilePath: string, outFormat: 'mp3' | 'amr' | 'wma' | 'm4a' | 'spx' | 'ogg' | 'wav' | 'flac' = 'mp3') {
+type OutFormat = 'mp3' | 'amr' | 'wma' | 'm4a' | 'spx' | 'ogg' | 'wav' | 'flac'
-  const silkArrayBuffer = await fsPromise.readFile(inputFilePath)
+
-  const data = (await decode(silkArrayBuffer, 24000)).data
 /**
  * Decodes a silk file and converts the decoded audio to `outFormat` via FFmpeg.
  *
  * As written on the `+` lines: the file is read, decoded with a sample-rate
  * argument of 24000, and the decoded data is streamed into `convert` with the
  * input options `-f s16le`, `-ar 24000` and `-ac 1`. The output file is placed
  * in `TEMP_DIR`, named after the input file's base name with `.` + `outFormat`
  * appended.
  *
  * NOTE(review): if `decode` yields a plain Uint8Array rather than a Buffer,
  * `Readable.from(data)` may iterate it element by element instead of emitting
  * one binary chunk — confirm, and wrap with `Buffer.from(data)` if so.
  *
  * @param inputFilePath Path of the silk file to decode.
  * @param outFormat Target audio format / file extension; defaults to 'mp3'.
  * @returns The promise returned by `convert` for the output path.
  */
+export async function decodeSilk(inputFilePath: string, outFormat: OutFormat = 'mp3') {
-  const fileName = path.join(TEMP_DIR, path.basename(inputFilePath))
+  const silk = await fsPromise.readFile(inputFilePath)
-  const outPCMPath = fileName + '.pcm'
+  const { data } = await decode(silk, 24000)
-  const outFilePath = fileName + '.' + outFormat
+  const outFilePath = path.join(TEMP_DIR, path.basename(inputFilePath)) + `.${outFormat}`
-  await fsPromise.writeFile(outPCMPath, data)
+  return convert(Readable.from(data), {
-  const convert = () => {
+    input: [
-    return new Promise<string>((resolve, reject) => {
+      '-f s16le',
-      const ffmpegPath = getConfigUtil().getConfig().ffmpeg || process.env.FFMPEG_PATH || 'ffmpeg'
+      '-ar 24000',
-      const cp = spawn(ffmpegPath, [
+      '-ac 1'
-        '-y',
+    ]
-        '-f', 's16le',  // PCM format
+  }, outFilePath)
        // NOTE(review): the unmarked lines from here to the end reference `cp`
        // and `outPCMPath`, which are defined only on removed (`-`) lines; they
        // look like leftovers of the old spawn-based implementation in this
        // rendering — confirm against the real file.
        '-ar', '24000', // Sample rate
        '-ac', '1',     // Number of audio channels
        '-i', outPCMPath,
        outFilePath,
      ])
      cp.on('error', (err) => {
        log(`FFmpeg处理转换出错: `, err.message)
        return reject(err)
      })
      cp.on('exit', (code, signal) => {
        const EXIT_CODES = [0, 255]
        if (code == null || EXIT_CODES.includes(code)) {
          fs.unlink(outPCMPath, (err) => {
          })
          return resolve(outFilePath)
        }
        const exitErr = `FFmpeg exit: code=${code ?? 'unknown'} sig=${signal ?? 'unknown'}`
        log(exitErr)
        reject(Error(`FFmpeg处理转换失败,${exitErr}`))
      })
    })
  }
  return convert()
 }