使用Python的librosa库提取音频MFCC特征详解

文章目录

MFCC特征

代码分享

MFCC特征

MFCC（Mel-Frequency Cepstral Coefficients，梅尔频率倒谱系数）是模拟人耳对声音频率的感知方式、对音频信号进行处理后得到的特征，广泛用于语音识别和音频处理。

代码分享

import os
import librosa
import pywt
import matplotlib.pyplot as plt
import numpy as np
import cv2
from pathlib import Path
from tqdm import tqdm  # 需要安装 tqdm 库
from skimage.transform import resize

# Data processing: render each audio file's MFCC matrix as an image.
def _save_mfcc_image(file_path, output_path, target_duration, n_mels, n_mfcc):
    """Compute the MFCCs of one audio file and save them as a borderless PNG."""
    # Imported explicitly: some librosa releases do not expose the ``display``
    # submodule through a bare ``import librosa``.
    import librosa.display

    # sr=None keeps the file's native sampling rate instead of resampling.
    samples, sr = librosa.load(file_path, sr=None)

    # Pad or truncate so every clip has the same number of samples, which
    # gives every rendered image the same number of frames.
    y = librosa.util.fix_length(samples, size=int(target_duration * sr))

    # Mel power spectrogram -> dB scale -> MFCC (computed from the dB input).
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    mfcc = librosa.feature.mfcc(S=mel_db, n_mfcc=n_mfcc)

    # Draw on an explicit figure and always close it, so a failure while
    # plotting or saving cannot leak the figure into the next file's image.
    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(mfcc, ax=ax)
        ax.axis('off')  # Turn off axes
        ax.set_xticks([])  # Remove x-axis ticks
        ax.set_yticks([])  # Remove y-axis ticks
        fig.savefig(output_path, dpi=100, pad_inches=0, bbox_inches='tight')
    finally:
        plt.close(fig)


def process_audio_files(main_folder, target_duration=20, n_mels=24, n_mfcc=24):
    """Render an MFCC image for every ``.wav`` file found under *main_folder*.

    The folder is searched recursively. Each image is written to
    ``<main_folder>/MFCC/<same relative sub-folder>/<file stem>.png``.
    Files that fail to process are reported and skipped, so one bad file
    does not abort the whole batch.

    Args:
        main_folder: Root directory to search for ``.wav`` files.
        target_duration: Length in seconds that every clip is padded or
            truncated to before feature extraction.
        n_mels: Number of Mel bands in the intermediate spectrogram.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        None. If *main_folder* is not a directory or contains no ``.wav``
        files, a message is printed and nothing is created on disk.
    """
    main_path = Path(main_folder)
    output_base = main_path / "MFCC"

    # Collect every audio file (recursing into all sub-directories). Sorting
    # makes the processing order reproducible between runs.
    if main_path.is_dir():
        all_audio_files = sorted(main_path.rglob("*.wav"))
    else:
        all_audio_files = []

    if not all_audio_files:
        print("未找到任何音频文件！")
        return

    # Iterating through tqdm advances the bar once per file, whether or not
    # that file was processed successfully.
    for file_path in tqdm(all_audio_files, desc="处理进度"):
        try:
            # Mirror the input directory layout under the output folder.
            relative_path = file_path.relative_to(main_path)
            output_subfolder = output_base / relative_path.parent
            output_subfolder.mkdir(parents=True, exist_ok=True)

            output_path = output_subfolder / f"{file_path.stem}.png"
            _save_mfcc_image(
                file_path, output_path, target_duration, n_mels, n_mfcc
            )
        except Exception as e:
            # Deliberately broad: this is the per-file boundary of a batch
            # job. tqdm.write keeps the message from garbling the bar.
            tqdm.write(f"错误处理 {file_path}: {str(e)}")


if __name__ == "__main__":
    # Install the dependencies first if they are missing:
    #   pip install librosa matplotlib numpy tqdm

    # Placeholder: replace with the root folder that holds your .wav files.
    main_folder = "你的文件路径"
    process_audio_files(main_folder=main_folder)

作者：二猛子

物联沃分享整理
物联沃-IOTWORD物联网 » 使用Python的librosa库提取音频MFCC特征详解