使用Python的librosa库提取音频MFCC特征详解
文章目录
MFCC特征
MFCC(Mel-Frequency Cepstral Coefficients,梅尔频率倒谱系数)是模拟人耳对声音频率的非线性感知方式(梅尔刻度)对音频信号进行处理后得到的特征,广泛用于语音识别和音频处理。
代码分享
import os
import librosa
import pywt
import matplotlib.pyplot as plt
import numpy as np
import cv2
from pathlib import Path
from tqdm import tqdm # 需要安装 tqdm 库
from skimage.transform import resize
#数据处理 绘制图形
def process_audio_files(main_folder, *, target_duration=20, n_mels=24, n_mfcc=24):
    """Render an MFCC image for every ``.wav`` file found under ``main_folder``.

    The directory tree is searched recursively.  Each audio file is padded or
    trimmed to ``target_duration`` seconds, converted to MFCCs and drawn
    (without axes or ticks) into a PNG stored under ``main_folder/MFCC``,
    mirroring the sub-directory layout of the input file.

    A failure on one file is reported and the run continues with the next
    file; the progress bar advances in either case.

    Args:
        main_folder: Root directory (str or path-like) holding the audio files.
        target_duration: Length in seconds every clip is fixed to before the
            features are computed.
        n_mels: Number of mel bands of the intermediate mel spectrogram.
        n_mfcc: Number of MFCC coefficients to keep.

    Returns:
        None.  Prints a notice and returns early when no ``.wav`` file exists.
    """
    main_path = Path(main_folder)
    output_base = main_path / "MFCC"
    output_base.mkdir(parents=True, exist_ok=True)

    # Collect all audio files, descending into every sub-directory.
    all_audio_files = list(main_path.rglob("*.wav"))
    if not all_audio_files:
        print("未找到任何音频文件!")
        return

    # ``librosa.display`` is a submodule that a plain ``import librosa`` does
    # not load on every librosa release; import it explicitly so the
    # ``librosa.display.specshow`` call in the helper cannot fail with
    # AttributeError.
    import librosa.display  # noqa: F401

    with tqdm(total=len(all_audio_files), desc="处理进度") as pbar:
        for file_path in all_audio_files:
            try:
                # Recreate the input's relative directory layout in the output.
                relative_path = file_path.relative_to(main_path)
                output_subfolder = output_base / relative_path.parent
                output_subfolder.mkdir(parents=True, exist_ok=True)
                output_path = output_subfolder / f"{file_path.stem}.png"

                _save_mfcc_image(
                    file_path, output_path, target_duration, n_mels, n_mfcc
                )
            except Exception as e:
                # Deliberately best-effort: one unreadable file must not abort
                # the whole batch.  tqdm.write keeps the progress bar intact.
                tqdm.write(f"错误处理 {file_path}: {str(e)}")
            finally:
                # Advance exactly once per file, whether it succeeded or not.
                pbar.update(1)


def _save_mfcc_image(file_path, output_path, target_duration, n_mels, n_mfcc):
    """Load one audio file, compute its MFCC matrix and save it as an image."""
    # sr=None keeps the file's native sampling rate.
    samples, sr = librosa.load(file_path, sr=None)

    # Give every clip the same number of samples so all images share a width.
    y = librosa.util.fix_length(samples, size=int(round(target_duration * sr)))

    # MFCCs are derived from the dB-scaled mel spectrogram.
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    mfcc = librosa.feature.mfcc(S=mel_db, n_mfcc=n_mfcc)

    # Draw on a dedicated figure and always close it: a figure left open after
    # an error would leak memory and be drawn over by the next file.
    fig, ax = plt.subplots()
    try:
        librosa.display.specshow(mfcc, ax=ax)
        ax.axis('off')
        ax.set_xticks([])
        ax.set_yticks([])
        fig.savefig(output_path, dpi=100, pad_inches=0, bbox_inches='tight')
    finally:
        plt.close(fig)
if __name__ == "__main__":
    # Third-party packages used by the processing function, if not installed:
    #   pip install librosa matplotlib numpy tqdm
    # The file-level imports additionally reference pywt, cv2 and skimage
    # (PyWavelets, opencv-python and scikit-image).
    #
    # Placeholder: replace with the directory that contains the .wav files.
    audio_root = "你的文件路径"
    process_audio_files(audio_root)
作者:二猛子