本文共 11124 字,大约阅读时间需要 37 分钟。
本项目基于卷积神经网络(CNN)技术,旨在实现对人类面部表情的自动识别。以下是项目的详细实施过程和结果分析。
面部表情识别属于典型的图像分类问题,目标是对人类面部表情图片进行自动分类识别。数据集cnn_train.csv包含人类面部表情图片的标签和特征信息,共包含7个表情类别。数据集规模较大,包含28709张表情图片,每张图片为48×48像素,提供了丰富的数据基础。
为便于数据的读取和处理,对原始数据集进行分离处理,分别生成cnn_label.csv和cnn_data.csv,其中cnn_label.csv存储表情类别标签,cnn_data.csv存储图片像素数据。
# cnn_feature_label.py
# Split the raw training CSV into separate label and feature files so the
# later pipeline stages can load each one independently.
import pandas as pd

SOURCE_CSV = 'cnn_train.csv'

# Load the combined dataset (columns: label, feature).
raw = pd.read_csv(SOURCE_CSV)

# Write each column without index or header; downstream readers
# (face_view.py, cnn_picture_label.py) expect bare values only.
raw[['label']].to_csv('cnn_label.csv', index=False, header=False)
raw[['feature']].to_csv('cnn_data.csv', index=False, header=False)

# Next step (article text): convert the pixel strings into viewable
# 48x48 face images -- implemented in face_view.py.
# face_view.py
# Render each row of cnn_data.csv (2304 pixel values) as a 48x48 grayscale
# JPEG named <row-index>.jpg inside ./face.
import os

import cv2
import numpy as np

# Directory that receives the generated images.
path = './/face'

# Fix: create the output directory up front -- cv2.imwrite does not raise
# on a missing directory, it just returns False and writes nothing.
os.makedirs(path, exist_ok=True)

# Fix: np.loadtxt returns a 1-D vector when the file holds a single row;
# atleast_2d keeps the per-row loop correct in that edge case.
data = np.atleast_2d(np.loadtxt('cnn_data.csv'))

for i in range(data.shape[0]):
    face_array = data[i, :].reshape((48, 48))  # flat 2304 values -> 48x48 matrix
    # NOTE(review): values are assumed to already be 0-255 intensities;
    # imwrite casts the float array on write -- confirm against the data.
    cv2.imwrite(os.path.join(path, '{}.jpg'.format(i)), face_array)

# Next step (article text): split the images into a training set (first
# 24000) and a test set, and record path/label pairs in dataset.csv.
# cnn_picture_label.py
# Build <image-dir>/dataset.csv mapping each face image file name to its
# expression label, by joining the directory listing with cnn_label.csv.
import os

import pandas as pd


def data_label(path):
    """Write path/label pairs for every .jpg in *path* to <path>/dataset.csv.

    cnn_label.csv (read from the current working directory) holds one label
    per row; a file named ``<i>.jpg`` receives the label on row *i*.
    Non-.jpg entries are ignored.  Row order follows os.listdir and is
    therefore platform-dependent, which is harmless because every row
    carries its own file name.
    """
    df_label = pd.read_csv('cnn_label.csv', header=None)
    path_list = []
    label_list = []
    for file_dir in os.listdir(path):
        stem, ext = os.path.splitext(file_dir)
        if ext == '.jpg':
            path_list.append(file_dir)
            label_list.append(df_label.iat[int(stem), 0])
    df = pd.DataFrame({'path': path_list, 'label': label_list})
    # Fix: os.path.join instead of a hard-coded '\\' separator so the
    # script also works on non-Windows systems.
    df.to_csv(os.path.join(path, 'dataset.csv'), index=False, header=False)


def main():
    # Absolute locations of the pre-split train/validation image folders.
    train_path = 'D:\\PyCharm_Project\\deep learning\\model\\cnn_train'
    val_path = 'D:\\PyCharm_Project\\deep learning\\model\\cnn_val'
    data_label(train_path)
    data_label(val_path)


if __name__ == "__main__":
    main()

# Next step (article text): wrap the labelled images in a FaceDataset class
# (subclass of torch.utils.data.Dataset) for loading during training.
# FaceDataset.py
import bisect                          # NOTE(review): unused in this snippet
import warnings                        # NOTE(review): unused in this snippet
from torch._utils import _accumulate   # NOTE(review): unused; private torch API, fragile across versions
from torch import randperm             # NOTE(review): unused in this snippet
# NOTE(review): this snippet also relies on torch, torch.utils.data as data,
# numpy as np, pandas as pd and cv2 -- see the consolidated import list in
# the final script.


class FaceDataset(data.Dataset):
    """Dataset of 48x48 grayscale face images listed in <root>/dataset.csv.

    dataset.csv has no header; column 0 is the image file name (relative
    to *root*), column 1 the integer expression label (0-6).
    """

    def __init__(self, root):
        super(FaceDataset, self).__init__()
        self.root = root
        # Fix: read dataset.csv once for both columns -- the original
        # parsed the same file twice, once per column.
        df = pd.read_csv(root + '\\dataset.csv', header=None, usecols=[0, 1])
        self.path = np.array(df[0])   # image file names
        self.label = np.array(df[1])  # integer class labels

    def __getitem__(self, item):
        """Return (1x48x48 float tensor in [0,1], label) for image *item*."""
        face = cv2.imread(self.root + '\\' + self.path[item])
        # Grayscale + histogram equalization boosts contrast before training.
        face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        face_hist = cv2.equalizeHist(face_gray)
        # Add the channel axis (CHW) and scale pixel values to [0, 1].
        face_normalized = face_hist.reshape(1, 48, 48) / 255.0
        face_tensor = torch.from_numpy(face_normalized).type('torch.FloatTensor')
        label = self.label[item]
        return face_tensor, label

    def __len__(self):
        return self.path.shape[0]

# The CNN below (article text) follows the open-source "Model B"
# architecture and uses RReLU as the activation function.
# FaceCNN.py
class FaceCNN(nn.Module):
    """CNN for 7-class facial-expression recognition on 1x48x48 inputs.

    Three conv/batch-norm/RReLU/max-pool stages (1 -> 64 -> 128 -> 256
    channels, each pool halving the 48x48 plane down to 6x6), followed by
    a four-layer fully connected classifier with dropout.
    """

    @staticmethod
    def _conv_stage(cin, cout):
        # One stage: 3x3 same-padding conv, batch norm, RReLU, 2x2 max pool.
        return nn.Sequential(
            nn.Conv2d(in_channels=cin, out_channels=cout, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=cout),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def __init__(self):
        super(FaceCNN, self).__init__()
        self.conv1 = self._conv_stage(1, 64)
        self.conv2 = self._conv_stage(64, 128)
        self.conv3 = self._conv_stage(128, 256)
        # Gaussian-initialize the conv weights registered so far.  The fc
        # stack is deliberately created afterwards, exactly as in the
        # original, so its Linear layers keep PyTorch's default init and
        # the RNG draw order is unchanged.
        self.apply(gaussian_weights_init)
        self.fc = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.RReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=1024),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=256),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=256, out_features=7),
        )

    def forward(self, x):
        # 1x48x48 -> 64x24x24 -> 128x12x12 -> 256x6x6, then flatten for fc.
        out = self.conv3(self.conv2(self.conv1(x)))
        return self.fc(out.view(out.shape[0], -1))
# Conv2d parameter glossary (article text):
#   in_channels  -- number of input channels
#   out_channels -- number of output channels
#   kernel_size  -- convolution kernel size
#   stride       -- convolution stride
#   padding      -- symmetric zero-padding rows/columns

# train.py
def train(train_dataset, val_dataset, batch_size, epochs, learning_rate, wt_decay):
    """Train a FaceCNN with SGD + cross-entropy and return the model.

    Every 5th epoch the accuracy on both datasets is printed via
    validate().  Returns the trained FaceCNN instance.
    """
    train_loader = data.DataLoader(train_dataset, batch_size)
    model = FaceCNN()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=wt_decay)
    for epoch in range(epochs):
        model.train()
        loss_rate = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            output = model.forward(images)
            loss_rate = loss_function(output, labels)
            loss_rate.backward()
            optimizer.step()
        # NOTE: this is the loss of the LAST batch only, not an epoch average.
        print(f"After {epoch + 1} epochs, the loss_rate is : {loss_rate.item()}")
        if epoch % 5 == 0:
            model.eval()
            acc_train = validate(model, train_dataset, batch_size)
            acc_val = validate(model, val_dataset, batch_size)
            print(f"After {epoch + 1} epochs, the acc_train is : {acc_train}")
            print(f"After {epoch + 1} epochs, the acc_val is : {acc_val}")
    return model


# validate.py
def validate(model, dataset, batch_size):
    """Return the classification accuracy of *model* over *dataset*."""
    val_loader = data.DataLoader(dataset, batch_size)
    result, num = 0.0, 0
    # Fix: inference only -- the original built an autograd graph for every
    # validation forward pass, wasting memory and time.
    with torch.no_grad():
        for images, labels in val_loader:
            pred = model.forward(images)
            pred = np.argmax(pred.data.numpy(), axis=1)  # predicted class per sample
            labels = labels.data.numpy()
            result += np.sum((pred == labels))
            num += len(images)
    acc = result / num
    return acc
训练过程中发现,模型在训练集上的准确率在60轮达到99%以上,但在测试集上的准确率仅为60%,表明存在过拟合问题。通过调整模型结构和优化训练策略,可进一步提升模型性能。
# Consolidated training script: data loading, model, training and entry point.
import torch
import torch.utils.data as data
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
import cv2


def gaussian_weights_init(m):
    """Re-initialize conv-layer weights with N(0, 0.04); other modules untouched."""
    classname = m.__class__.__name__
    # Matches Conv1d/Conv2d/... by class-name substring.
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.04)


def validate(model, dataset, batch_size):
    """Return the classification accuracy of *model* over *dataset*."""
    val_loader = data.DataLoader(dataset, batch_size)
    result, num = 0.0, 0
    # Fix: inference only -- do not build autograd graphs during validation.
    with torch.no_grad():
        for images, labels in val_loader:
            pred = model.forward(images)
            pred = np.argmax(pred.data.numpy(), axis=1)
            labels = labels.data.numpy()
            result += np.sum((pred == labels))
            num += len(images)
    acc = result / num
    return acc


class FaceDataset(data.Dataset):
    """Dataset of 48x48 grayscale face images listed in <root>/dataset.csv."""

    def __init__(self, root):
        super(FaceDataset, self).__init__()
        self.root = root
        # Fix: read dataset.csv once for both columns instead of twice.
        df = pd.read_csv(root + '\\dataset.csv', header=None, usecols=[0, 1])
        self.path = np.array(df[0])   # image file names
        self.label = np.array(df[1])  # integer class labels

    def __getitem__(self, item):
        """Return (1x48x48 float tensor in [0,1], label) for image *item*."""
        face = cv2.imread(self.root + '\\' + self.path[item])
        # Grayscale + histogram equalization boosts contrast before training.
        face_gray = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
        face_hist = cv2.equalizeHist(face_gray)
        face_normalized = face_hist.reshape(1, 48, 48) / 255.0  # CHW, [0,1]
        face_tensor = torch.from_numpy(face_normalized).type('torch.FloatTensor')
        label = self.label[item]
        return face_tensor, label

    def __len__(self):
        return self.path.shape[0]


class FaceCNN(nn.Module):
    """CNN for 7-class facial-expression recognition on 1x48x48 inputs."""

    def __init__(self):
        super(FaceCNN, self).__init__()
        # Stage 1: 1 -> 64 channels, 48x48 -> 24x24.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=64),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Stage 2: 64 -> 128 channels, 24x24 -> 12x12.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=128),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Stage 3: 128 -> 256 channels, 12x12 -> 6x6.
        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(num_features=256),
            nn.RReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        # Gaussian init is applied before fc is created, so it only touches
        # the conv layers; gaussian_weights_init skips non-Conv modules anyway.
        self.apply(gaussian_weights_init)
        # Classifier: 256*6*6 flattened features -> 7 expression classes.
        self.fc = nn.Sequential(
            nn.Dropout(p=0.2),
            nn.Linear(in_features=256*6*6, out_features=4096),
            nn.RReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=1024),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=1024, out_features=256),
            nn.RReLU(inplace=True),
            nn.Linear(in_features=256, out_features=7)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = x.view(x.shape[0], -1)  # flatten to (batch, 256*6*6)
        y = self.fc(x)
        return y


def train(train_dataset, val_dataset, batch_size, epochs, learning_rate, wt_decay):
    """Train a FaceCNN with SGD + cross-entropy; print progress; return the model."""
    train_loader = data.DataLoader(train_dataset, batch_size)
    model = FaceCNN()
    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=wt_decay)
    for epoch in range(epochs):
        model.train()
        loss_rate = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            output = model.forward(images)
            loss_rate = loss_function(output, labels)
            loss_rate.backward()
            optimizer.step()
        # NOTE: this is the loss of the LAST batch only, not an epoch average.
        print(f"After {epoch + 1} epochs, the loss_rate is : {loss_rate.item()}")
        if epoch % 5 == 0:
            model.eval()
            acc_train = validate(model, train_dataset, batch_size)
            acc_val = validate(model, val_dataset, batch_size)
            print(f"After {epoch + 1} epochs, the acc_train is : {acc_train}")
            print(f"After {epoch + 1} epochs, the acc_val is : {acc_val}")
    return model


def main():
    # Fix: raw strings -- the original relied on '\P', '\d', '\m', '\c' not
    # being recognized escape sequences (a DeprecationWarning and a future
    # SyntaxError); the resulting path values are unchanged.
    train_dataset = FaceDataset(root=r'D:\PyCharm_Project\deep learning\model\cnn_train')
    val_dataset = FaceDataset(root=r'D:\PyCharm_Project\deep learning\model\cnn_val')
    model = train(train_dataset, val_dataset, batch_size=128, epochs=100, learning_rate=0.1, wt_decay=0)
    # NOTE(review): this pickles the whole module object; saving
    # model.state_dict() is the more robust convention.
    torch.save(model, 'model_net.pkl')


if __name__ == "__main__":
    main()

# Closing remarks (article text): through this project I learned the basics
# of implementing a CNN -- data preparation, model training and tuning.
# Overfitting appeared during training, but further parameter and
# architecture adjustments should improve the model.
转载地址:http://iamkz.baihongyu.com/