4.1 Main Program
The following is the system's main program. It ties all of the modules together to provide the complete gesture-recognition control loop.
import cv2
import time
import argparse

from image_capture import ImageCapture
from hand_detector import HandDetector
from gesture_recognizer import GestureRecognizer
from gesture_controller import GestureController


def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Hand Gesture Recognition Control System')
    parser.add_argument('--camera', type=int, default=0, help='Camera device ID')
    parser.add_argument('--width', type=int, default=640, help='Camera width')
    parser.add_argument('--height', type=int, default=480, help='Camera height')
    parser.add_argument('--model', type=str, default='models/dynamic_gesture_model.h5', help='Path to dynamic gesture model')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode')
    args = parser.parse_args()

    # Initialize the pipeline modules
    image_capture = ImageCapture(camera_id=args.camera, width=args.width, height=args.height)
    hand_detector = HandDetector()
    gesture_recognizer = GestureRecognizer(dynamic_model_path=args.model)
    gesture_controller = GestureController()

    # Performance statistics
    frame_count = 0
    start_time = time.time()
    fps = 0

    print("Hand Gesture Recognition Control System Started!")
    print("Press 'q' to quit, 'd' to toggle debug mode")
    debug_mode = args.debug

    while True:
        # Grab a frame from the camera
        frame = image_capture.get_frame()
        if frame is None:
            print("Error: Could not read frame from camera")
            break

        # Detect hands and (in debug mode) draw the landmarks
        frame, hands_data = hand_detector.find_hands(frame, draw=debug_mode)

        # Recognize the current gesture
        gesture, gesture_type = gesture_recognizer.process_frame(frame, hands_data)

        # Execute the corresponding control command
        # (only when a hand is present and a gesture was actually recognized)
        if hands_data and gesture:
            gesture_controller.process_gesture(gesture, gesture_type, hands_data[0])

        # Update the FPS estimate roughly once per second
        frame_count += 1
        elapsed_time = time.time() - start_time
        if elapsed_time > 1:
            fps = frame_count / elapsed_time
            frame_count = 0
            start_time = time.time()

        # Overlay debug information
        if debug_mode:
            # Show FPS
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            # Show the recognized gesture and its type
            if gesture:
                cv2.putText(frame, f"Gesture: {gesture} ({gesture_type})", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Display the output window
        cv2.imshow("Hand Gesture Control", frame)

        # Handle keyboard input
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('d'):
            debug_mode = not debug_mode
            print(f"Debug mode: {'ON' if debug_mode else 'OFF'}")

    # Release resources
    image_capture.release()
    cv2.destroyAllWindows()
    print("System terminated.")


if __name__ == "__main__":
    main()
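The main program imports ImageCapture and HandDetector from separate modules that are not listed in this section. For readers implementing them from scratch, the following is a minimal sketch of the interfaces the main loop assumes: a camera wrapper built on OpenCV's cv2.VideoCapture and a detector built on MediaPipe Hands. The class internals here are illustrative assumptions; only the method names and return shapes are dictated by the calls in main().

# Minimal sketch of the interfaces assumed by main(). The internals are
# illustrative; the real image_capture.py and hand_detector.py may differ.
import cv2
import mediapipe as mp

class ImageCapture:
    def __init__(self, camera_id=0, width=640, height=480):
        # Open the camera and request the desired resolution
        self.cap = cv2.VideoCapture(camera_id)
        self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)

    def get_frame(self):
        # Return a mirrored BGR frame, or None if the read fails
        ret, frame = self.cap.read()
        return cv2.flip(frame, 1) if ret else None

    def release(self):
        self.cap.release()

class HandDetector:
    def __init__(self, max_hands=2, detection_confidence=0.7):
        self.mp_hands = mp.solutions.hands
        self.hands = self.mp_hands.Hands(max_num_hands=max_hands,
                                         min_detection_confidence=detection_confidence)
        self.drawer = mp.solutions.drawing_utils

    def find_hands(self, frame, draw=False):
        # MediaPipe expects RGB input, while OpenCV delivers BGR
        results = self.hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        hands_data = []
        if results.multi_hand_landmarks:
            for hand_lms in results.multi_hand_landmarks:
                # 21 normalized (x, y) coordinates per detected hand
                hands_data.append([(lm.x, lm.y) for lm in hand_lms.landmark])
                if draw:
                    self.drawer.draw_landmarks(frame, hand_lms,
                                               self.mp_hands.HAND_CONNECTIONS)
        return frame, hands_data

With the entry script saved as, say, main.py, the system would then be started with a command such as python main.py --camera 0 --debug.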
4.2 Model Training
To train the dynamic gesture recognition model, we first need to collect a gesture dataset. The script below handles both data collection and model training.
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from image_capture import ImageCapture
from hand_detector import HandDetector
from gesture_recognizer import DynamicGestureRecognizer


def collect_data():
    """Collect gesture sequence data."""
    # Initialization
    image_capture = ImageCapture()
    hand_detector = HandDetector()

    # Gesture classes
    gestures = [
        'swipe_right', 'swipe_left', 'swipe_up', 'swipe_down',
        'circle', 'zoom_in', 'zoom_out', 'wave', 'grab', 'release'
    ]

    # Create the data directory
    os.makedirs('data', exist_ok=True)

    for gesture_id, gesture_name in enumerate(gestures):
        print(f"\nPreparing to collect data for gesture: {gesture_name}")
        print("Press 's' to start recording, 'q' to quit")

        while True:
            frame = image_capture.get_frame()
            if frame is None:
                continue

            # Show on-screen instructions
            cv2.putText(frame, f"Gesture: {gesture_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, "Press 's' to start recording", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Display the frame
            cv2.imshow("Data Collection", frame)

            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                return
            elif key == ord('s'):
                break

        print(f"Recording gesture: {gesture_name}. Perform the gesture multiple times.")
        print("Press 'q' to finish recording this gesture")

        # Collected sequences for this gesture
        sequences = []
        sequence_length = 30

        # Record 30 sequences per gesture
        for sequence_idx in range(30):
            print(f"Recording sequence {sequence_idx+1}/30")

            # Initialize the sequence buffer and the key state
            sequence_buffer = []
            key = 0

            # Record one complete sequence
            while len(sequence_buffer) < sequence_length:
                frame = image_capture.get_frame()
                if frame is None:
                    continue

                # Detect hands
                frame, hands_data = hand_detector.find_hands(frame)
                if hands_data:
                    # Use only the first detected hand
                    landmarks = hands_data[0]
                    # Flatten the landmarks into a 1-D feature vector (x and y only)
                    flattened = []
                    for lm in landmarks:
                        flattened.extend([lm[0], lm[1]])
                    sequence_buffer.append(flattened)

                # Show recording progress
                cv2.putText(frame, f"Gesture: {gesture_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.putText(frame, f"Sequence: {sequence_idx+1}/30", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.putText(frame, f"Frames: {len(sequence_buffer)}/{sequence_length}", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.imshow("Data Collection", frame)

                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break

            # Keep the sequence only if it is complete
            if len(sequence_buffer) == sequence_length:
                sequences.append(sequence_buffer)

            # Stop recording this gesture on 'q'
            if key == ord('q'):
                break

        # Save the collected data
        if sequences:
            np.save(f"data/{gesture_name}.npy", np.array(sequences))
            print(f"Saved {len(sequences)} sequences for gesture: {gesture_name}")

    image_capture.release()
    cv2.destroyAllWindows()
    print("Data collection completed!")


def train_model():
    """Train the dynamic gesture recognition model."""
    # Gesture classes (must match the order used during data collection)
    gestures = [
        'swipe_right', 'swipe_left', 'swipe_up', 'swipe_down',
        'circle', 'zoom_in', 'zoom_out', 'wave', 'grab', 'release'
    ]

    # Load the data
    X = []
    y = []
    for gesture_id, gesture_name in enumerate(gestures):
        try:
            data = np.load(f"data/{gesture_name}.npy")
            for sequence in data:
                X.append(sequence)
                y.append(gesture_id)
        except FileNotFoundError:
            print(f"Warning: No data file found for gesture: {gesture_name}")

    if not X:
        print("Error: No training data found. Run with --collect first.")
        return

    # Convert to arrays
    X = np.array(X)
    y = to_categorical(np.array(y), num_classes=len(gestures))

    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build the model
    sequence_length = X.shape[1]
    num_landmarks = X.shape[2] // 2  # each landmark contributes an x and a y coordinate
    recognizer = DynamicGestureRecognizer(num_classes=len(gestures), sequence_length=sequence_length, num_landmarks=num_landmarks)

    # Train the model
    print("Training model...")
    history = recognizer.train(X_train, y_train, epochs=100, batch_size=16, validation_split=0.2)

    # Evaluate on the held-out test set
    loss, accuracy = recognizer.model.evaluate(X_test, y_test)
    print(f"Test accuracy: {accuracy:.4f}")

    # Save the model
    os.makedirs('models', exist_ok=True)
    recognizer.save_model('models/dynamic_gesture_model.h5')
    print("Model saved to 'models/dynamic_gesture_model.h5'")


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(description='Hand Gesture Recognition Model Training')
    parser.add_argument('--collect', action='store_true', help='Collect training data')
    parser.add_argument('--train', action='store_true', help='Train model')
    args = parser.parse_args()

    if args.collect:
        collect_data()
    if args.train:
        train_model()
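The training script relies on a DynamicGestureRecognizer class (imported from gesture_recognizer) that exposes train(), save_model(), and a Keras model attribute. As a sketch of what such a class could look like, an LSTM over the flattened landmark sequences is a common choice; the constructor signature below matches the call in train_model(), but the layer sizes and architecture are illustrative assumptions.

# Illustrative sketch only: an LSTM-based sequence classifier matching the
# constructor and methods used in train_model(). Layer sizes are assumptions.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

class DynamicGestureRecognizer:
    def __init__(self, num_classes, sequence_length=30, num_landmarks=21):
        # Each frame contributes 2 coordinates (x, y) per landmark
        num_features = num_landmarks * 2
        self.model = Sequential([
            LSTM(64, return_sequences=True, input_shape=(sequence_length, num_features)),
            Dropout(0.2),
            LSTM(128),
            Dropout(0.2),
            Dense(64, activation='relu'),
            Dense(num_classes, activation='softmax'),
        ])
        self.model.compile(optimizer='adam', loss='categorical_crossentropy',
                           metrics=['accuracy'])

    def train(self, X_train, y_train, epochs=100, batch_size=16, validation_split=0.2):
        # Returns the Keras History object, as expected by train_model()
        return self.model.fit(X_train, y_train, epochs=epochs,
                              batch_size=batch_size, validation_split=validation_split)

    def save_model(self, path):
        self.model.save(path)

A GRU or a 1-D CNN would also be a reasonable choice here; the essential constraint is that the input shape match the (sequence_length, num_landmarks * 2) arrays produced by collect_data().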
4.3 Use Cases
The system can be applied in a variety of scenarios. Several typical use cases are listed below:
- Computer control: use gestures to drive mouse movement, clicks, scrolling, and similar operations for touch-free human-computer interaction (see the sketch after this list).
- Presentation control: switch slides with gestures while giving a talk or demonstration.
- Smart home control: operate smart home devices such as lights, air conditioners, and televisions through gestures.
- Game control: build gesture-driven games that offer a more natural interaction experience.
- Assistive technology: provide an alternative interaction method for people with limited mobility.
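To make the first use case concrete, a GestureController could map recognized gestures to mouse and keyboard events. The sketch below uses the third-party pyautogui library as one possible backend; the gesture-to-action mapping and the class internals are illustrative assumptions, not the system's actual implementation.

# Illustrative sketch: mapping gestures to desktop actions with pyautogui.
# The mapping and class internals are assumptions for demonstration.
import pyautogui

class GestureController:
    def __init__(self):
        self.screen_w, self.screen_h = pyautogui.size()

    def process_gesture(self, gesture, gesture_type, hand_landmarks):
        if gesture_type == 'static' and gesture == 'point':
            # Move the cursor to where the index fingertip (landmark 8) points,
            # assuming landmark coordinates are normalized to [0, 1]
            x, y = hand_landmarks[8][0], hand_landmarks[8][1]
            pyautogui.moveTo(int(x * self.screen_w), int(y * self.screen_h))
        elif gesture == 'grab':
            pyautogui.mouseDown()      # begin a drag
        elif gesture == 'release':
            pyautogui.mouseUp()        # end the drag
        elif gesture == 'swipe_right':
            pyautogui.press('right')   # e.g. next slide in a presentation
        elif gesture == 'swipe_left':
            pyautogui.press('left')    # e.g. previous slide

The same dispatch structure extends naturally to the other use cases: a smart home backend would replace the pyautogui calls with device API requests, while a game would translate gestures into in-game input events.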