Python Project -- Hand Gesture Recognition Control System Based on Computer Vision - 4. Application Examples

Date: 2025-04-25 07:41:11

4.1 Main Program

Below is the system's main program, which integrates all of the modules into the complete gesture recognition and control pipeline.

import cv2
import time
import argparse

from image_capture import ImageCapture
from hand_detector import HandDetector
from gesture_recognizer import GestureRecognizer
from gesture_controller import GestureController

def main():
    # Parse command-line arguments
    parser = argparse.ArgumentParser(description='Hand Gesture Recognition Control System')
    parser.add_argument('--camera', type=int, default=0, help='Camera device ID')
    parser.add_argument('--width', type=int, default=640, help='Camera width')
    parser.add_argument('--height', type=int, default=480, help='Camera height')
    parser.add_argument('--model', type=str, default='models/dynamic_gesture_model.h5', help='Path to dynamic gesture model')
    parser.add_argument('--debug', action='store_true', help='Enable debug mode')
    args = parser.parse_args()
    
    # Initialize modules
    image_capture = ImageCapture(camera_id=args.camera, width=args.width, height=args.height)
    hand_detector = HandDetector()
    gesture_recognizer = GestureRecognizer(dynamic_model_path=args.model)
    gesture_controller = GestureController()
    
    # Performance statistics
    frame_count = 0
    start_time = time.time()
    fps = 0
    
    print("Hand Gesture Recognition Control System Started!")
    print("Press 'q' to quit, 'd' to toggle debug mode")
    
    debug_mode = args.debug
    
    while True:
        # Grab a frame
        frame = image_capture.get_frame()
        if frame is None:
            print("Error: Could not read frame from camera")
            break
        
        # Detect hands
        frame, hands_data = hand_detector.find_hands(frame, draw=debug_mode)
        
        # Recognize the gesture
        gesture, gesture_type = gesture_recognizer.process_frame(frame, hands_data)
        
        # Execute the control command
        if hands_data:
            gesture_controller.process_gesture(gesture, gesture_type, hands_data[0])
        
        # Compute FPS
        frame_count += 1
        elapsed_time = time.time() - start_time
        if elapsed_time > 1:
            fps = frame_count / elapsed_time
            frame_count = 0
            start_time = time.time()
        
        # Overlay debug information
        if debug_mode:
            # Show FPS
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            
            # Show the recognized gesture
            if gesture:
                cv2.putText(frame, f"Gesture: {gesture} ({gesture_type})", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        
        # Show the window unconditionally: cv2.waitKey() only delivers key
        # events while an OpenCV window exists and has focus, so hiding the
        # window outside debug mode would make 'q' and 'd' stop working
        cv2.imshow("Hand Gesture Control", frame)
        
        # Handle keyboard input
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
        elif key == ord('d'):
            debug_mode = not debug_mode
            print(f"Debug mode: {'ON' if debug_mode else 'OFF'}")
    
    # Release resources
    image_capture.release()
    cv2.destroyAllWindows()
    print("System terminated.")

if __name__ == "__main__":
    main()
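
Assuming the main program above is saved as main.py (the file name is an illustrative choice, not fixed by the code), it can be launched from the command line. The flags below are exactly those defined by the argparse setup:

python main.py                      # default camera, debug mode off
python main.py --camera 1 --debug   # second camera, debug overlay enabled
python main.py --width 1280 --height 720 --model models/dynamic_gesture_model.h5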

4.2 Model Training

To train the dynamic gesture recognition model, we first need to collect a gesture dataset. Below is the script for data collection and model training.

import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

from image_capture import ImageCapture
from hand_detector import HandDetector
from gesture_recognizer import DynamicGestureRecognizer

def collect_data():
    """收集手势数据"""
    # 初始化
    image_capture = ImageCapture()
    hand_detector = HandDetector()
    
    # Gesture classes
    gestures = [
        'swipe_right', 'swipe_left', 'swipe_up', 'swipe_down',
        'circle', 'zoom_in', 'zoom_out', 'wave', 'grab', 'release'
    ]
    
    # Create the data storage directory
    os.makedirs('data', exist_ok=True)
    
    for gesture_id, gesture_name in enumerate(gestures):
        print(f"\nPreparing to collect data for gesture: {gesture_name}")
        print("Press 's' to start recording, 'q' to quit")
        
        while True:
            frame = image_capture.get_frame()
            if frame is None:
                continue
            
            # Show instructions
            cv2.putText(frame, f"Gesture: {gesture_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, "Press 's' to start recording", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            
            # Show the frame
            cv2.imshow("Data Collection", frame)
            
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                # Release the camera and close windows before the early exit
                image_capture.release()
                cv2.destroyAllWindows()
                return
            elif key == ord('s'):
                break
        
        print(f"Recording gesture: {gesture_name}. Perform the gesture multiple times.")
        print("Press 'q' to finish recording this gesture")
        
        # Collect sequence data
        sequences = []
        sequence_length = 30
        
        # Record multiple sequences
        for sequence_idx in range(30):  # record 30 sequences per gesture
            print(f"Recording sequence {sequence_idx+1}/30")
            
            # Initialize the sequence buffer
            sequence_buffer = []
            
            # Record one complete sequence
            while len(sequence_buffer) < sequence_length:
                frame = image_capture.get_frame()
                if frame is None:
                    continue
                
                # Detect hands
                frame, hands_data = hand_detector.find_hands(frame)
                
                if hands_data:
                    # Use only the first detected hand
                    landmarks = hands_data[0]
                    
                    # Flatten the landmarks into a 1-D feature vector
                    flattened = []
                    for lm in landmarks:
                        flattened.extend([lm[0], lm[1]])  # use only the x and y coordinates
                    
                    sequence_buffer.append(flattened)
                
                # Show progress
                cv2.putText(frame, f"Gesture: {gesture_name}", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.putText(frame, f"Sequence: {sequence_idx+1}/30", (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                cv2.putText(frame, f"Frames: {len(sequence_buffer)}/{sequence_length}", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
                
                cv2.imshow("Data Collection", frame)
                
                key = cv2.waitKey(1) & 0xFF
                if key == ord('q'):
                    break
            
            # If the sequence is complete, add it to the dataset
            if len(sequence_buffer) == sequence_length:
                sequences.append(sequence_buffer)
            
            # Check whether the user chose to quit
            if key == ord('q'):
                break
        
        # Save the data
        if sequences:
            np.save(f"data/{gesture_name}.npy", np.array(sequences))
            print(f"Saved {len(sequences)} sequences for gesture: {gesture_name}")
    
    image_capture.release()
    cv2.destroyAllWindows()
    print("Data collection completed!")

def train_model():
    """训练动态手势识别模型"""
    # 手势类型
    gestures = [
        'swipe_right', 'swipe_left', 'swipe_up', 'swipe_down',
        'circle', 'zoom_in', 'zoom_out', 'wave', 'grab', 'release'
    ]
    
    # Load the data
    X = []
    y = []
    
    for gesture_id, gesture_name in enumerate(gestures):
        try:
            data = np.load(f"data/{gesture_name}.npy")
            for sequence in data:
                X.append(sequence)
                y.append(gesture_id)
        except FileNotFoundError:
            print(f"Warning: No data file found for gesture: {gesture_name}")
    
    # Abort if no data was loaded at all
    if not X:
        print("Error: no training data found in 'data/'. Run with --collect first.")
        return
    
    # Convert to arrays and one-hot encode the labels
    X = np.array(X)
    y = to_categorical(np.array(y), num_classes=len(gestures))
    
    # Split into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Build the model
    sequence_length = X.shape[1]
    num_landmarks = X.shape[2] // 2  # each landmark contributes an x and a y coordinate
    recognizer = DynamicGestureRecognizer(num_classes=len(gestures), sequence_length=sequence_length, num_landmarks=num_landmarks)
    
    # Train the model
    print("Training model...")
    history = recognizer.train(X_train, y_train, epochs=100, batch_size=16, validation_split=0.2)
    
    # Evaluate on the held-out test set
    loss, accuracy = recognizer.model.evaluate(X_test, y_test)
    print(f"Test accuracy: {accuracy:.4f}")
    
    # Save the model
    os.makedirs('models', exist_ok=True)
    recognizer.save_model('models/dynamic_gesture_model.h5')
    print("Model saved to 'models/dynamic_gesture_model.h5'")

if __name__ == "__main__":
    import argparse
    
    parser = argparse.ArgumentParser(description='Hand Gesture Recognition Model Training')
    parser.add_argument('--collect', action='store_true', help='Collect training data')
    parser.add_argument('--train', action='store_true', help='Train model')
    args = parser.parse_args()
    
    if args.collect:
        collect_data()
    if args.train:
        train_model()
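
Assuming the training script above is saved as train.py (again, an illustrative file name), data collection and training run as two separate steps, using the flags defined by the argparse setup:

python train.py --collect   # record 30 sequences of 30 frames for each of the 10 gestures
python train.py --train     # train on data/*.npy and save models/dynamic_gesture_model.h5

With these defaults, and assuming HandDetector returns MediaPipe's 21 hand landmarks, each saved .npy file has shape (num_sequences, 30, 42): 30 frames per sequence, and 21 landmarks times two coordinates (x, y) per frame.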

4.3 Use Cases

The system can be applied in a wide range of scenarios; the following are several typical use cases:

  1. Computer control: drive mouse movement, clicks, scrolling, and similar operations with gestures for touch-free human-computer interaction (see the sketch after this list).

  2. Presentation control: switch slides with gestures while giving a talk or demo (also covered by the sketch below).

  3. Smart home control: operate smart home devices such as lights, air conditioners, and TVs with gestures.

  4. Game control: build gesture-controlled games for a more natural interaction experience.

  5. Assistive technology: provide an alternative interaction channel for people with limited mobility.
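
As a concrete illustration of use cases 1 and 2, the sketch below maps recognized gesture names to OS-level actions with the third-party pyautogui library. This is a minimal sketch, not the GestureController from the earlier sections: the gesture names are assumed to match the labels used in the training script, and the index-fingertip index (8) assumes MediaPipe's hand landmark layout.

import pyautogui

# Minimal sketch: map gesture names to OS-level actions (assumed labels,
# matching the training script above; the real GestureController may differ)
GESTURE_ACTIONS = {
    'swipe_left': lambda: pyautogui.press('left'),    # e.g. previous slide
    'swipe_right': lambda: pyautogui.press('right'),  # e.g. next slide
    'swipe_up': lambda: pyautogui.scroll(200),        # scroll up
    'swipe_down': lambda: pyautogui.scroll(-200),     # scroll down
    'grab': lambda: pyautogui.mouseDown(),            # start a drag
    'release': lambda: pyautogui.mouseUp(),           # end a drag
}

def execute_gesture(gesture, hand_landmarks, frame_size=(640, 480)):
    """Move the cursor to follow the index fingertip (landmark 8 in
    MediaPipe's hand model) and run the action mapped to the gesture."""
    if hand_landmarks:
        x, y = hand_landmarks[8][0], hand_landmarks[8][1]
        screen_w, screen_h = pyautogui.size()
        # Map camera pixel coordinates to screen coordinates
        pyautogui.moveTo(x * screen_w / frame_size[0], y * screen_h / frame_size[1])
    action = GESTURE_ACTIONS.get(gesture)
    if action:
        action()

When a slide-show application has focus, swipe_left and swipe_right then behave like the arrow keys, which covers the presentation-control case directly. In practice a per-gesture cooldown is advisable so that one physical swipe does not fire the action on every frame.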