# k230/k230/face_detect_module.py

# face_detect_module.py
# 人脸检测模块
# 适用于庐山派 K230-CanMV 开发板

from libs.PipeLine import PipeLine, ScopedTiming
from libs.AIBase import AIBase
from libs.AI2D import Ai2d
import os
import nncase_runtime as nn
import ulab.numpy as np
import aidemo
import gc


class FaceDetector:
    """
    Face detector that wraps the K230 camera pipeline and a face-detection
    kmodel behind a small start / detect / draw / show / stop API.

    Features:
        - Detects faces in the current camera frame.
        - Returns one (x, y, w, h, score) tuple per face.
        - Can draw the detected boxes on the pipeline's OSD image.

    Args:
        display_mode: Display mode, "lcd" or "hdmi".
        confidence_threshold: Confidence threshold (default 0.5).
        nms_threshold: Non-maximum-suppression threshold (default 0.2).
        debug_mode: Debug switch (default 0, off).

    Example:
        detector = FaceDetector(display_mode="lcd")
        detector.start()
        while True:
            img, faces = detector.detect()
            # faces = [(x, y, w, h, score), ...]
            detector.draw_boxes()
            detector.show()
        detector.stop()
    """

    # Model configuration
    KMODEL_PATH = "/sdcard/examples/kmodel/face_detection_320.kmodel"
    ANCHORS_PATH = "/sdcard/examples/utils/prior_data_320.bin"
    MODEL_INPUT_SIZE = [320, 320]
    # The anchor file is reshaped to (ANCHOR_LEN, DET_DIM) in __init__.
    ANCHOR_LEN = 4200
    DET_DIM = 4

    # Display configuration: per display mode, the screen size and the size
    # of the frames handed to the model (both as [width, height]).
    DISPLAY_CONFIGS = {
        "lcd": {
            "display_size": [800, 480],
            "rgb888p_size": [1920, 1080],
        },
        "hdmi": {
            "display_size": [1920, 1080],
            "rgb888p_size": [1920, 1080],
        }
    }

    def __init__(self, display_mode="lcd", confidence_threshold=0.5,
                 nms_threshold=0.2, debug_mode=0):
        """
        Validate the configuration and load the anchor data.

        No camera or model resources are acquired here; that happens in
        start().

        Args:
            display_mode: Display mode, "lcd" or "hdmi".
            confidence_threshold: Confidence threshold; higher values filter
                more strictly.
            nms_threshold: NMS threshold used to suppress duplicate
                detections.
            debug_mode: Debug mode; 1 enables timing output.

        Raises:
            ValueError: If display_mode is not a key of DISPLAY_CONFIGS.
        """
        if display_mode not in self.DISPLAY_CONFIGS:
            raise ValueError("display_mode必须是 'lcd' 或 'hdmi'")

        self._display_mode = display_mode
        self._confidence_threshold = confidence_threshold
        self._nms_threshold = nms_threshold
        self._debug_mode = debug_mode

        # Resolve the sizes that belong to the selected display mode.
        config = self.DISPLAY_CONFIGS[display_mode]
        self._display_size = config["display_size"]
        self._rgb888p_size = config["rgb888p_size"]

        # Load the anchor data and give it one row per anchor.
        self._anchors = np.fromfile(self.ANCHORS_PATH, dtype=np.float)
        self._anchors = self._anchors.reshape((self.ANCHOR_LEN, self.DET_DIM))

        self._pipeline = None
        self._face_det = None
        self._is_running = False
        self._last_faces = []

    def start(self):
        """
        Start the detector: create the camera pipeline and load the model.

        Calling start() on an already running detector is a no-op. If the
        model cannot be set up, everything acquired so far is released again
        before the exception propagates, so start() can be retried safely.
        """
        if self._is_running:
            return

        # Create the pipeline.
        self._pipeline = PipeLine(
            rgb888p_size=self._rgb888p_size,
            display_size=self._display_size,
            display_mode=self._display_mode
        )
        self._pipeline.create()

        # Create the face-detection model. From here on the pipeline is
        # live, so a failure must tear it down instead of leaking it.
        try:
            self._face_det = _FaceDetectionApp(
                kmodel_path=self.KMODEL_PATH,
                model_input_size=self.MODEL_INPUT_SIZE,
                anchors=self._anchors,
                confidence_threshold=self._confidence_threshold,
                nms_threshold=self._nms_threshold,
                rgb888p_size=self._rgb888p_size,
                display_size=self._display_size,
                debug_mode=self._debug_mode
            )
            self._face_det.config_preprocess()
        except Exception:
            self.stop()
            raise

        self._is_running = True

    def _to_display_box(self, det):
        """
        Convert one raw detection into a display-space (x, y, w, h, score).

        The first four values of det are rounded to integers and scaled from
        the model frame size to the display size with integer division. The
        score is det[4] when present, otherwise 1.0.
        """
        x, y, w, h = [int(round(v, 0)) for v in det[:4]]
        score = det[4] if len(det) > 4 else 1.0
        disp_w, disp_h = self._display_size[0], self._display_size[1]
        src_w, src_h = self._rgb888p_size[0], self._rgb888p_size[1]
        return (
            x * disp_w // src_w,
            y * disp_h // src_h,
            w * disp_w // src_w,
            h * disp_h // src_h,
            score,
        )

    def detect(self):
        """
        Detect faces in the current frame.

        Returns:
            tuple: (img, faces)
                - img: the frame fetched from the pipeline.
                - faces: list of (x, y, w, h, score) tuples whose
                  coordinates are already scaled to the display resolution.

        Raises:
            RuntimeError: If the detector has not been started.
        """
        if not self._is_running:
            raise RuntimeError("检测器未启动，请先调用 start()")

        # CanMV-specific hook; presumably lets the IDE interrupt the
        # detection loop — TODO confirm against the CanMV `os` docs.
        os.exitpoint()

        # Fetch the current frame.
        img = self._pipeline.get_frame()

        # Run inference.
        res = self._face_det.run(img)

        # Scale every detection to display coordinates.
        faces = []
        if res:
            faces = [self._to_display_box(det) for det in res]

        self._last_faces = faces
        return img, faces

    def draw_boxes(self, color=(255, 255, 0, 255), thickness=2):
        """
        Draw the boxes from the last detect() call on the OSD layer.

        The OSD image is cleared first. Does nothing when the detector is
        not running.

        Args:
            color: 4-tuple passed unchanged to draw_rectangle. The original
                note describes it as (R, G, B, A) — TODO confirm the channel
                order against the OSD image format.
            thickness: Line thickness.
        """
        if not self._is_running:
            return

        osd = self._pipeline.osd_img
        osd.clear()
        for (x, y, w, h, score) in self._last_faces:
            osd.draw_rectangle(
                x, y, w, h,
                color=color,
                thickness=thickness
            )

    def show(self):
        """Show the current frame (call detect() and draw_boxes() first)."""
        if self._is_running:
            self._pipeline.show_image()
            # Collect garbage once per shown frame.
            gc.collect()

    def stop(self):
        """
        Stop the detector and release the model and the pipeline.

        The internal state is reset before anything is released, and the
        pipeline is destroyed even if releasing the model raises, so a
        failing deinit() cannot leave the camera/display held or the
        detector marked as running. Safe to call more than once.
        """
        face_det, self._face_det = self._face_det, None
        pipeline, self._pipeline = self._pipeline, None
        self._is_running = False

        try:
            if face_det is not None:
                face_det.deinit()
        finally:
            if pipeline is not None:
                pipeline.destroy()

    @property
    def is_running(self):
        """Whether the detector is currently running."""
        return self._is_running

    @property
    def display_size(self):
        """Display resolution as [width, height]."""
        return self._display_size

    @property
    def last_faces(self):
        """Faces returned by the most recent detect() call."""
        return self._last_faces


class _FaceDetectionApp(AIBase):
    """
    Internal face-detection application built on AIBase.

    Configures the ai2d pre-processing (pad + resize to the model input
    size) and turns the raw model outputs into face boxes via
    aidemo.face_det_post_process.
    """

    def __init__(self, kmodel_path, model_input_size, anchors,
                 confidence_threshold=0.5, nms_threshold=0.2,
                 rgb888p_size=None, display_size=None,
                 debug_mode=0):
        """
        Store the configuration and prepare the ai2d pre-processor.

        Args:
            kmodel_path: Path of the kmodel file.
            model_input_size: Model input size as [width, height].
            anchors: Anchor data forwarded to the post-processing routine.
            confidence_threshold: Confidence threshold for detections.
            nms_threshold: Non-maximum-suppression threshold.
            rgb888p_size: Size of the frames fed to the model as
                [width, height]; defaults to [224, 224].
            display_size: Display size as [width, height]; defaults to
                [1920, 1080].
            debug_mode: Values above 0 enable timing output.
        """
        # None stands in for the list defaults so that no mutable default
        # object is shared between calls.
        if rgb888p_size is None:
            rgb888p_size = [224, 224]
        if display_size is None:
            display_size = [1920, 1080]

        super().__init__(kmodel_path, model_input_size, rgb888p_size, debug_mode)
        self.kmodel_path = kmodel_path
        self.model_input_size = model_input_size
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold
        self.anchors = anchors
        # Round the widths up to a multiple of 16.
        self.rgb888p_size = [self._align_up(rgb888p_size[0], 16), rgb888p_size[1]]
        self.display_size = [self._align_up(display_size[0], 16), display_size[1]]
        # Size of the images the pre-processor is currently built for;
        # config_preprocess() updates it when a custom size is requested.
        self._source_size = self.rgb888p_size
        self.debug_mode = debug_mode
        self.ai2d = Ai2d(debug_mode)
        self.ai2d.set_ai2d_dtype(
            nn.ai2d_format.NCHW_FMT,
            nn.ai2d_format.NCHW_FMT,
            np.uint8,
            np.uint8
        )

    def _align_up(self, value, alignment):
        """Round value up to the next multiple of alignment."""
        return ((value + alignment - 1) // alignment) * alignment

    def config_preprocess(self, input_image_size=None):
        """
        Configure the ai2d pre-processing: pad, then resize to the model
        input size.

        Args:
            input_image_size: Optional [width, height] of the images that
                will be fed in. When omitted, self.rgb888p_size is used.
                The padding and the post-processing both follow this size,
                so they stay consistent with the ai2d input shape.
        """
        with ScopedTiming("set preprocess config", self.debug_mode > 0):
            ai2d_input_size = input_image_size if input_image_size else self.rgb888p_size
            # Remember the effective size as a list for postprocess().
            self._source_size = [ai2d_input_size[0], ai2d_input_size[1]]
            top, bottom, left, right = self.get_padding_param(ai2d_input_size)
            self.ai2d.pad([0, 0, 0, 0, top, bottom, left, right], 0, [104, 117, 123])
            self.ai2d.resize(nn.interp_method.tf_bilinear, nn.interp_mode.half_pixel)
            # Shapes are given as [N, C, H, W], hence the swapped indices.
            self.ai2d.build(
                [1, 3, ai2d_input_size[1], ai2d_input_size[0]],
                [1, 3, self.model_input_size[1], self.model_input_size[0]]
            )

    def postprocess(self, results):
        """
        Convert raw model outputs into face detections.

        Returns the first element of the aidemo.face_det_post_process
        result, or the empty result itself when nothing was detected.
        """
        with ScopedTiming("postprocess", self.debug_mode > 0):
            post_ret = aidemo.face_det_post_process(
                self.confidence_threshold,
                self.nms_threshold,
                self.model_input_size[1],
                self.anchors,
                self._source_size,
                results
            )
            if len(post_ret) == 0:
                return post_ret
            return post_ret[0]

    def get_padding_param(self, input_size=None):
        """
        Compute the padding that fills the model input after an
        aspect-preserving scale of the source image.

        Only the bottom and right edges are padded; top and left are 0.

        Args:
            input_size: Optional [width, height] of the source image;
                defaults to self.rgb888p_size.

        Returns:
            tuple: (top, bottom, left, right) padding in pixels.
        """
        source = input_size if input_size else self.rgb888p_size
        src_w = source[0]
        src_h = source[1]
        dst_w = self.model_input_size[0]
        dst_h = self.model_input_size[1]

        # A single scale factor keeps the aspect ratio.
        ratio = min(dst_w / src_w, dst_h / src_h)
        new_w = int(ratio * src_w)
        new_h = int(ratio * src_h)

        dw = (dst_w - new_w) / 2
        dh = (dst_h - new_h) / 2
        # The +/- 0.1 offsets are carried over unchanged from the original
        # arithmetic.
        top = 0
        bottom = int(round(dh * 2 + 0.1))
        left = 0
        right = int(round(dw * 2 - 0.1))
        return top, bottom, left, right