k230/k230/face_detect_module.py

281 lines
9.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# face_detect_module.py
# 人脸检测模块
# 适用于庐山派 K230-CanMV 开发板
from libs.PipeLine import PipeLine, ScopedTiming
from libs.AIBase import AIBase
from libs.AI2D import Ai2d
import os
import nncase_runtime as nn
import ulab.numpy as np
import aidemo
import gc
class FaceDetector:
    """Face detector built on the K230 AI pipeline.

    Wraps a ``PipeLine`` (camera + display) and a face-detection kmodel,
    and exposes a small start / detect / draw / show / stop life cycle.

    Args:
        display_mode: Display mode, ``"lcd"`` or ``"hdmi"``.
        confidence_threshold: Confidence threshold (default 0.5).
        nms_threshold: Non-maximum-suppression threshold (default 0.2).
        debug_mode: Debug switch (default 0, off).

    Example:
        detector = FaceDetector(display_mode="lcd")
        detector.start()
        while True:
            img, faces = detector.detect()
            # faces = [(x, y, w, h, score), ...]
            detector.draw_boxes()
            detector.show()
        detector.stop()
    """

    # Model configuration
    KMODEL_PATH = "/sdcard/examples/kmodel/face_detection_320.kmodel"
    ANCHORS_PATH = "/sdcard/examples/utils/prior_data_320.bin"
    MODEL_INPUT_SIZE = [320, 320]
    ANCHOR_LEN = 4200
    DET_DIM = 4

    # Display configuration, keyed by display mode
    DISPLAY_CONFIGS = {
        "lcd": {
            "display_size": [800, 480],
            "rgb888p_size": [1920, 1080],
        },
        "hdmi": {
            "display_size": [1920, 1080],
            "rgb888p_size": [1920, 1080],
        },
    }

    def __init__(self, display_mode="lcd", confidence_threshold=0.5,
                 nms_threshold=0.2, debug_mode=0):
        """Validate the configuration and load the anchor data.

        Args:
            display_mode: Display mode, ``"lcd"`` or ``"hdmi"``.
            confidence_threshold: Confidence threshold passed to the
                post-processing step.
            nms_threshold: NMS threshold passed to the post-processing step.
            debug_mode: Debug mode; values > 0 enable timing output.

        Raises:
            ValueError: If ``display_mode`` is not a key of
                ``DISPLAY_CONFIGS``.
        """
        if display_mode not in self.DISPLAY_CONFIGS:
            raise ValueError("display_mode必须是 'lcd' 或 'hdmi'")

        self._display_mode = display_mode
        self._confidence_threshold = confidence_threshold
        self._nms_threshold = nms_threshold
        self._debug_mode = debug_mode

        # Copy the sizes so that mutating the list returned by the
        # ``display_size`` property cannot corrupt the shared class config.
        config = self.DISPLAY_CONFIGS[display_mode]
        self._display_size = list(config["display_size"])
        self._rgb888p_size = list(config["rgb888p_size"])

        # Load the anchor data and shape it as (ANCHOR_LEN, DET_DIM).
        self._anchors = np.fromfile(self.ANCHORS_PATH, dtype=np.float)
        self._anchors = self._anchors.reshape((self.ANCHOR_LEN, self.DET_DIM))

        self._pipeline = None
        self._face_det = None
        self._is_running = False
        self._last_faces = []

    def start(self):
        """Create the pipeline and the detection model.

        Does nothing if the detector is already running. If model set-up
        fails after the pipeline has been created, everything acquired so
        far is released via ``stop()`` before the exception is re-raised.
        """
        if self._is_running:
            return

        # Initialise the pipeline
        self._pipeline = PipeLine(
            rgb888p_size=self._rgb888p_size,
            display_size=self._display_size,
            display_mode=self._display_mode
        )
        self._pipeline.create()

        # Initialise the face-detection model
        try:
            self._face_det = _FaceDetectionApp(
                kmodel_path=self.KMODEL_PATH,
                model_input_size=self.MODEL_INPUT_SIZE,
                anchors=self._anchors,
                confidence_threshold=self._confidence_threshold,
                nms_threshold=self._nms_threshold,
                rgb888p_size=self._rgb888p_size,
                display_size=self._display_size,
                debug_mode=self._debug_mode
            )
            self._face_det.config_preprocess()
        except Exception:
            # Do not leave the pipeline allocated behind a failed start.
            self.stop()
            raise

        self._is_running = True

    def _to_display_coords(self, det):
        """Convert one raw detection into a display-space face tuple.

        Args:
            det: Sequence whose first four items are x, y, w, h in
                ``rgb888p_size`` coordinates, optionally followed by a score.

        Returns:
            tuple: ``(x, y, w, h, score)`` with the box scaled to
            ``display_size`` using integer floor division; ``score`` is
            1.0 when ``det`` has no fifth item.
        """
        x, y, w, h = [int(round(v, 0)) for v in det[:4]]
        score = det[4] if len(det) > 4 else 1.0
        disp_w, disp_h = self._display_size[0], self._display_size[1]
        src_w, src_h = self._rgb888p_size[0], self._rgb888p_size[1]
        return (
            x * disp_w // src_w,
            y * disp_h // src_h,
            w * disp_w // src_w,
            h * disp_h // src_h,
            score,
        )

    def detect(self):
        """Run detection on the current frame.

        Returns:
            tuple: ``(img, faces)`` where ``img`` is the frame returned by
            the pipeline and ``faces`` is a list of ``(x, y, w, h, score)``
            tuples already converted to display-resolution coordinates.

        Raises:
            RuntimeError: If the detector has not been started.
        """
        if not self._is_running:
            raise RuntimeError("检测器未启动,请先调用 start()")
        os.exitpoint()

        # Grab the current frame and run inference on it
        img = self._pipeline.get_frame()
        res = self._face_det.run(img)

        # Convert the results to display coordinates
        faces = []
        if res:
            faces = [self._to_display_coords(det) for det in res]

        self._last_faces = faces
        return img, faces

    def draw_boxes(self, color=(255, 255, 0, 255), thickness=2):
        """Draw the most recent detection boxes on the OSD layer.

        Clears the OSD image first. Does nothing when the detector is not
        running.

        Args:
            color: Box colour, passed through to ``draw_rectangle``.
            thickness: Line thickness.
        """
        if not self._is_running:
            return
        osd = self._pipeline.osd_img
        osd.clear()
        for (x, y, w, h, _score) in self._last_faces:
            osd.draw_rectangle(
                x, y, w, h,
                color=color,
                thickness=thickness
            )

    def show(self):
        """Show the current frame (call ``detect`` and ``draw_boxes`` first).

        Also triggers a garbage-collection pass after the frame is shown.
        """
        if self._is_running:
            self._pipeline.show_image()
            gc.collect()

    def stop(self):
        """Stop the detector and release the model and the pipeline.

        Safe to call more than once. The pipeline is destroyed and the
        running flag cleared even if releasing the model raises; in that
        case the exception still propagates to the caller.
        """
        try:
            if self._face_det is not None:
                self._face_det.deinit()
        finally:
            self._face_det = None
            try:
                if self._pipeline is not None:
                    self._pipeline.destroy()
            finally:
                self._pipeline = None
                self._is_running = False

    @property
    def is_running(self):
        """Whether the detector is currently running."""
        return self._is_running

    @property
    def display_size(self):
        """Display resolution as ``[width, height]``."""
        return self._display_size

    @property
    def last_faces(self):
        """Face list produced by the most recent ``detect()`` call."""
        return self._last_faces
class _FaceDetectionApp(AIBase):
    """Internal face-detection application built on ``AIBase``.

    Configures the Ai2d pre-processing (pad + resize) for the model input
    and post-processes the raw model outputs with
    ``aidemo.face_det_post_process``.
    """

    def __init__(self, kmodel_path, model_input_size, anchors,
                 confidence_threshold=0.5, nms_threshold=0.2,
                 rgb888p_size=None, display_size=None,
                 debug_mode=0):
        """Store the configuration and set up the Ai2d instance.

        Args:
            kmodel_path: Path of the kmodel file.
            model_input_size: Model input size as ``[width, height]``.
            anchors: Anchor data passed to the post-processing step.
            confidence_threshold: Confidence threshold for post-processing.
            nms_threshold: NMS threshold for post-processing.
            rgb888p_size: Input image size ``[width, height]``; defaults to
                ``[224, 224]`` when omitted.
            display_size: Display size ``[width, height]``; defaults to
                ``[1920, 1080]`` when omitted.
            debug_mode: Values > 0 enable ``ScopedTiming`` output.
        """
        # Use None sentinels rather than mutable list defaults, which would
        # be shared by every call.
        if rgb888p_size is None:
            rgb888p_size = [224, 224]
        if display_size is None:
            display_size = [1920, 1080]

        super().__init__(kmodel_path, model_input_size, rgb888p_size, debug_mode)
        self.kmodel_path = kmodel_path
        self.model_input_size = model_input_size
        self.confidence_threshold = confidence_threshold
        self.nms_threshold = nms_threshold
        self.anchors = anchors
        # Round the widths up to a multiple of 16.
        self.rgb888p_size = [self._align_up(rgb888p_size[0], 16), rgb888p_size[1]]
        self.display_size = [self._align_up(display_size[0], 16), display_size[1]]
        self.debug_mode = debug_mode
        self.ai2d = Ai2d(debug_mode)
        self.ai2d.set_ai2d_dtype(
            nn.ai2d_format.NCHW_FMT,
            nn.ai2d_format.NCHW_FMT,
            np.uint8,
            np.uint8
        )

    def _align_up(self, value, alignment):
        """Return ``value`` rounded up to the next multiple of ``alignment``."""
        return ((value + alignment - 1) // alignment) * alignment

    def config_preprocess(self, input_image_size=None):
        """Configure the Ai2d pad + resize pre-processing.

        Args:
            input_image_size: Optional ``[width, height]`` of the Ai2d
                input; falls back to ``self.rgb888p_size`` when falsy.
        """
        with ScopedTiming("set preprocess config", self.debug_mode > 0):
            ai2d_input_size = input_image_size if input_image_size else self.rgb888p_size
            top, bottom, left, right = self.get_padding_param()
            self.ai2d.pad([0, 0, 0, 0, top, bottom, left, right], 0, [104, 117, 123])
            self.ai2d.resize(nn.interp_method.tf_bilinear, nn.interp_mode.half_pixel)
            # Shapes are given as [1, 3, height, width].
            self.ai2d.build(
                [1, 3, ai2d_input_size[1], ai2d_input_size[0]],
                [1, 3, self.model_input_size[1], self.model_input_size[0]]
            )

    def postprocess(self, results):
        """Post-process raw model outputs.

        Args:
            results: Raw outputs handed to ``aidemo.face_det_post_process``.

        Returns:
            The (empty) post-processing result when it has length 0,
            otherwise its first element.
        """
        with ScopedTiming("postprocess", self.debug_mode > 0):
            post_ret = aidemo.face_det_post_process(
                self.confidence_threshold,
                self.nms_threshold,
                self.model_input_size[1],
                self.anchors,
                self.rgb888p_size,
                results
            )
            if len(post_ret) == 0:
                return post_ret
            return post_ret[0]

    def get_padding_param(self):
        """Compute the padding for an aspect-preserving fit to the model input.

        The scaled image is anchored at the top-left corner, so ``top`` and
        ``left`` are always 0 and all padding goes to the bottom and right.

        Returns:
            tuple: ``(top, bottom, left, right)`` padding as integers.
        """
        dst_w = self.model_input_size[0]
        dst_h = self.model_input_size[1]
        src_w = self.rgb888p_size[0]
        src_h = self.rgb888p_size[1]

        # The smaller ratio makes the scaled image fit in both dimensions.
        ratio = min(dst_w / src_w, dst_h / src_h)
        new_w = int(ratio * src_w)
        new_h = int(ratio * src_h)
        dw = (dst_w - new_w) / 2
        dh = (dst_h - new_h) / 2

        top = 0
        left = 0
        bottom = int(round(dh * 2 + 0.1))
        right = int(round(dw * 2 - 0.1))
        return top, bottom, left, right