[OpenCV-dlib] Extending Face Recognition: Liveness Detection via Randomly Requested Head Actions


Introduction

Face detection and pose recognition are important areas of modern computer vision, playing a key role in applications such as face unlock, expression recognition, and virtual reality. This article walks through a Python application that combines several libraries and techniques for face detection and pose recognition, using randomly requested head actions to implement liveness detection.


Face Detection

Face detection is the foundation of any face recognition task. This application uses two main libraries for it: dlib and OpenCV.

The dlib library

dlib is a powerful toolkit for face detection and facial landmark localization; it provides the algorithms this application uses to locate faces and their feature points.
Cross-platform installation of dlib:
A complete guide to configuring the dlib Python API on Linux and Windows

[Orange Pi-OpenCV-Torch-dlib] Fixing a TF card corrupted to RAW format, plus Python environment configuration

import dlib
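
The two dlib entry points this article relies on are the frontal face detector and the 68-point landmark predictor. Here is a minimal sketch of how they fit together; it assumes the shape_predictor_68_face_landmarks.dat model file (which the full program below also loads) sits in the working directory, and face.jpg is a placeholder test image:

import cv2
import dlib

detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

img = cv2.imread("face.jpg")          # placeholder test image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
for rect in detector(gray, 0):        # one rectangle per detected face
    landmarks = predictor(gray, rect)  # 68 (x, y) facial landmarks
    print(rect, landmarks.part(30))    # point 30 is the nose tip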

The OpenCV library

OpenCV is a library widely used for image processing and computer vision tasks. In this application, OpenCV handles image processing, display, and saving.

import cv2
import numpy as np
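
In this project OpenCV covers frame capture, color conversion, display, and saving. A minimal sketch of those roles (the window title and output file name are placeholders):

import cv2

cap = cv2.VideoCapture(0)              # 0 = default camera; a video path also works
ret, frame = cap.read()                # grab one BGR frame
if ret:
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)   # preprocessing
    cv2.imshow("Frame", frame)                       # display
    cv2.waitKey(0)
    cv2.imwrite("frame.png", frame)                  # saving
cap.release()
cv2.destroyAllWindows()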

Face Recognition with RetinaFace + FaceNet

The code is adapted, with minor fine-tuning, from AI blogger Bubbliiiing's tutorial 聪明的人脸识别4——Pytorch 利用Retinaface+Facenet搭建人脸识别平台 (Clever Face Recognition 4: building a face recognition platform with RetinaFace + FaceNet in PyTorch).

RetinaFace + FaceNet face recognition system with a Gradio interface
github:
Face-recognition-web-ui

retinaface_new.py

import time

import cv2
import numpy as np
import torch
import torch.nn as nn
from PIL import Image, ImageDraw, ImageFont
from tqdm import tqdm

from nets.facenet import Facenet
from nets_retinaface.retinaface import RetinaFace
from utils.anchors import Anchors
from utils.config import cfg_mnet, cfg_re50
from utils.utils import (Alignment_1, compare_faces, letterbox_image,
                         preprocess_input)
from utils.utils_bbox import (decode, decode_landm, non_max_suppression,
                              retinaface_correct_boxes)


# --------------------------------------#
#   Drawing Chinese text requires converting to a PIL image first.
# --------------------------------------#

def cv2ImgAddText(img, label, left, top, textColor=(255, 255, 255)):
    img = Image.fromarray(np.uint8(img))
    # ---------------#
    #   Set the font
    # ---------------#
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=20)

    draw = ImageDraw.Draw(img)
    draw.text((left, top), label, fill=textColor, font=font)
    return np.asarray(img)


# --------------------------------------#
#   Make sure backbone and model_path match.
#   After switching the facenet model,
#   the known faces must be re-encoded.
# --------------------------------------#
class Retinaface(object):
    _defaults = {
        # ----------------------------------------------------------------------#
        #   Path to the trained RetinaFace weights
        # ----------------------------------------------------------------------#
        "retinaface_model_path": 'model_data/Retinaface_mobilenet0.25.pth',
        # ----------------------------------------------------------------------#
        #   Backbone used by RetinaFace: mobilenet or resnet50
        # ----------------------------------------------------------------------#
        "retinaface_backbone": "mobilenet",
        # ----------------------------------------------------------------------#
        #   Only detections scoring above this confidence are kept
        # ----------------------------------------------------------------------#
        "confidence": 0.5,
        # ----------------------------------------------------------------------#
        #   IoU threshold used for non-maximum suppression in RetinaFace
        # ----------------------------------------------------------------------#
        "nms_iou": 0.3,
        # ----------------------------------------------------------------------#
        #   Input size fed to RetinaFace. Input size strongly affects FPS;
        #   reduce input_shape to speed up detection. With letterbox_image
        #   enabled, inputs are resized to input_shape; otherwise the
        #   original image is used, which can skew results with the
        #   mobilenet backbone (resnet50 does not have this problem).
        #   input_shape must be a multiple of 32, e.g. [640, 640, 3].
        # ----------------------------------------------------------------------#
        "retinaface_input_shape": [640, 640, 3],
        # ----------------------------------------------------------------------#
        #   Whether to letterbox the input to the size above
        # ----------------------------------------------------------------------#
        "letterbox_image": True,

        # ----------------------------------------------------------------------#
        #   Path to the trained FaceNet weights
        # ----------------------------------------------------------------------#
        "facenet_model_path": 'model_data/facenet_mobilenet.pth',
        # ----------------------------------------------------------------------#
        #   Backbone used by FaceNet: mobilenet or inception_resnetv1
        # ----------------------------------------------------------------------#
        "facenet_backbone": "mobilenet",
        # ----------------------------------------------------------------------#
        #   Input image size used by FaceNet
        # ----------------------------------------------------------------------#
        "facenet_input_shape": [160, 160, 3],
        # ----------------------------------------------------------------------#
        #   Distance threshold below which two faces count as the same person
        # ----------------------------------------------------------------------#
        "facenet_threhold": 0.9,

        # --------------------------------#
        #   Whether to use CUDA
        #   Set to False if no GPU is available
        # --------------------------------#
        # "cuda": False
        "cuda": True
    }

    @classmethod
    def get_defaults(cls, n):
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    # ---------------------------------------------------#
    #   Initialize Retinaface
    # ---------------------------------------------------#
    def __init__(self, encoding=0, **kwargs):
        self.__dict__.update(self._defaults)
        for name, value in kwargs.items():
            setattr(self, name, value)

        # ---------------------------------------------------#
        #   Config for the chosen backbone
        # ---------------------------------------------------#
        if self.retinaface_backbone == "mobilenet":
            self.cfg = cfg_mnet
        else:
            self.cfg = cfg_re50

        # ---------------------------------------------------#
        #   Generate the prior (anchor) boxes
        # ---------------------------------------------------#
        self.anchors = Anchors(self.cfg, image_size=(
            self.retinaface_input_shape[0], self.retinaface_input_shape[1])).get_anchors()
        self.generate()

        try:
            self.known_face_encodings = np.load(
                "model_data/{backbone}_face_encoding.npy".format(backbone=self.facenet_backbone))
            self.known_face_names = np.load("model_data/{backbone}_names.npy".format(backbone=self.facenet_backbone))
        except Exception:
            if not encoding:
                print("Failed to load the known face encodings; check that the "
                      "encoding files have been generated under model_data.")

    # ---------------------------------------------------#
    #   Build the networks and load their weights
    # ---------------------------------------------------#
    def generate(self):
        # -------------------------------#
        #   Load models and weights
        # -------------------------------#
        self.net = RetinaFace(cfg=self.cfg, phase='eval', pre_train=False).eval()
        self.facenet = Facenet(backbone=self.facenet_backbone, mode="predict").eval()
        # torch.cuda.empty_cache()

        print('Loading weights into state dict...')
        # state_dict = torch.load(self.retinaface_model_path, map_location=torch.device('cpu'))
        state_dict = torch.load(self.retinaface_model_path)
        self.net.load_state_dict(state_dict)

        # state_dict = torch.load(self.facenet_model_path, map_location=torch.device('cpu'))
        state_dict = torch.load(self.facenet_model_path)
        self.facenet.load_state_dict(state_dict, strict=False)

        if self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

            self.facenet = nn.DataParallel(self.facenet)
            self.facenet = self.facenet.cuda()
        print('Finished!')

    def encode_face_dataset(self, image_paths, names):
        face_encodings = []
        for index, path in enumerate(tqdm(image_paths)):
            # ---------------------------------------------------#
            #   Open the face image
            # ---------------------------------------------------#
            image = np.array(Image.open(path), np.float32)
            # ---------------------------------------------------#
            #   Keep a copy of the original image
            # ---------------------------------------------------#
            old_image = image.copy()
            # ---------------------------------------------------#
            #   Record the height and width of the input image
            # ---------------------------------------------------#
            im_height, im_width, _ = np.shape(image)
            # ---------------------------------------------------#
            #   Scales for mapping predicted boxes and landmarks
            #   back to the original image size
            # ---------------------------------------------------#
            scale = [
                np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]
            ]
            scale_for_landmarks = [
                np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                np.shape(image)[1], np.shape(image)[0]
            ]
            if self.letterbox_image:
                image = letterbox_image(image, [self.retinaface_input_shape[1], self.retinaface_input_shape[0]])
                anchors = self.anchors
            else:
                anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

            # ---------------------------------------------------#
            #   Feed the preprocessed image through RetinaFace
            # ---------------------------------------------------#
            with torch.no_grad():
                # -----------------------------------------------------------#
                #   Preprocess and normalize; HWC -> CHW, add batch dimension
                # -----------------------------------------------------------#
                image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0).type(
                    torch.FloatTensor)

                if self.cuda:
                    image = image.cuda()
                    anchors = anchors.cuda()

                loc, conf, landms = self.net(image)
                # -----------------------------------------------------------#
                #   Decode the predicted boxes
                # -----------------------------------------------------------#
                boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
                # -----------------------------------------------------------#
                #   Face-class confidence of each prediction
                # -----------------------------------------------------------#
                conf = conf.data.squeeze(0)[:, 1:2]
                # -----------------------------------------------------------#
                #   Decode the facial landmarks
                # -----------------------------------------------------------#
                landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])

                # -----------------------------------------------------------#
                #   Stack boxes, confidences and landmarks, then run NMS
                # -----------------------------------------------------------#
                boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
                boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

                if len(boxes_conf_landms) <= 0:
                    print(names[index], ": no face detected")
                    continue
                # ---------------------------------------------------------#
                #   If letterboxing was used, remove the gray-padding offsets
                # ---------------------------------------------------------#
                if self.letterbox_image:
                    boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                                                                 np.array([self.retinaface_input_shape[0],
                                                                           self.retinaface_input_shape[1]]),
                                                                 np.array([im_height, im_width]))

            boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
            boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

            # ---------------------------------------------------#
            #   Keep only the largest detected face
            # ---------------------------------------------------#
            best_face_location = None
            biggest_area = 0
            for result in boxes_conf_landms:
                left, top, right, bottom = result[0:4]

                w = right - left
                h = bottom - top
                if w * h > biggest_area:
                    biggest_area = w * h
                    best_face_location = result

            # ---------------------------------------------------#
            #   Crop the face region and align it with the 5 landmarks
            # ---------------------------------------------------#
            crop_img = old_image[int(best_face_location[1]):int(best_face_location[3]),
                       int(best_face_location[0]):int(best_face_location[2])]
            landmark = np.reshape(best_face_location[5:], (5, 2)) - np.array(
                [int(best_face_location[0]), int(best_face_location[1])])
            crop_img, _ = Alignment_1(crop_img, landmark)

            crop_img = np.array(
                letterbox_image(np.uint8(crop_img), (self.facenet_input_shape[1], self.facenet_input_shape[0]))) / 255
            crop_img = crop_img.transpose(2, 0, 1)
            crop_img = np.expand_dims(crop_img, 0)
            # ---------------------------------------------------#
            #   Compute the 128-dimensional embedding for this face
            # ---------------------------------------------------#
            with torch.no_grad():
                crop_img = torch.from_numpy(crop_img).type(torch.FloatTensor)
                if self.cuda:
                    crop_img = crop_img.cuda()

                face_encoding = self.facenet(crop_img)[0].cpu().numpy()
                face_encodings.append(face_encoding)

        np.save("model_data/{backbone}_face_encoding.npy".format(backbone=self.facenet_backbone), face_encodings)
        np.save("model_data/{backbone}_names.npy".format(backbone=self.facenet_backbone), names)

    # ---------------------------------------------------#
    #   Detect and recognize faces in an image
    # ---------------------------------------------------#

    def live_detect_image(self, image, flag):
        # ---------------------------------------------------#
        #   Back up the input image; it is used for drawing later
        # ---------------------------------------------------#
        old_image = image.copy()
        # ---------------------------------------------------#
        #   Convert the image to a float numpy array
        # ---------------------------------------------------#
        image = np.array(image, np.float32)

        # ---------------------------------------------------#
        #   RetinaFace detection - start
        # ---------------------------------------------------#
        # ---------------------------------------------------#
        #   Record the height and width of the input image
        # ---------------------------------------------------#
        im_height, im_width, _ = np.shape(image)
        # ---------------------------------------------------#
        #   Scales for mapping predicted boxes and landmarks
        #   back to the original image size
        # ---------------------------------------------------#
        scale = [
            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]
        ]
        scale_for_landmarks = [
            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
            np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
            np.shape(image)[1], np.shape(image)[0]
        ]

        # ---------------------------------------------------------#
        #   letterbox_image pads the image with gray bars for a
        #   distortion-free resize
        # ---------------------------------------------------------#
        if self.letterbox_image:
            image = letterbox_image(image, [self.retinaface_input_shape[1], self.retinaface_input_shape[0]])
            anchors = self.anchors
        else:
            anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()

        # ---------------------------------------------------#
        #   Feed the preprocessed image through RetinaFace
        # ---------------------------------------------------#
        with torch.no_grad():
            # -----------------------------------------------------------#
            #   Preprocess and normalize; HWC -> CHW, add batch dimension
            # -----------------------------------------------------------#
            image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0).type(torch.FloatTensor)

            if self.cuda:
                anchors = anchors.cuda()
                image = image.cuda()

            # ---------------------------------------------------------#
            #   Run the network
            # ---------------------------------------------------------#
            loc, conf, landms = self.net(image)
            # ---------------------------------------------------#
            #   Decode the RetinaFace output; boxes, scores and landmarks
            #   are then filtered by non-maximum suppression
            # ---------------------------------------------------#
            boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])

            conf = conf.data.squeeze(0)[:, 1:2]

            landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])

            # -----------------------------------------------------------#
            #   Stack boxes, confidences and landmarks, then run NMS
            # -----------------------------------------------------------#
            boxes_conf_landms = torch.cat([boxes, conf, landms], -1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)

            # ---------------------------------------------------#
            #   If nothing was detected, return the original image
            # ---------------------------------------------------#
            if len(boxes_conf_landms) <= 0:
                return old_image, 'False'

            # ---------------------------------------------------------#
            #   If letterboxing was used, remove the gray-padding offsets
            # ---------------------------------------------------------#
            if self.letterbox_image:
                boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
                                                             np.array([self.retinaface_input_shape[0],
                                                                       self.retinaface_input_shape[1]]),
                                                             np.array([im_height, im_width]))

            boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
            boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks

        # ---------------------------------------------------#
        #   RetinaFace detection - end
        # ---------------------------------------------------#

        # -----------------------------------------------#
        #   FaceNet encoding - start
        # -----------------------------------------------#
        face_encodings = []
        for boxes_conf_landm in boxes_conf_landms:
            # ----------------------#
            #   Crop the face and align it
            # ----------------------#
            boxes_conf_landm = np.maximum(boxes_conf_landm, 0)
            crop_img = np.array(old_image)[int(boxes_conf_landm[1]):int(boxes_conf_landm[3]),
                       int(boxes_conf_landm[0]):int(boxes_conf_landm[2])]
            landmark = np.reshape(boxes_conf_landm[5:], (5, 2)) - np.array(
                [int(boxes_conf_landm[0]), int(boxes_conf_landm[1])])
            crop_img, _ = Alignment_1(crop_img, landmark)

            # ----------------------#
            #   Face encoding
            # ----------------------#
            crop_img = np.array(
                letterbox_image(np.uint8(crop_img), (self.facenet_input_shape[1], self.facenet_input_shape[0]))) / 255
            crop_img = np.expand_dims(crop_img.transpose(2, 0, 1), 0)
            with torch.no_grad():
                crop_img = torch.from_numpy(crop_img).type(torch.FloatTensor)
                if self.cuda:
                    crop_img = crop_img.cuda()

                # -----------------------------------------------#
                #   Compute the 128-dimensional embedding with FaceNet
                # -----------------------------------------------#
                face_encoding = self.facenet(crop_img)[0].cpu().numpy()
                face_encodings.append(face_encoding)
        # -----------------------------------------------#
        #   FaceNet encoding - end
        # -----------------------------------------------#

        # -----------------------------------------------#
        #   Face matching - start
        # -----------------------------------------------#
        face_names = []
        for face_encoding in face_encodings:
            # -----------------------------------------------------#
            #   Compare this face with every known face and
            #   compute the distances
            # -----------------------------------------------------#
            matches, face_distances = compare_faces(self.known_face_encodings, face_encoding,
                                                    tolerance=self.facenet_threhold)
            name = "Unknown"
            # -----------------------------------------------------#
            #   Find the index of the closest known face and
            #   check whether it is within the tolerance
            # -----------------------------------------------------#
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = self.known_face_names[best_match_index]
            if flag == 0:  # liveness not yet passed: suppress the name
                name = "False"
            face_names.append(name)
        # -----------------------------------------------#
        #   Face matching - end
        # -----------------------------------------------#
        for i, b in enumerate(boxes_conf_landms):
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            # ---------------------------------------------------#
            #   b[0]-b[3] are the face box coordinates, b[4] the score
            # ---------------------------------------------------#
            cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(old_image, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

            # ---------------------------------------------------#
            #   b[5]-b[14] are the five landmark coordinates
            # ---------------------------------------------------#
            cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)

            name = face_names[i]
            # font = cv2.FONT_HERSHEY_SIMPLEX
            # cv2.putText(old_image, name, (b[0] , b[3] - 15), font, 0.75, (255, 255, 255), 2)
            # --------------------------------------------------------------#
            #   cv2.putText cannot draw Chinese; the PIL round-trip below
            #   can, at some cost in detection speed. If Chinese labels are
            #   not required, switch back to cv2 with English-only labels.
            # --------------------------------------------------------------#
            old_image = cv2ImgAddText(old_image, name, b[0] + 5, b[3] - 25)
        if face_names:
            return old_image, face_names[0]
        else:
            return old_image, 'False'
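
Before wiring this into the liveness pipeline, here is a minimal usage sketch of the Retinaface class above, mirroring how the main program later calls it. It assumes the weight and encoding files listed in _defaults exist under model_data, and test.jpg is a placeholder image:

import cv2
import numpy as np
from retinaface_new import Retinaface

retinaface = Retinaface()
frame = cv2.imread("test.jpg")                        # placeholder test image
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)          # the class expects RGB input
drawn, name = retinaface.live_detect_image(rgb, 1)    # flag=1: liveness already passed
drawn = cv2.cvtColor(np.array(drawn), cv2.COLOR_RGB2BGR)
cv2.imwrite("result.jpg", drawn)
print("recognized:", name)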

Blink Detection

Blink detection is a key part of this application; it watches for blinks using the eye aspect ratio (EAR). In the BlinkDetection class the EAR threshold is set to 0.2: when the EAR falls below this value the eyes are treated as closed, and a blink is counted once they reopen.
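
For reference, with the six landmarks p1...p6 of one eye (dlib points 36-41 for the left eye and 42-47 for the right), the EAR computed in the code below is

    EAR = (||p2 - p6|| + ||p3 - p5||) / (2 * ||p1 - p4||)

i.e. the mean of the two vertical eyelid distances divided by the horizontal eye width. An open eye typically yields an EAR around 0.25-0.3, and the value drops sharply toward zero as the eye closes.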

class BlinkDetection:
    def __init__(self):
        self.ear = None
        self.status = None
        self.frame_counter = 0
        self.blink_counter = 0
        self.EAR_THRESHOLD = 0.2  # EAR threshold below which the eyes count as closed

    def eye_aspect_ratio(self, eye):
        A = np.linalg.norm(eye[1] - eye[5])
        B = np.linalg.norm(eye[2] - eye[4])
        C = np.linalg.norm(eye[0] - eye[3])
        ear = (A + B) / (2.0 * C)
        return ear

    def detect(self, landmarks):
        left_eye = landmarks[36:42]
        right_eye = landmarks[42:48]

        EAR_left = self.eye_aspect_ratio(left_eye)
        EAR_right = self.eye_aspect_ratio(right_eye)

        self.ear = (EAR_left + EAR_right) / 2.0

        if self.ear < self.EAR_THRESHOLD:
            self.frame_counter += 1
            self.status = "Blinking"
        else:
            if self.frame_counter >= 2:  # at least 2 consecutive low-EAR frames count as one blink
                self.blink_counter += 1
                self.frame_counter = 0
            self.status = "Open"

        return self.blink_counter, self.status, self.ear

Mouth Movement Detection

Mouth movement detection checks whether the user opens their mouth. The MouthDetection class computes the mouth aspect ratio (MAR) and compares it to a threshold of 0.5: a MAR above 0.5 means the mouth is open.
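
Concretely, with mouth = landmarks[48:68], the code below measures two vertical lip distances against the mouth width (point numbers refer to dlib's 68-point scheme):

    MAR = (||p50 - p57|| + ||p52 - p55||) / (2 * ||p48 - p54||)

where p48 and p54 are the mouth corners. A wide-open mouth pushes the MAR well above the 0.5 threshold, while a closed mouth stays far below it.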

class MouthDetection:

    def __init__(self):
        self.mStart, self.mEnd = (48, 68)
        self.mouth_counter = 0
        self.MAR_THRESHOLD = 0.5
        self.mouth_open = False  # the mouth starts out closed

    def mouth_aspect_ratio(self, mouth):
        A = np.linalg.norm(mouth[2] - mouth[9])
        B = np.linalg.norm(mouth[4] - mouth[7])
        C = np.linalg.norm(mouth[0] - mouth[6])
        mar = (A + B) / (2.0 * C)
        return mar

    def detect(self, landmarks):
        mouth = landmarks[self.mStart:self.mEnd]
        mar = self.mouth_aspect_ratio(mouth)

        if mar > self.MAR_THRESHOLD:
            if not self.mouth_open:  # transition: closed -> open
                self.mouth_counter += 1
                self.mouth_open = True
        else:
            if self.mouth_open:  # transition: open -> closed
                self.mouth_open = False

        return self.mouth_counter

Head Pose Detection

Head pose detection monitors how the user's head is rotated. The HeadPoseDetection class computes a head angle from the line connecting the two eye centers and classifies the head as left, right, or neutral against +/-10 degree thresholds.
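
To make the geometry concrete: the angle is degrees(arctan2(dY, dX)), where dX and dY are the offsets between the right and left eye centers. With the eyes level, dY is near 0 and the angle stays around 0 degrees (neutral). Tilting the head so the right eye drops 2 px for every 10 px of eye separation gives arctan2(2, 10), about 11.3 degrees, which crosses the +10 threshold and registers as "left". Strictly speaking this measures head tilt (roll) rather than a true left/right turn (yaw), so a modest tilt is enough to trigger it.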

class HeadPoseDetection:
    def __init__(self):
        self.left_counter = 0
        self.right_counter = 0

        self.nod_threshold = 10
        self.low_threshold = -10
        self.head_status = "neutral"

    def calculate_head_pose(self, shape):
        # Roll angle of the line through the two eye centers, in degrees.
        # Positive when the right eye sits lower than the left eye
        # (image y grows downward).
        x, y = zip(*shape)
        face_center = (int(np.mean(x)), int(np.mean(y)))  # currently unused
        left_eye_center = np.mean(shape[36:42], axis=0)
        right_eye_center = np.mean(shape[42:48], axis=0)
        dX = right_eye_center[0] - left_eye_center[0]
        dY = right_eye_center[1] - left_eye_center[1]
        angle = np.degrees(np.arctan2(dY, dX))
        return angle

    def detect(self, shape):
        angle = self.calculate_head_pose(shape)

        if angle > self.nod_threshold:
            self.head_status = "left"
            self.left_counter += 1
            return self.head_status, self.left_counter

        elif angle < self.low_threshold:
            self.head_status = "right"
            self.right_counter += 1
            return self.head_status, self.right_counter
        else:
            self.head_status = "neutral"

            return self.head_status, 0

Complete Code

The FaceDetection class ties the pieces above together and runs face detection and pose recognition on a camera feed or a video file. The user must perform the requested actions (blinking, opening the mouth, turning the head) to move the application into its "flag" state; once triggered, the application switches to RetinaFace to detect and recognize faces, displaying the video frames in a window.
The code first picks a random order over the three checks: blinking, mouth opening, and head pose. Each check has its own counter (a blink counter, a mouth counter, and a head counter), which increments only when its condition is met. Once all three counters have reached their thresholds, the application's flag is set to 1, meaning liveness detection has succeeded. RetinaFace then detects and recognizes faces, the FPS is computed, and the results are drawn onto the frames. Finally, with the flag set to 1, face recognition or any follow-up operation can proceed knowing that liveness has already been verified. Randomizing the action order makes the liveness check stricter, improving security and accuracy.

"""
NAME : try_7
USER : admin
DATE : 9/10/2023
PROJECT_NAME : new_live_face
CSDN : friklogff
"""
import random
import time
import cv2
import numpy as np
from retinaface_new import Retinaface
import dlib
from imutils import face_utils





class BlinkDetection:
    def __init__(self):
        self.ear = None
        self.status = None
        self.frame_counter = 0
        self.blink_counter = 0
        self.EAR_THRESHOLD = 0.2  # EAR threshold below which the eyes count as closed

    def eye_aspect_ratio(self, eye):
        A = np.linalg.norm(eye[1] - eye[5])
        B = np.linalg.norm(eye[2] - eye[4])
        C = np.linalg.norm(eye[0] - eye[3])
        ear = (A + B) / (2.0 * C)
        return ear

    def detect(self, landmarks):
        left_eye = landmarks[36:42]
        right_eye = landmarks[42:48]

        EAR_left = self.eye_aspect_ratio(left_eye)
        EAR_right = self.eye_aspect_ratio(right_eye)

        self.ear = (EAR_left + EAR_right) / 2.0

        if self.ear < self.EAR_THRESHOLD:
            self.frame_counter += 1
            self.status = "Blinking"
        else:
            if self.frame_counter >= 2:  # at least 2 consecutive low-EAR frames count as one blink
                self.blink_counter += 1
                self.frame_counter = 0
            self.status = "Open"

        return self.blink_counter, self.status, self.ear


class MouthDetection:

    def __init__(self):
        self.mStart, self.mEnd = (48, 68)
        self.mouth_counter = 0
        self.MAR_THRESHOLD = 0.5
        self.mouth_open = False  # the mouth starts out closed

    def mouth_aspect_ratio(self, mouth):
        A = np.linalg.norm(mouth[2] - mouth[9])
        B = np.linalg.norm(mouth[4] - mouth[7])
        C = np.linalg.norm(mouth[0] - mouth[6])
        mar = (A + B) / (2.0 * C)
        return mar

    def detect(self, landmarks):
        mouth = landmarks[self.mStart:self.mEnd]
        mar = self.mouth_aspect_ratio(mouth)

        if mar > self.MAR_THRESHOLD:
            if not self.mouth_open:  # transition: closed -> open
                self.mouth_counter += 1
                self.mouth_open = True
        else:
            if self.mouth_open:  # transition: open -> closed
                self.mouth_open = False

        return self.mouth_counter


class HeadPoseDetection:
    def __init__(self):
        self.left_counter = 0
        self.right_counter = 0

        self.nod_threshold = 10
        self.low_threshold = -10
        self.head_status = "neutral"

    def calculate_head_pose(self, shape):
        # Roll angle of the line through the two eye centers, in degrees.
        # Positive when the right eye sits lower than the left eye
        # (image y grows downward).
        x, y = zip(*shape)
        face_center = (int(np.mean(x)), int(np.mean(y)))  # currently unused
        left_eye_center = np.mean(shape[36:42], axis=0)
        right_eye_center = np.mean(shape[42:48], axis=0)
        dX = right_eye_center[0] - left_eye_center[0]
        dY = right_eye_center[1] - left_eye_center[1]
        angle = np.degrees(np.arctan2(dY, dX))
        return angle

    def detect(self, shape):
        angle = self.calculate_head_pose(shape)

        if angle > self.nod_threshold:
            self.head_status = "left"
            self.left_counter += 1
            return self.head_status, self.left_counter

        elif angle < self.low_threshold:
            self.head_status = "right"
            self.right_counter += 1
            return self.head_status, self.right_counter
        else:
            self.head_status = "neutral"

            return self.head_status, 0


class FaceDetection:
    def __init__(self, video_path, video_save_path="", video_fps=25.0, use_camera=False):

        self.name = None
        self.mouth_flag = False
        self.head_flag = False
        self.blink_flag = False
        self.random_flag = random.randint(1, 3)
        if use_camera:
            self.capture = cv2.VideoCapture(0)
        else:
            self.capture = cv2.VideoCapture(video_path)
        self.video_save_path = video_save_path
        if video_save_path != "":
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            size = (int(self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            self.out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
        self.ref, frame = self.capture.read()

        if not self.ref:
            raise ValueError("Failed to read from the camera/video. Check that the camera is "
                             "installed correctly and that the video path is valid.")
        self.fps = 0.0
        self.flag = 0
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

        self.blink_detector = BlinkDetection()
        self.mouth_detector = MouthDetection()
        self.head_pose_detector = HeadPoseDetection()

        self.nod_threshold = 10
        self.low_threshold = -10
        self.head_status = "neutral"
        self.blink_counter = 0
        self.mouth_counter = 0
        self.head_counter = 0
        self.ear = None
        self.status = None
        self.retinaface = Retinaface()

    def detect_blink(self, frame, landmarks):
        self.blink_counter, self.status, self.ear = self.blink_detector.detect(landmarks)
        cv2.putText(frame, "Blinks: {}".format(self.blink_counter), (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                    (0, 0, 255), 2)
        cv2.putText(frame, "EAR: {:.2f}".format(self.ear), (300, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.putText(frame, "Eyes Status: {}".format(self.status), (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 0, 0),
                    2)
        return self.blink_counter

    def detect_mouth(self, frame, landmarks):
        self.mouth_counter = self.mouth_detector.detect(landmarks)
        cv2.putText(frame, "Mouth Count: {}".format(self.mouth_counter), (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                    (0, 0, 255), 2)
        return self.mouth_counter

    def detect_head_pose(self, frame, gray, face_rectangle):
        shape = self.predictor(gray, face_rectangle)
        shape = face_utils.shape_to_np(shape)
        self.head_status, self.head_counter = self.head_pose_detector.detect(shape)
        cv2.putText(frame, "Head Status: {}".format(self.head_status), (10, 120), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                    (0, 0, 255),
                    2)
        cv2.putText(frame, "Head Count: {}".format(self.head_counter), (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                    (0, 0, 255),
                    2)
        return self.head_counter

    def process_frame(self):
        t1 = time.time()
        self.ref, self.frame = self.capture.read()
        if not self.ref:
            return None
        gray = cv2.cvtColor(self.frame, cv2.COLOR_BGR2GRAY)
        faces = self.detector(gray, 0)
        if self.flag == 1:
            self.frame = cv2.cvtColor(self.frame, cv2.COLOR_BGR2RGB)
            old_image, self.name = self.retinaface.live_detect_image(self.frame, self.flag)
            self.frame = np.array(old_image)
            self.frame = cv2.cvtColor(self.frame, cv2.COLOR_RGB2BGR)
            self.fps = (self.fps + (1. / (time.time() - t1))) / 2
            # print("fps= %.2f" % (self.fps))
            self.frame = cv2.putText(self.frame, "fps= %.2f" % self.fps, (200, 60), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        elif len(faces) != 0:
            largest_index = self._largest_face(faces)
            face_rectangle = faces[largest_index]
            landmarks = np.matrix([[p.x, p.y] for p in self.predictor(self.frame, face_rectangle).parts()])
            if self.random_flag == 1:
                # Run the blink detection step
                self.detect_blink(self.frame, landmarks)
                if self.blink_counter > 3:
                    self.blink_flag = True
                    self.random_flag = random.randint(1, 3)

            if self.random_flag == 2:

                # Run the mouth movement detection step
                self.detect_mouth(self.frame, landmarks)
                if self.mouth_counter > 3:
                    self.mouth_flag = True
                    self.random_flag = random.randint(1, 3)
            if self.random_flag == 3:
                # Run the head pose detection step
                self.detect_head_pose(self.frame, gray, face_rectangle)
                if self.head_counter > 3:  # hold the turn for a few frames, matching the other checks
                    self.head_flag = True
                    self.random_flag = random.randint(1, 3)
            if self.blink_flag and self.mouth_flag and self.head_flag:
                self.flag = 1

        if self.video_save_path != "":
            self.out.write(self.frame)

        return self.ref, self.frame

    def _largest_face(self, dets):
        if len(dets) == 1:
            return 0
        face_areas = [(det.right() - det.left()) * (det.bottom() - det.top()) for det in dets]
        largest_area = face_areas[0]
        largest_index = 0
        for index in range(1, len(dets)):
            if face_areas[index] > largest_area:
                largest_index = index
                largest_area = face_areas[index]
        print("largest_face index is {} in {} faces".format(largest_index, len(dets)))
        return largest_index

    def release(self):
        print("Video Detection Done!")
        self.capture.release()
        if self.video_save_path != "":
            print("Save processed video to the path:" + self.video_save_path)
            self.out.release()

    def get_blink_counter(self):
        return self.blink_counter

    def get_mouth_counter(self):
        return self.mouth_counter

    def get_head_counter(self):
        return self.head_counter

    def get_flag(self):
        return self.flag

    def get_name(self):
        return self.name



if __name__ == "__main__":
    detector = FaceDetection('R.mp4')  # video file path; pass 0 to use the default camera instead
    # detector = FaceDetection(0)  # read from the default camera

    while True:
        flag = detector.get_flag()

        ref, frame = detector.process_frame()
        if frame is None:
            break
        cv2.imshow("Frame", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if flag == 1:
            print(flag)
            cv2.imwrite("last_frame.png", frame)
            # print(fname)
            break
    detector.release()
    cv2.destroyAllWindows()

Conclusion and Outlook

Face detection and pose recognition are among the most active research directions in computer vision, with broad application prospects. We can expect further innovation to keep improving their accuracy and practicality.

In this article we saw how to implement face detection and pose recognition with Python and a handful of libraries, and how actions such as blinking, opening the mouth, and turning the head can drive different parts of an application. This is only a starting point; future applications will be smarter and more versatile.

Future directions include:

  1. Real-time applications: as hardware keeps improving, real-time face detection and pose recognition will become more practical for virtual reality, augmented reality, and interactive games.

  2. Emotion analysis: face detection can feed emotion analysis, recognizing a user's mood and emotional state to improve the user experience.

  3. Biometrics: face recognition is already used in biometric applications such as face unlock and identity verification.

  4. Medical applications: face detection and pose recognition can help monitor a patient's eye, mouth, and head movements to spot symptoms of disease early.

  5. Human-computer interaction: further improvements to interaction, including gesture control and facial expression recognition.

Overall, face detection and pose recognition will keep pushing computer vision forward, bringing smarter and more interactive capabilities to a wide range of applications. The field is full of opportunity and holds enormous potential for developers and researchers who want to dig deeper.

The example application in this article is only a beginning; you can extend it and apply these techniques to many more interesting projects. Whether you are a computer vision expert or a curious beginner, the field offers endless room for exploration and innovation. I hope this article sparks your interest in face detection and pose recognition and proves useful in your future projects.

Next Steps

The liveness detection algorithm in this article offers limited security. Next, I plan to study algorithms for training dedicated liveness models and share what I learn along the way.

