|
#include <cmath> |
|
#include "mediapipe/framework/calculator_framework.h" |
|
#include "mediapipe/framework/formats/landmark.pb.h" |
|
#include "mediapipe/framework/formats/rect.pb.h" |
|
|
|
namespace mediapipe |
|
{ |
|
|
|
namespace |
|
{ |
|
constexpr char normRectTag[] = "NORM_RECT"; |
|
constexpr char normalizedLandmarkListTag[] = "NORM_LANDMARKS"; |
|
} // namespace |
|
|
|
// Graph config: |
|
// |
|
// node { |
|
// calculator: "HandGestureRecognitionCalculator" |
|
// input_stream: "NORM_LANDMARKS:scaled_landmarks" |
|
// input_stream: "NORM_RECT:hand_rect_for_next_frame" |
|
// } |
|
class HandGestureRecognitionCalculator : public CalculatorBase |
|
{ |
|
public: |
|
static ::mediapipe::Status GetContract(CalculatorContract *cc); |
|
::mediapipe::Status Open(CalculatorContext *cc) override; |
|
|
|
::mediapipe::Status Process(CalculatorContext *cc) override; |
|
|
|
private: |
|
float get_Euclidean_DistanceAB(float a_x, float a_y, float b_x, float b_y) |
|
{ |
|
float dist = std::pow(a_x - b_x, 2) + pow(a_y - b_y, 2); |
|
return std::sqrt(dist); |
|
} |
|
|
|
bool isThumbNearFirstFinger(NormalizedLandmark point1, NormalizedLandmark point2) |
|
{ |
|
float distance = this->get_Euclidean_DistanceAB(point1.x(), point1.y(), point2.x(), point2.y()); |
|
return distance < 0.1; |
|
} |
|
}; |
|
|
|
REGISTER_CALCULATOR(HandGestureRecognitionCalculator); |
|
|
|
::mediapipe::Status HandGestureRecognitionCalculator::GetContract( |
|
CalculatorContract *cc) |
|
{ |
|
RET_CHECK(cc->Inputs().HasTag(normalizedLandmarkListTag)); |
|
cc->Inputs().Tag(normalizedLandmarkListTag).Set<mediapipe::NormalizedLandmarkList>(); |
|
|
|
RET_CHECK(cc->Inputs().HasTag(normRectTag)); |
|
cc->Inputs().Tag(normRectTag).Set<NormalizedRect>(); |
|
|
|
return ::mediapipe::OkStatus(); |
|
} |
|
|
|
::mediapipe::Status HandGestureRecognitionCalculator::Open( |
|
CalculatorContext *cc) |
|
{ |
|
cc->SetOffset(TimestampDiff(0)); |
|
return ::mediapipe::OkStatus(); |
|
} |
|
|
|
::mediapipe::Status HandGestureRecognitionCalculator::Process( |
|
CalculatorContext *cc) |
|
{ |
|
// hand closed (red) rectangle |
|
const auto rect = &(cc->Inputs().Tag(normRectTag).Get<NormalizedRect>()); |
|
float width = rect->width(); |
|
float height = rect->height(); |
|
|
|
if (width < 0.01 || height < 0.01) |
|
{ |
|
LOG(INFO) << "No Hand Detected"; |
|
return ::mediapipe::OkStatus(); |
|
} |
|
|
|
const auto &landmarkList = cc->Inputs() |
|
.Tag(normalizedLandmarkListTag) |
|
.Get<mediapipe::NormalizedLandmarkList>(); |
|
RET_CHECK_GT(landmarkList.landmark_size(), 0) << "Input landmark vector is empty."; |
|
|
|
// finger states |
|
bool thumbIsOpen = false; |
|
bool firstFingerIsOpen = false; |
|
bool secondFingerIsOpen = false; |
|
bool thirdFingerIsOpen = false; |
|
bool fourthFingerIsOpen = false; |
|
// |
|
|
|
float pseudoFixKeyPoint = landmarkList.landmark(2).x(); |
|
if (landmarkList.landmark(3).x() < pseudoFixKeyPoint && landmarkList.landmark(4).x() < pseudoFixKeyPoint) |
|
{ |
|
thumbIsOpen = true; |
|
} |
|
|
|
pseudoFixKeyPoint = landmarkList.landmark(6).y(); |
|
if (landmarkList.landmark(7).y() < pseudoFixKeyPoint && landmarkList.landmark(8).y() < pseudoFixKeyPoint) |
|
{ |
|
firstFingerIsOpen = true; |
|
} |
|
|
|
pseudoFixKeyPoint = landmarkList.landmark(10).y(); |
|
if (landmarkList.landmark(11).y() < pseudoFixKeyPoint && landmarkList.landmark(12).y() < pseudoFixKeyPoint) |
|
{ |
|
secondFingerIsOpen = true; |
|
} |
|
|
|
pseudoFixKeyPoint = landmarkList.landmark(14).y(); |
|
if (landmarkList.landmark(15).y() < pseudoFixKeyPoint && landmarkList.landmark(16).y() < pseudoFixKeyPoint) |
|
{ |
|
thirdFingerIsOpen = true; |
|
} |
|
|
|
pseudoFixKeyPoint = landmarkList.landmark(18).y(); |
|
if (landmarkList.landmark(19).y() < pseudoFixKeyPoint && landmarkList.landmark(20).y() < pseudoFixKeyPoint) |
|
{ |
|
fourthFingerIsOpen = true; |
|
} |
|
|
|
// Hand gesture recognition |
|
if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "FIVE!"; |
|
} |
|
else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "FOUR!"; |
|
} |
|
else if (thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "TREE!"; |
|
} |
|
else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "TWO!"; |
|
} |
|
else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "ONE!"; |
|
} |
|
else if (!thumbIsOpen && firstFingerIsOpen && secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "YEAH!"; |
|
} |
|
else if (!thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "ROCK!"; |
|
} |
|
else if (thumbIsOpen && firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "SPIDERMAN!"; |
|
} |
|
else if (!thumbIsOpen && !firstFingerIsOpen && !secondFingerIsOpen && !thirdFingerIsOpen && !fourthFingerIsOpen) |
|
{ |
|
LOG(INFO) << "FIST!"; |
|
} |
|
else if (!firstFingerIsOpen && secondFingerIsOpen && thirdFingerIsOpen && fourthFingerIsOpen && this->isThumbNearFirstFinger(landmarkList.landmark(4), landmarkList.landmark(8))) |
|
{ |
|
LOG(INFO) << "OK!"; |
|
} |
|
else |
|
{ |
|
LOG(INFO) << "Finger States: " << thumbIsOpen << firstFingerIsOpen << secondFingerIsOpen << thirdFingerIsOpen << fourthFingerIsOpen; |
|
LOG(INFO) << "___"; |
|
} |
|
|
|
return ::mediapipe::OkStatus(); |
|
} // namespace mediapipe |
|
|
|
} // namespace mediapipe |
Hey, I created a desktop application using some of your logic do check out.
https://github.com/atharvakale31/Custom_Gesture_Control
eg: Control power point slide
