Monado OpenXR Runtime
hg_sync.hpp
Go to the documentation of this file.
1// Copyright 2022, Collabora, Ltd.
2// SPDX-License-Identifier: BSL-1.0
3/*!
4 * @file
5 * @brief Mercury main header!
6 * @author Jakob Bornecrantz <jakob@collabora.com>
7 * @author Moses Turner <moses@collabora.com>
8 * @ingroup tracking
9 */
10
11#pragma once
12
13#include "hg_interface.h"
15
18
19#include "xrt/xrt_defines.h"
20#include "xrt/xrt_frame.h"
21#include "xrt/xrt_tracking.h"
22
23#include "math/m_api.h"
24#include "math/m_vec2.h"
25#include "math/m_vec3.h"
26#include "math/m_mathinclude.h"
28
30#include "util/u_logging.h"
31#include "util/u_sink.h"
33#include "util/u_worker.h"
34#include "util/u_trace_marker.h"
35#include "util/u_debug.h"
36#include "util/u_frame.h"
37#include "util/u_var.h"
38
39#include <assert.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <stdint.h>
44
45#include <opencv2/opencv.hpp>
46#include <onnxruntime_c_api.h>
47
48#include "kine_common.hpp"
50
51
52namespace xrt::tracking::hand::mercury {
53
54using namespace xrt::auxiliary::util;
55using namespace xrt::auxiliary::math;
56
// Logging helpers, one per severity. Each forwards to the matching
// U_LOG_IFL_* macro, passing the `log_level` member of the object `hgt` points
// to, followed by the caller's arguments.
//
// `hgt` is parenthesised so that any pointer-valued expression (for example
// `*(p + 1)`) can be passed without the `->` binding to only part of it.
#define HG_TRACE(hgt, ...) U_LOG_IFL_T((hgt)->log_level, __VA_ARGS__)
#define HG_DEBUG(hgt, ...) U_LOG_IFL_D((hgt)->log_level, __VA_ARGS__)
#define HG_INFO(hgt, ...) U_LOG_IFL_I((hgt)->log_level, __VA_ARGS__)
#define HG_WARN(hgt, ...) U_LOG_IFL_W((hgt)->log_level, __VA_ARGS__)
#define HG_ERROR(hgt, ...) U_LOG_IFL_E((hgt)->log_level, __VA_ARGS__)
62
// Fixed sizes used by the hand tracking models and their visualisation.
// NOTE(review): going by the names, the first two are presumably the pixel
// side lengths of the detection and keypoint model inputs; confirm against the
// model code, which is not part of this header.
static constexpr uint16_t kDetectionInputSize = 160;
static constexpr uint16_t kKeypointInputSize = 128;

// NOTE(review): presumably the side length of one keypoint heatmap, and the
// gap left between tiles in the debug visualisation - confirm.
static constexpr uint16_t kKeypointOutputHeatmapSize = 22;
static constexpr uint16_t kVisSpacerSize = 8;
68
69static const cv::Scalar RED(255, 30, 30);
70static const cv::Scalar YELLOW(255, 255, 0);
71static const cv::Scalar PINK(255, 0, 255);
72static const cv::Scalar GREEN(0, 255, 0);
73
74static const cv::Scalar colors[2] = {YELLOW, RED};
75
76constexpr enum xrt_hand_joint joints_5x5_to_26[5][5] = {
77 {
78 XRT_HAND_JOINT_WRIST,
79 XRT_HAND_JOINT_THUMB_METACARPAL,
80 XRT_HAND_JOINT_THUMB_PROXIMAL,
81 XRT_HAND_JOINT_THUMB_DISTAL,
82 XRT_HAND_JOINT_THUMB_TIP,
83 },
84 {
85 XRT_HAND_JOINT_INDEX_METACARPAL,
86 XRT_HAND_JOINT_INDEX_PROXIMAL,
87 XRT_HAND_JOINT_INDEX_INTERMEDIATE,
88 XRT_HAND_JOINT_INDEX_DISTAL,
89 XRT_HAND_JOINT_INDEX_TIP,
90 },
91 {
92 XRT_HAND_JOINT_MIDDLE_METACARPAL,
93 XRT_HAND_JOINT_MIDDLE_PROXIMAL,
94 XRT_HAND_JOINT_MIDDLE_INTERMEDIATE,
95 XRT_HAND_JOINT_MIDDLE_DISTAL,
96 XRT_HAND_JOINT_MIDDLE_TIP,
97 },
98 {
99 XRT_HAND_JOINT_RING_METACARPAL,
100 XRT_HAND_JOINT_RING_PROXIMAL,
101 XRT_HAND_JOINT_RING_INTERMEDIATE,
102 XRT_HAND_JOINT_RING_DISTAL,
103 XRT_HAND_JOINT_RING_TIP,
104 },
105 {
106 XRT_HAND_JOINT_LITTLE_METACARPAL,
107 XRT_HAND_JOINT_LITTLE_PROXIMAL,
108 XRT_HAND_JOINT_LITTLE_INTERMEDIATE,
109 XRT_HAND_JOINT_LITTLE_DISTAL,
110 XRT_HAND_JOINT_LITTLE_TIP,
111 },
112};
113
/*!
 * Where a hand region of interest came from: the hand detection model, or a
 * prediction made from earlier poses.
 *
 * The unscoped enum is wrapped in a namespace of the same name, so the values
 * are written ROIProvenance::HAND_DETECTION and the type is
 * ROIProvenance::ROIProvenance.
 */
namespace ROIProvenance {
enum ROIProvenance
{
	HAND_DETECTION,
	POSE_PREDICTION
};
} // namespace ROIProvenance
121
122
// Forward declarations: structs defined further down in this header hold
// HandTracking* and ht_view* members before either type has been defined.
struct HandTracking;
struct ht_view;
126
127
128struct Hand3D
129{
130 struct xrt_vec3 kps[21];
131};
132
133using hand21_2d = std::array<vec2_5, 21>;
134
136{
137 Eigen::Quaternionf rot_quat = Eigen::Quaternionf::Identity();
138 float stereographic_radius = 0;
139 bool flip = false;
140 const t_camera_model_params &dist;
141
142 projection_instructions(const t_camera_model_params &dist) : dist(dist) {}
143};
144
146{
147 float *data = nullptr;
148 int64_t dimensions[4];
149 size_t num_dimensions = 0;
150
151 OrtValue *tensor = nullptr;
152 const char *name;
153};
154
156{
157 const OrtApi *api = nullptr;
158 OrtEnv *env = nullptr;
159
160 OrtMemoryInfo *meminfo = nullptr;
161 OrtSession *session = nullptr;
162
163 std::vector<model_input_wrap> wraps = {};
164};
165
166// Multipurpose.
167// * Hand detector writes into center_px, size_px, found and hand_detection_confidence
168// * Keypoint estimator operates on this to get a direction/radius for the stereographic projection, and for the
169// associated keypoints.
171{
172 ROIProvenance::ROIProvenance provenance;
173
174 // Either set by the detection model or by predict_new_regions_of_interest/back_project
175 xrt_vec2 center_px;
176 float size_px;
177
178 bool found;
179 bool hand_detection_confidence;
180};
181
182
183
185{
186 ht_view *view;
187 // These are not duplicates of ht_view's regions_of_interest_this_frame!
188 // If some hands are already tracked, we have logic that only copies new ROIs to this frame's regions of
189 // interest.
190 hand_region_of_interest outputs[2];
191};
192
193
195{
196 ht_view *view;
197 bool hand_idx;
198};
199
201{
202 HandTracking *hgt;
203 onnx_wrap detection;
204 onnx_wrap keypoint[2];
205 int view;
206
207 struct t_camera_extra_info_one_view camera_info;
208
209 t_camera_model_params hgdist_orig;
210 // With fx, fy, cx, cy scaled to the current camera resolution as appropriate.
212
213
214 cv::Mat run_model_on_this;
215 cv::Mat debug_out_to_this;
216
217 struct hand_region_of_interest regions_of_interest_this_frame[2]; // left, right
218
219 struct keypoint_estimation_run_info run_info[2];
220};
221
222
224{
225 int num_hands;
226 float out_hand_size;
227 float out_hand_confidence;
228 float hand_size_refinement_schedule_x = 0;
229 float hand_size_refinement_schedule_y = 0;
230 bool optimizing = true;
231};
232
234{
235 // After setup, these reference the same piece of memory.
236 cv::Mat mat;
237 xrt_frame *xrtframe = NULL;
238
239 // After pushing to the debug UI, we reference the frame here so that we can copy memory out of it for next
240 // frame.
241 xrt_frame *old_frame = NULL;
242};
243
244/*!
245 * Main class of Mercury hand tracking.
246 *
247 * @ingroup aux_tracking
248 */
250{
251public:
252 // Base thing, has to be first.
253 t_hand_tracking_sync base = {};
254
255 struct u_sink_debug debug_sink_ann = {};
256 struct u_sink_debug debug_sink_model = {};
257 struct xrt_hand_masks_sink *hand_masks_sink;
258
259 float multiply_px_coord_for_undistort;
260
261
262 struct t_stereo_camera_calibration *calib;
263
264 struct xrt_size calibration_one_view_size_px = {};
265
266 // So that we can calibrate cameras at 1280x800 but ship images over USB at 640x400
267 struct xrt_size last_frame_one_view_size_px = {};
268
269 struct ht_view views[2] = {};
270
271 struct model_output_visualizers visualizers;
272
274
276
277
278 float baseline = {};
279 xrt_pose hand_pose_camera_offset = {};
280
281 uint64_t current_frame_timestamp = {};
282
283 bool debug_scribble = false;
284
285 char models_folder[1024];
286
287 enum u_logging_level log_level = U_LOGGING_INFO;
288
289 lm::KinematicHandLM *kinematic_hands[2];
290
291 // These are produced by the keypoint estimator and consumed by the nonlinear optimizer
292 // left hand, right hand THEN left view, right view
293 struct one_frame_input keypoint_outputs[2];
294
295 // Used to track whether this hand has *ever* been seen during this user's session, so that we can spend some
296 // extra time optimizing their hand size if one of their hands isn't visible for the first bit.
297 bool hand_seen_before[2] = {false, false};
298
299 // Used to:
300 // * see if a hand is currently being tracked.
301 // * If so, don't replace the bounding box with that from a hand detection.
302 // * Also, if both hands are being tracked, we just don't run the hand detector.
303 bool last_frame_hand_detected[2] = {false, false};
304
305 // Used to decide whether to run the keypoint estimator/nonlinear optimizer.
306 bool this_frame_hand_detected[2] = {false, false};
307
308 // Used to determine pose-predicted regions of interest. Contains the last 2 hand keypoint positions, or fewer
309 // if the hand has just started being tracked.
310 HistoryBuffer<Eigen::Array<float, 3, 21>, 2> history_hands[2] = {};
311
312 // Contains the last 2 timestamps, or fewer if hand tracking has just started.
313 HistoryBuffer<uint64_t, 2> history_timestamps = {};
314
315 // It'd be a staring contest between your hand and the heat death of the universe!
316 uint64_t hand_tracked_for_num_frames[2] = {0, 0};
317
318
319 // left hand, right hand
320 Eigen::Array<float, 3, 21> pose_predicted_keypoints[2];
321
322 int detection_counter = 0;
323
324 struct hand_size_refinement refinement = {};
325 float target_hand_size = STANDARD_HAND_SIZE;
326
327
328 xrt_frame *debug_frame;
329
330
331 // This should be removed.
332 void (*keypoint_estimation_run_func)(void *);
333
334
335
336 struct xrt_pose left_in_right = {};
337
338 u_frame_times_widget ft_widget = {};
339
340 struct hg_tuneable_values tuneable_values;
341
342public:
343 explicit HandTracking();
345
346 static inline HandTracking &
347 fromC(t_hand_tracking_sync *ht_sync)
348 {
349 return *reinterpret_cast<HandTracking *>(ht_sync);
350 }
351
/*!
 * C-callable frame processing entry point (defined in hg_sync.cpp).
 *
 * Takes a left and a right frame and writes one joint set per hand plus a
 * timestamp through the out-pointers.
 * NOTE(review): presumably installed as the process callback on `base`;
 * whether the out-pointers may be null is not visible from the declaration.
 *
 * @param ht_sync           The t_hand_tracking_sync base of a HandTracking.
 * @param left_frame        Frame from the left view.
 * @param right_frame       Frame from the right view.
 * @param out_left_hand     Receives the left hand joint set.
 * @param out_right_hand    Receives the right hand joint set.
 * @param out_timestamp_ns  Receives a timestamp in nanoseconds.
 */
static void
cCallbackProcess(struct t_hand_tracking_sync *ht_sync,
                 struct xrt_frame *left_frame,
                 struct xrt_frame *right_frame,
                 struct xrt_hand_joint_set *out_left_hand,
                 struct xrt_hand_joint_set *out_right_hand,
                 uint64_t *out_timestamp_ns);
359
360 static void
361 cCallbackDestroy(t_hand_tracking_sync *ht_sync);
362};
363
364
365void
366init_hand_detection(HandTracking *hgt, onnx_wrap *wrap);
367
/*!
 * Run hand detection.
 * NOTE(review): takes an opaque pointer; the expected pointee type is not
 * visible from this declaration - confirm against the implementation.
 */
void
run_hand_detection(void *ptr);
370
371void
372init_keypoint_estimation(HandTracking *hgt, onnx_wrap *wrap);
373
/*!
 * Run keypoint estimation (defined in hg_model.cpp).
 *
 * The signature matches HandTracking::keypoint_estimation_run_func.
 * NOTE(review): the expected pointee type of @p ptr is not visible from this
 * declaration - confirm against the implementation.
 */
void
run_keypoint_estimation(void *ptr);
376
377void
378release_onnx_wrap(onnx_wrap *wrap);
379
380
381void
383 bool flip_after,
384 float expand_val,
385 float twist,
386 Eigen::Array<float, 3, 21> &joints,
387 projection_instructions &out_instructions,
388 hand21_2d &out_hand);
389
390
391void
392make_projection_instructions_angular(xrt_vec3 direction_3d,
393 bool flip_after,
394 float angular_radius,
395 float expand_val,
396 float twist,
397 projection_instructions &out_instructions);
398
399void
400stereographic_project_image(const t_camera_model_params &dist,
401 const projection_instructions &instructions,
402 cv::Mat &input_image,
403 cv::Mat *debug_image,
404 const cv::Scalar boundary_color,
405 cv::Mat &out);
406
407
408
409} // namespace xrt::tracking::hand::mercury
Stores some number of values in a ring buffer, overwriting the earliest-pushed-remaining element if o...
Definition: u_template_historybuf.hpp:38
u_logging_level
Logging level enum.
Definition: u_logging.h:40
@ U_LOGGING_INFO
Info messages: not very verbose, not indicating a problem.
Definition: u_logging.h:43
xrt_hand_joint
Number of joints in a hand.
Definition: xrt_defines.h:1154
Debug instrumentation for mercury_train or others to control hand tracking.
void make_projection_instructions(t_camera_model_params &dist, bool flip_after, float expand_val, float twist, Eigen::Array< float, 3, 21 > &joints, projection_instructions &out_instructions, hand21_2d &out_hand)
Definition: hg_image_distorter.cpp:492
Public interface of Mercury hand tracking.
void run_keypoint_estimation(void *ptr)
Definition: hg_model.cpp:637
Random common stuff for Mercury kinematic optimizers.
Interface for Levenberg-Marquardt kinematic optimizer.
C interface to math library.
Interoperability helpers connecting internal math types and Eigen.
Wrapper header for <math.h> to ensure pi-related math constants are defined.
C vec2 math library.
C vec3 math library.
C++-only functionality in the Math helper library.
Definition: m_documentation.hpp:15
Definition: u_worker.c:90
Definition: hg_debug_instrumentation.hpp:23
Definition: u_worker.c:49
Information about image boundary and camera orientation for one view.
Definition: t_hand_tracking.h:82
Floating point calibration data for a single calibrated camera.
Definition: t_camera_models.h:54
Synchronously processes frames and returns two hands.
Definition: t_hand_tracking.h:120
Stereo camera calibration data to be given to trackers.
Definition: t_tracking.h:248
Definition: u_frame_times_widget.h:24
Allows more safely to debug sink inputs and outputs.
Definition: u_sink.h:185
A worker group where you submit tasks to.
Definition: u_worker.h:102
A worker pool; can be shared between multiple worker groups.
Definition: u_worker.h:33
Definition: hg_sync.hpp:129
Main class of Mercury hand tracking.
Definition: hg_sync.hpp:250
static void cCallbackProcess(struct t_hand_tracking_sync *ht_sync, struct xrt_frame *left_frame, struct xrt_frame *right_frame, struct xrt_hand_joint_set *out_left_hand, struct xrt_hand_joint_set *out_right_hand, uint64_t *out_timestamp_ns)
Definition: hg_sync.cpp:701
Definition: hg_sync.hpp:201
Definition: kine_common.hpp:53
Definition: hg_sync.hpp:156
Basic frame data structure - holds a pointer to buffer.
Definition: xrt_frame.h:25
Joint set type used for hand tracking.
Definition: xrt_defines.h:1233
An object to push xrt_hand_masks_sample to.
Definition: xrt_tracking.h:192
A pose composed of a position and orientation.
Definition: xrt_defines.h:465
Image size.
Definition: xrt_defines.h:409
A 2 element vector with single floats.
Definition: xrt_defines.h:250
A 3 element vector with single floats.
Definition: xrt_defines.h:271
Simple, untemplated, C, float-only, camera (un)projection functions for various camera models.
Hand tracking interfaces.
Small debug helpers.
xrt_frame helpers.
Shared code for visualizing frametimes.
Basic logging functionality.
xrt_frame_sink converters and other helpers.
Ringbuffer implementation for keeping track of the past state of things.
Tracing support code, see Tracing support.
Variable tracking code.
Worker and threading pool.
Common defines and enums for XRT.
Data frame header.
Header defining the tracking system integration in Monado.