Monado OpenXR Runtime
Loading...
Searching...
No Matches
hg_sync.hpp
Go to the documentation of this file.
1// Copyright 2022, Collabora, Ltd.
2// SPDX-License-Identifier: BSL-1.0
3/*!
4 * @file
5 * @brief Mercury main header!
6 * @author Jakob Bornecrantz <jakob@collabora.com>
7 * @author Moshi Turner <moshiturner@protonmail.com>
8 * @ingroup tracking
9 */
10
11#pragma once
12
13#include "hg_interface.h"
15
18
19#include "onnx/onnx_wrapper.hpp"
20
21#include "xrt/xrt_defines.h"
22#include "xrt/xrt_frame.h"
23#include "xrt/xrt_tracking.h"
24
25#include "math/m_api.h"
26#include "math/m_vec2.h"
27#include "math/m_vec3.h"
28#include "math/m_mathinclude.h"
30
32#include "util/u_logging.h"
33#include "util/u_sink.h"
35#include "util/u_worker.h"
36#include "util/u_trace_marker.h"
37#include "util/u_debug.h"
38#include "util/u_frame.h"
39#include "util/u_var.h"
40
41#include <assert.h>
42#include <stdio.h>
43#include <stdlib.h>
44#include <string.h>
45#include <stdint.h>
46
47#include <opencv2/opencv.hpp>
48#include <onnxruntime_c_api.h>
49
50#include "kine_common.hpp"
52
53
54namespace xrt::tracking::hand::mercury {
55
56using namespace xrt::auxiliary::util;
57using namespace xrt::auxiliary::math;
58using namespace xrt::auxiliary::onnx;
59
// Per-level logging helpers. Each macro forwards its variadic arguments to the matching
// U_LOG_IFL_* macro and passes `hgt->log_level` as the first argument, so `hgt` must be a
// pointer (or something usable with `->`) to an object that has a `log_level` member.
// `hgt` is not parenthesised in the expansion, so pass a simple expression.
60#define HG_TRACE(hgt, ...) U_LOG_IFL_T(hgt->log_level, __VA_ARGS__)
61#define HG_DEBUG(hgt, ...) U_LOG_IFL_D(hgt->log_level, __VA_ARGS__)
62#define HG_INFO(hgt, ...) U_LOG_IFL_I(hgt->log_level, __VA_ARGS__)
63#define HG_WARN(hgt, ...) U_LOG_IFL_W(hgt->log_level, __VA_ARGS__)
64#define HG_ERROR(hgt, ...) U_LOG_IFL_E(hgt->log_level, __VA_ARGS__)
65
// Compile-time size constants.
// NOTE(review): presumably the side lengths, in pixels, of the images fed to the hand
// detection model and to the keypoint model - confirm against the model code.
66static constexpr uint16_t kDetectionInputSize = 160;
67static constexpr uint16_t kKeypointInputSize = 128;
68
// NOTE(review): presumably the side length of the keypoint model's output heatmaps, and the
// gap used between tiles in the debug visualisation - confirm at the use sites.
69static constexpr uint16_t kKeypointOutputHeatmapSize = 22;
70static constexpr uint16_t kVisSpacerSize = 8;
71
// Named colour constants.
// NOTE(review): the component values read as R, G, B order (RED is 255, 30, 30 and YELLOW is
// 255, 255, 0) - confirm this matches the channel order of the images they are drawn onto.
72static const cv::Scalar RED(255, 30, 30);
73static const cv::Scalar YELLOW(255, 255, 0);
74static const cv::Scalar PINK(255, 0, 255);
75static const cv::Scalar GREEN(0, 255, 0);
76
// Two-entry colour table: index 0 is YELLOW, index 1 is RED.
// NOTE(review): presumably indexed by hand or view index - confirm at the use sites.
77static const cv::Scalar colors[2] = {YELLOW, RED};
78
// Lookup table from a [finger][position along finger] pair to an xrt_hand_joint value.
// Rows are ordered thumb, index, middle, ring, little. In the index..little rows the columns
// run metacarpal, proximal, intermediate, distal, tip. The thumb row lists no intermediate
// joint: its first column is XRT_HAND_JOINT_WRIST, followed by metacarpal, proximal,
// distal, tip.
// The table holds 25 entries. NOTE(review): the "26" in the name presumably refers to the
// total number of xrt_hand_joint values - confirm against xrt_defines.h.
79constexpr enum xrt_hand_joint joints_5x5_to_26[5][5] = {
// Row 0: thumb (with the wrist in column 0).
80 {
81 XRT_HAND_JOINT_WRIST,
82 XRT_HAND_JOINT_THUMB_METACARPAL,
83 XRT_HAND_JOINT_THUMB_PROXIMAL,
84 XRT_HAND_JOINT_THUMB_DISTAL,
85 XRT_HAND_JOINT_THUMB_TIP,
86 },
// Row 1: index finger.
87 {
88 XRT_HAND_JOINT_INDEX_METACARPAL,
89 XRT_HAND_JOINT_INDEX_PROXIMAL,
90 XRT_HAND_JOINT_INDEX_INTERMEDIATE,
91 XRT_HAND_JOINT_INDEX_DISTAL,
92 XRT_HAND_JOINT_INDEX_TIP,
93 },
// Row 2: middle finger.
94 {
95 XRT_HAND_JOINT_MIDDLE_METACARPAL,
96 XRT_HAND_JOINT_MIDDLE_PROXIMAL,
97 XRT_HAND_JOINT_MIDDLE_INTERMEDIATE,
98 XRT_HAND_JOINT_MIDDLE_DISTAL,
99 XRT_HAND_JOINT_MIDDLE_TIP,
100 },
// Row 3: ring finger.
101 {
102 XRT_HAND_JOINT_RING_METACARPAL,
103 XRT_HAND_JOINT_RING_PROXIMAL,
104 XRT_HAND_JOINT_RING_INTERMEDIATE,
105 XRT_HAND_JOINT_RING_DISTAL,
106 XRT_HAND_JOINT_RING_TIP,
107 },
// Row 4: little finger.
108 {
109 XRT_HAND_JOINT_LITTLE_METACARPAL,
110 XRT_HAND_JOINT_LITTLE_PROXIMAL,
111 XRT_HAND_JOINT_LITTLE_INTERMEDIATE,
112 XRT_HAND_JOINT_LITTLE_DISTAL,
113 XRT_HAND_JOINT_LITTLE_TIP,
114 },
115};
116
// Tags a region of interest with the step that produced it. The unscoped enum is wrapped in
// a namespace of the same name, so the type is spelled ROIProvenance::ROIProvenance and the
// enumerators ROIProvenance::HAND_DETECTION / ROIProvenance::POSE_PREDICTION.
117namespace ROIProvenance {
118 enum ROIProvenance
119 {
// NOTE(review): presumably "set by the hand detection model" - confirm at the use sites.
120 HAND_DETECTION,
// NOTE(review): presumably "derived from the predicted hand pose" - confirm at the use sites.
121 POSE_PREDICTION
122 };
123}
124
125
126// Forward declarations, so the structs below can hold HandTracking and ht_view pointers.
127struct HandTracking;
128struct ht_view;
129
130
// One hand expressed as 21 three-component keypoints.
// NOTE(review): keypoint ordering, coordinate frame and units are not stated here - confirm
// against the code that fills `kps`.
131struct Hand3D
132{
133 struct xrt_vec3 kps[21];
134};
135
// Fixed-size array of 21 vec2_5 elements, one per hand keypoint; this is the type of the
// `out_hand` parameter of make_projection_instructions().
// NOTE(review): vec2_5 is declared elsewhere; its layout is not visible from this header.
136using hand21_2d = std::array<vec2_5, 21>;
137
139{
140 Eigen::Quaternionf rot_quat = Eigen::Quaternionf::Identity();
141 float stereographic_radius = 0;
142 bool flip = false;
143 const t_camera_model_params &dist;
144
145 projection_instructions(const t_camera_model_params &dist) : dist(dist) {}
146};
147
149{
150 float *data = nullptr;
151 int64_t dimensions[4];
152 size_t num_dimensions = 0;
153
154 OrtValue *tensor = nullptr;
155 const char *name;
156};
157
159{
160 std::unique_ptr<OnnxWrapper> wrap = {};
161
162 std::vector<model_input_wrap> wraps = {};
163};
164
165// Multipurpose.
166// * Hand detector writes into center_px, size_px, found and hand_detection_confidence
167// * Keypoint estimator operates on this to get a direction/radius for the stereographic projection, and for the associated
168// keypoints.
170{
171 ROIProvenance::ROIProvenance provenance;
172
173 // Either set by the detection model or by predict_new_regions_of_interest/back_project
174 xrt_vec2 center_px;
175 float size_px;
176
177 bool found;
178 bool hand_detection_confidence;
179};
180
181
182
184{
185 ht_view *view;
186 // These are not duplicates of ht_view's regions_of_interest_this_frame!
187 // If some hands are already tracked, we have logic that only copies new ROIs to this frame's regions of
188 // interest.
189 hand_region_of_interest outputs[2];
190};
191
192
194{
195 ht_view *view;
196 bool hand_idx;
197};
198
200{
201 HandTracking *hgt;
202 onnx_state detection;
203 onnx_state keypoint[2];
204 int view;
205
206 struct t_camera_extra_info_one_view camera_info;
207
208 t_camera_model_params hgdist_orig;
209 // With fx, fy, cx, cy scaled to the current camera resolution as appropriate.
211
212
213 cv::Mat run_model_on_this;
214 cv::Mat debug_out_to_this;
215
216 struct hand_region_of_interest regions_of_interest_this_frame[2]; // left, right
217
218 struct keypoint_estimation_run_info run_info[2];
219};
220
221
223{
224 int num_hands;
225 float out_hand_size;
226 float out_hand_confidence;
227 float hand_size_refinement_schedule_x = 0;
228 float hand_size_refinement_schedule_y = 0;
229 bool optimizing = true;
230};
231
233{
234 // After setup, these reference the same piece of memory.
235 cv::Mat mat;
236 xrt_frame *xrtframe = NULL;
237
238 // After pushing to the debug UI, we reference the frame here so that we can copy memory out of it for next
239 // frame.
240 xrt_frame *old_frame = NULL;
241};
242
243/*!
244 * Main class of Mercury hand tracking.
245 *
246 * @ingroup aux_tracking
247 */
249{
250public:
251 // Base thing, has to be first.
252 t_hand_tracking_sync base = {};
253
254 struct u_sink_debug debug_sink_ann = {};
255 struct u_sink_debug debug_sink_model = {};
256 struct xrt_hand_masks_sink *hand_masks_sink;
257
258 float multiply_px_coord_for_undistort;
259
260
261 struct t_stereo_camera_calibration *calib;
262
263 struct xrt_size calibration_one_view_size_px = {};
264
265 // So that we can calibrate cameras at 1280x800 but ship images over USB at 640x400
266 struct xrt_size last_frame_one_view_size_px = {};
267
268 struct ht_view views[2] = {};
269
270 struct model_output_visualizers visualizers;
271
273
275
276
277 float baseline = {};
278 xrt_pose hand_pose_camera_offset = {};
279
280 uint64_t current_frame_timestamp = {};
281
282 bool debug_scribble = false;
283
284 char models_folder[1024];
285
286 enum u_logging_level log_level = U_LOGGING_INFO;
287
288 lm::KinematicHandLM *kinematic_hands[2];
289
290 // These are produced by the keypoint estimator and consumed by the nonlinear optimizer
291 // left hand, right hand THEN left view, right view
292 struct one_frame_input keypoint_outputs[2];
293
294 // Used to track whether this hand has *ever* been seen during this user's session, so that we can spend some
295 // extra time optimizing their hand size if one of their hands isn't visible for the first bit.
296 bool hand_seen_before[2] = {false, false};
297
298 // Used to:
299 // * see if a hand is currently being tracked.
300 // * If so, don't replace the bounding box with that from a hand detection.
301 // * Also, if both hands are being tracked, we just don't run the hand detector.
302 bool last_frame_hand_detected[2] = {false, false};
303
304 // Used to decide whether to run the keypoint estimator/nonlinear optimizer.
305 bool this_frame_hand_detected[2] = {false, false};
306
307 // Used to determine pose-predicted regions of interest. Contains the last 2 hand keypoint positions, or less
308 // if the hand has just started being tracked.
309 HistoryBuffer<Eigen::Array<float, 3, 21>, 2> history_hands[2] = {};
310
311 // Contains the last 2 timestamps, or less if hand tracking has just started.
312 HistoryBuffer<uint64_t, 2> history_timestamps = {};
313
314 // It'd be a staring contest between your hand and the heat death of the universe!
315 uint64_t hand_tracked_for_num_frames[2] = {0, 0};
316
317
318 // left hand, right hand
319 Eigen::Array<float, 3, 21> pose_predicted_keypoints[2];
320
321 int detection_counter = 0;
322
323 struct hand_size_refinement refinement = {};
324 float target_hand_size = STANDARD_HAND_SIZE;
325
326
327 xrt_frame *debug_frame;
328
329
330 // This should be removed.
331 void (*keypoint_estimation_run_func)(void *);
332
333
334
335 struct xrt_pose left_in_right = {};
336
337 u_frame_times_widget ft_widget = {};
338
339 struct hg_tuneable_values tuneable_values;
340
341public:
// Default constructor; only declared here, the definition lives elsewhere.
// `explicit` prevents implicit construction from an empty braced initializer list.
342 explicit HandTracking();
344
// Converts the C-facing t_hand_tracking_sync pointer back into a reference to the
// HandTracking object it belongs to.
// The cast reinterprets the pointer directly, which relies on `base` being the first member
// of this class (see the "has to be first" note on `base`), so that both share an address.
// No null or type check is performed: `ht_sync` must point at the `base` member of a live
// HandTracking, otherwise the returned reference is invalid.
345 static inline HandTracking &
346 fromC(t_hand_tracking_sync *ht_sync)
347 {
348 return *reinterpret_cast<HandTracking *>(ht_sync);
349 }
350
// Static member with a plain-function signature that takes the t_hand_tracking_sync pointer
// as its first argument; only declared here.
// NOTE(review): presumably installed as the frame-processing callback on `base`, taking one
// frame per camera and writing both hand joint sets and a timestamp through the out_*
// pointers - confirm against the definition.
351 static void
352 cCallbackProcess(struct t_hand_tracking_sync *ht_sync,
353 struct xrt_frame *left_frame,
354 struct xrt_frame *right_frame,
355 struct xrt_hand_joint_set *out_left_hand,
356 struct xrt_hand_joint_set *out_right_hand,
357 int64_t *out_timestamp_ns);
358
// Static member with a plain-function signature that takes the t_hand_tracking_sync pointer;
// only declared here.
// NOTE(review): presumably installed as the destroy callback on `base` and responsible for
// tearing down the owning HandTracking - confirm against the definition.
359 static void
360 cCallbackDestroy(t_hand_tracking_sync *ht_sync);
361};
362
363
// Declared here, defined elsewhere.
// NOTE(review): presumably sets up the hand detection model state in `wrap` for the tracker
// `hgt` - confirm against the definition.
364void
365init_hand_detection(HandTracking *hgt, onnx_state *wrap);
366
// Declared here, defined elsewhere.
// NOTE(review): presumably sets up the keypoint estimation model state in `wrap` for the
// tracker `hgt` - confirm against the definition.
367void
368init_keypoint_estimation(HandTracking *hgt, onnx_state *wrap);
369
370// These are passed into C callbacks, so they have to be extern "C".
// Both take an untyped pointer; the caller must pass a pointer to the run-info struct named
// in each function's comment.
371extern "C" {
372//! Runs hand detection, expects `ptr` to be a `hand_detection_run_info *`
373void
374run_hand_detection(void *ptr);
375
376//! Runs keypoint estimation, expects `ptr` to be a `keypoint_estimation_run_info *`
377void
378run_keypoint_estimation(void *ptr);
// The semicolon after the closing brace is an empty declaration.
379};
380
// Declared here, defined elsewhere.
// NOTE(review): presumably releases the resources held by `wrap` - confirm against the
// definition which members are freed.
381void
382release_onnx_state(onnx_state *wrap);
383
384
// Declared here, defined elsewhere. Results are returned through the two trailing reference
// parameters, `out_instructions` and `out_hand`.
// `dist` and `joints` are taken by non-const reference here, whereas
// stereographic_project_image() takes its camera parameters by const reference.
// NOTE(review): presumably builds the stereographic projection settings from the 3x21 joint
// array and also yields the projected 2D hand - confirm against the definition, including
// the units of `expand_val` and `twist`.
385void
386make_projection_instructions(t_camera_model_params &dist,
387 bool flip_after,
388 float expand_val,
389 float twist,
390 Eigen::Array<float, 3, 21> &joints,
391 projection_instructions &out_instructions,
392 hand21_2d &out_hand);
393
394
// Declared here, defined elsewhere. The result is returned through `out_instructions`.
// Unlike make_projection_instructions(), this overload-like variant takes a 3D direction and
// an angular radius instead of a joint array, and takes no camera parameters.
// NOTE(review): the units of `angular_radius`, `expand_val` and `twist` are not stated here -
// confirm against the definition.
395void
396make_projection_instructions_angular(xrt_vec3 direction_3d,
397 bool flip_after,
398 float angular_radius,
399 float expand_val,
400 float twist,
401 projection_instructions &out_instructions);
402
// Declared here, defined elsewhere. `dist`, `instructions` and `boundary_color` are read-only
// (const references); `out` is a non-const reference that receives the result.
// `debug_image` is the only image passed by pointer rather than by reference.
// NOTE(review): presumably `debug_image` may be null to skip debug drawing, and
// `boundary_color` is used when drawing onto it - confirm against the definition.
403void
404stereographic_project_image(const t_camera_model_params &dist,
405 const projection_instructions &instructions,
406 cv::Mat &input_image,
407 cv::Mat *debug_image,
408 const cv::Scalar &boundary_color,
409 cv::Mat &out);
410
411
412
413} // namespace xrt::tracking::hand::mercury
Stores some number of values in a ring buffer, overwriting the earliest-pushed-remaining element if o...
Definition u_template_historybuf.hpp:38
u_logging_level
Logging level enum.
Definition u_logging.h:45
@ U_LOGGING_INFO
Info messages: not very verbose, not indicating a problem.
Definition u_logging.h:48
xrt_hand_joint
Number of joints in a hand.
Definition xrt_defines.h:1442
Debug instrumentation for mercury_train or others to control hand tracking.
Public interface of Mercury hand tracking.
void run_keypoint_estimation(void *ptr)
Runs keypoint estimation, expects ptr to be a keypoint_estimation_run_info *
Definition hg_model.cpp:950
void run_hand_detection(void *ptr)
Runs hand detection, expects ptr to be a hand_detection_run_info *
Definition hg_model.cpp:410
Random common stuff for Mercury kinematic optimizers.
Interface for Levenberg-Marquardt kinematic optimizer.
C interface to math library.
Interoperability helpers connecting internal math types and Eigen.
Wrapper header for <math.h> to ensure pi-related math constants are defined.
C vec2 math library.
C vec3 math library.
C++-only functionality in the Math helper library.
Definition m_documentation.hpp:15
onnxruntime wrapper objects and functions.
Definition u_worker.c:91
Definition hg_debug_instrumentation.hpp:23
Definition u_worker.c:50
Information about image boundary and camera orientation for one view.
Definition t_hand_tracking.h:82
Floating point calibration data for a single calibrated camera.
Definition t_camera_models.h:59
Synchronously processes frames and returns two hands.
Definition t_hand_tracking.h:120
Stereo camera calibration data to be given to trackers.
Definition t_tracking.h:261
Definition u_frame_times_widget.h:24
Allows sink inputs and outputs to be debugged more safely.
Definition u_sink.h:214
A worker group where you submit tasks to.
Definition u_worker.h:102
A worker pool, can be shared between multiple worker groups.
Definition u_worker.h:33
Definition hg_sync.hpp:132
Main class of Mercury hand tracking.
Definition hg_sync.hpp:249
Definition hg_sync.hpp:200
Basic frame data structure - holds a pointer to buffer.
Definition xrt_frame.h:25
Joint set type used for hand tracking.
Definition xrt_defines.h:1521
An object to push xrt_hand_masks_sample to.
Definition xrt_tracking.h:196
A pose composed of a position and orientation.
Definition xrt_defines.h:492
Image size.
Definition xrt_defines.h:436
A 2 element vector with single floats.
Definition xrt_defines.h:268
A 3 element vector with single floats.
Definition xrt_defines.h:289
Simple, untemplated, C, float-only, camera (un)projection functions for various camera models.
Hand tracking interfaces.
Small debug helpers.
xrt_frame helpers.
Shared code for visualizing frametimes.
Basic logging functionality.
xrt_frame_sink converters and other helpers.
Ringbuffer implementation for keeping track of the past state of things.
Tracing support code, see Tracing support.
Variable tracking code.
Worker and threading pool.
Common defines and enums for XRT.
Data frame header.
Header defining the tracking system integration in Monado.