Upload two_stream_attn_v1_finetune_20260515T104743Z
Browse files
- README.md +4 -4
- config.json +5 -5
README.md
CHANGED
|
@@ -91,7 +91,7 @@ Input (B, T=32, 147)
|
|
| 91 |
|
| 92 |
| Class | Description |
|
| 93 |
|-------|-------------|
|
| 94 |
-
| `fist` |
|
| 95 |
| `swiping_right` | Horizontal swipe from left to right |
|
| 96 |
| `swiping_left` | Horizontal swipe from right to left |
|
| 97 |
| `swiping_down` | Vertical swipe downward |
|
|
@@ -106,7 +106,7 @@ Input (B, T=32, 147)
|
|
| 106 |
|
| 107 |
| Class | Mode | Command | Runtime handling |
|
| 108 |
|-------|------|---------|------------------|
|
| 109 |
-
| `fist` | `
|
| 110 |
| `swiping_right` | `discrete` | `next_slide` | Discrete command via GestureActivationController → CommandDispatcher |
|
| 111 |
| `swiping_left` | `discrete` | `previous_slide` | Discrete command via GestureActivationController → CommandDispatcher |
|
| 112 |
| `swiping_down` | `discrete` | `stop_presentation` | Discrete command via GestureActivationController → CommandDispatcher |
|
|
@@ -172,7 +172,7 @@ Two-phase transfer learning pipeline:
|
|
| 172 |
- **Phase 2 (fine-tuning):** head replaced and model adapted on Hybrid Jester+IPN 10-gesture vocabulary.
|
| 173 |
- **Stage A (frozen backbone):** 10 epoch(s) head-only warmup.
|
| 174 |
- **Stage B (full model):** up to 80 epoch(s) joint fine-tuning with scheduler/early stopping.
|
| 175 |
-
- **Stage B retention defences:** replay_max_samples_per_class=500, distillation_weight=0.0, replay_ce_weight=0.0, backbone_lr_multiplier=0.1, gpm_components=0, forgetting_penalty_weight=0.5.
|
| 176 |
|
| 177 |
## Training Configuration
|
| 178 |
|
|
@@ -182,7 +182,7 @@ Two-phase transfer learning pipeline:
|
|
| 182 |
| Input size | 147 |
|
| 183 |
| Hidden size | 96/stream (BiLSTM output: 192) |
|
| 184 |
| Projection dim | 96 |
|
| 185 |
-
| Num layers |
|
| 186 |
| MHA heads | 8 (head dim: 24) |
|
| 187 |
| Dropout | 0.4 |
|
| 188 |
| Learning rate | 3e-05 |
|
|
|
|
| 91 |
|
| 92 |
| Class | Description |
|
| 93 |
|-------|-------------|
|
| 94 |
+
| `fist` | Closed fist (all fingers curled, thumb tucked) |
|
| 95 |
| `swiping_right` | Horizontal swipe from left to right |
|
| 96 |
| `swiping_left` | Horizontal swipe from right to left |
|
| 97 |
| `swiping_down` | Vertical swipe downward |
|
|
|
|
| 106 |
|
| 107 |
| Class | Mode | Command | Runtime handling |
|
| 108 |
|-------|------|---------|------------------|
|
| 109 |
+
| `fist` | `discrete` | `erase_annotations` | Discrete command via GestureActivationController → CommandDispatcher |
|
| 110 |
| `swiping_right` | `discrete` | `next_slide` | Discrete command via GestureActivationController → CommandDispatcher |
|
| 111 |
| `swiping_left` | `discrete` | `previous_slide` | Discrete command via GestureActivationController → CommandDispatcher |
|
| 112 |
| `swiping_down` | `discrete` | `stop_presentation` | Discrete command via GestureActivationController → CommandDispatcher |
|
|
|
|
| 172 |
- **Phase 2 (fine-tuning):** head replaced and model adapted on Hybrid Jester+IPN 10-gesture vocabulary.
|
| 173 |
- **Stage A (frozen backbone):** 10 epoch(s) head-only warmup.
|
| 174 |
- **Stage B (full model):** up to 80 epoch(s) joint fine-tuning with scheduler/early stopping.
|
| 175 |
+
- **Stage B retention defences:** replay_max_samples_per_class=500, distillation_weight=0.0, replay_ce_weight=0.0, backbone_lr_multiplier=0.1, ewc_weight=N/A, gpm_components=0, forgetting_penalty_weight=0.5.
|
| 176 |
|
| 177 |
## Training Configuration
|
| 178 |
|
|
|
|
| 182 |
| Input size | 147 |
|
| 183 |
| Hidden size | 96/stream (BiLSTM output: 192) |
|
| 184 |
| Projection dim | 96 |
|
| 185 |
+
| Num layers | 2 |
|
| 186 |
| MHA heads | 8 (head dim: 24) |
|
| 187 |
| Dropout | 0.4 |
|
| 188 |
| Learning rate | 3e-05 |
|
config.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"model_version": "two_stream_attn_v1_finetune_20260515T104743Z",
|
| 3 |
"model_config": {
|
| 4 |
-
"model_name": "
|
| 5 |
"input_size": 147,
|
| 6 |
"hidden_size": 96,
|
| 7 |
-
"num_layers":
|
| 8 |
"dropout": 0.4,
|
| 9 |
"num_classes": 10
|
| 10 |
},
|
|
@@ -73,7 +73,7 @@
|
|
| 73 |
"point_two",
|
| 74 |
"unknown"
|
| 75 |
],
|
| 76 |
-
"created_at": "2026-05-
|
| 77 |
"gesture_command_mapping": {
|
| 78 |
"commands": {
|
| 79 |
"swiping_up": "start_presentation",
|
|
@@ -82,7 +82,7 @@
|
|
| 82 |
"swiping_left": "previous_slide",
|
| 83 |
"zooming_in_full_hand": "zoom_in_view",
|
| 84 |
"zooming_out_full_hand": "zoom_out_view",
|
| 85 |
-
"
|
| 86 |
"pinch": "activate_laser_pointer",
|
| 87 |
"click": "mouse_click",
|
| 88 |
"unknown": "no_action"
|
|
@@ -94,7 +94,7 @@
|
|
| 94 |
"swiping_left": "discrete",
|
| 95 |
"zooming_in_full_hand": "discrete",
|
| 96 |
"zooming_out_full_hand": "discrete",
|
| 97 |
-
"
|
| 98 |
"pinch": "discrete",
|
| 99 |
"click": "discrete",
|
| 100 |
"point_one": "continuous",
|
|
|
|
| 1 |
{
|
| 2 |
"model_version": "two_stream_attn_v1_finetune_20260515T104743Z",
|
| 3 |
"model_config": {
|
| 4 |
+
"model_name": "two_stream_attn_v1_2layer_finetune",
|
| 5 |
"input_size": 147,
|
| 6 |
"hidden_size": 96,
|
| 7 |
+
"num_layers": 2,
|
| 8 |
"dropout": 0.4,
|
| 9 |
"num_classes": 10
|
| 10 |
},
|
|
|
|
| 73 |
"point_two",
|
| 74 |
"unknown"
|
| 75 |
],
|
| 76 |
+
"created_at": "2026-05-15T13:15:28.451458+00:00",
|
| 77 |
"gesture_command_mapping": {
|
| 78 |
"commands": {
|
| 79 |
"swiping_up": "start_presentation",
|
|
|
|
| 82 |
"swiping_left": "previous_slide",
|
| 83 |
"zooming_in_full_hand": "zoom_in_view",
|
| 84 |
"zooming_out_full_hand": "zoom_out_view",
|
| 85 |
+
"fist": "erase_annotations",
|
| 86 |
"pinch": "activate_laser_pointer",
|
| 87 |
"click": "mouse_click",
|
| 88 |
"unknown": "no_action"
|
|
|
|
| 94 |
"swiping_left": "discrete",
|
| 95 |
"zooming_in_full_hand": "discrete",
|
| 96 |
"zooming_out_full_hand": "discrete",
|
| 97 |
+
"fist": "discrete",
|
| 98 |
"pinch": "discrete",
|
| 99 |
"click": "discrete",
|
| 100 |
"point_one": "continuous",
|