wzf19947 committed on
Commit
9a2ceae
·
1 Parent(s): a60277f

first commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.jpg filter=lfs diff=lfs merge=lfs -text
37
+ *.axmodel filter=lfs diff=lfs merge=lfs -text
AX650/yolo11s_drone_650.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841f06045c080fe0960f31c88b912d6996e3e8df235bc7b2fd826cb6b21145c9
3
+ size 10164276
AX650/yolo26s_drone_650_u16.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:291908fcdbe9209cb004d696d8f5117a2ff5e3ced858cda8397a5c2b557f8527
3
+ size 10985685
README.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: agpl-3.0
3
+ language:
4
+ - en
5
+ pipeline_tag: object-detection
6
+ tags:
7
+ - Axera
8
+ - YOLO11
9
+ - YOLO26
10
+ - NPU
11
+ - Ultralytics
12
+ - Drone Detection
13
+ - Object Detection
14
+ ---
15
+
16
+ # Drone-axera
17
+
18
+ This version of **Drone-axera** has been converted to run on the Axera NPU using **w8a16** quantization. It is trained with yolo11s/yolo26s to detect drones.
19
+
20
+ ## Supported Classes
21
+
22
+ This model is trained to detect drones in everyday scenes and outputs a single class:
23
+ 1. **Drone**
24
+
25
+ Compatible with Pulsar2 version: 5.2.
26
+
27
+ ## Conversion tool links
28
+
29
+ If you are interested in model conversion, you can export the axmodel yourself using:
30
+ - [The repo of AXera Platform](https://github.com/AXERA-TECH/ax-samples), where you can get the detailed guide.
31
+ - [Pulsar2 Link, How to Convert ONNX to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/pulsar2/introduction.html)
32
+
33
+ ## Supported Platforms
34
+
35
+ https://docs.m5stack.com/zh_CN/ai_hardware/AI_Pyramid-Pro
36
+
37
+ - **AX650N/AX8850**
38
+ - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
39
+ - [AI Pyramid](https://docs.m5stack.com/zh_CN/ai_hardware/AI_Pyramid-Pro)
40
+ - [M.2 Accelerator card](https://docs.m5stack.com/en/ai_hardware/LLM-8850_Card)
41
+
42
+ ## How to use
43
+
44
+ Download all files from this repository to the device.
45
+
46
+ ### Python environment requirements
47
+
48
+ #### pyaxengine
49
+
50
+ https://github.com/AXERA-TECH/pyaxengine
51
+
52
+ ```bash
53
+ wget https://github.com/AXERA-TECH/pyaxengine/releases/download/0.1.3.rc2/axengine-0.1.3-py3-none-any.whl
54
+ pip install axengine-0.1.3-py3-none-any.whl
55
+ ```
56
+
57
+ ### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro)
58
+
59
+ Input image:
60
+ ![](test/23.jpg)
61
+
62
+ Run:
63
+ ```bash
64
+ python3 axmodel_infer_yolo26.py
65
+ # or
66
+ python3 axmodel_infer_yolo11.py
67
+ ```
68
+
69
+ ```bash
70
+ root@ax650:~/Drone# python3 axmodel_infer_yolo11.py
71
+ [INFO] Available providers: ['AxEngineExecutionProvider', 'AXCLRTExecutionProvider']
72
+ [INFO] Using provider: AxEngineExecutionProvider
73
+ [INFO] Chip type: ChipType.MC50
74
+ [INFO] VNPU type: VNPUType.DISABLED
75
+ [INFO] Engine version: 2.12.0s
76
+ [INFO] Model type: 0 (single core)
77
+ [INFO] Compiler version: 5.2 df2fe798
78
+ 0/1: ./test/23.jpg
79
+ class: Drone:0.97, bbox: [294, 226, 335, 270], score: 0.97
80
+ 结果已保存到 ./drone_yolo11_res
81
+
82
+ ```
83
+
84
+ Output image:
85
+ ![](drone_yolo11_res/23.jpg)
axmodel_infer_yolo11.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axengine as axe
2
+ import numpy as np
3
+ import cv2
4
+ import glob
5
+ import os
6
+ import argparse
7
+ from dataclasses import dataclass
8
+
9
+ # Class Names
10
+ CLASSES = [
11
+ 'Drone'
12
+ ]
13
+
14
@dataclass
class Object:
    """One detection: a bounding box, a class index and a confidence score."""

    bbox: list  # [x0, y0, width, height]
    label: int  # class index of the detection
    prob: float  # confidence score of the detection
19
+
20
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The value is computed from ``exp(-|x|)`` so the exponential is never
    evaluated on a positive argument and cannot overflow, whatever the
    magnitude of ``x``.

    Args:
        x: A scalar or array-like of logits.

    Returns:
        A NumPy scalar for scalar input, otherwise an array of the same shape.
        Floating-point inputs keep their dtype; other inputs are computed in
        float64.
    """
    values = np.asarray(x)
    if not np.issubdtype(values.dtype, np.floating):
        values = values.astype(np.float64)

    decay = np.exp(-np.abs(values))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x). Both are the same function.
    result = np.where(values >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with an empty tuple turns a 0-d array back into a scalar.
    return result[()]
22
+
23
def softmax(x, axis=-1):
    """Return the softmax of ``x`` along ``axis``.

    The per-slice maximum is subtracted before exponentiation so that large
    inputs do not overflow.
    """
    peak = np.max(x, axis=axis, keepdims=True)
    weights = np.exp(np.subtract(x, peak))
    normaliser = weights.sum(axis=axis, keepdims=True)
    return np.divide(weights, normaliser)
27
+
28
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` to fit ``new_shape`` keeping its aspect ratio, then pad it.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or a single int for a square target.
        color: Border colour used for the padding.
        auto: If true, pad only up to the next multiple of ``stride``.
        scaleFill: If true (and ``auto`` is false), stretch to ``new_shape``
            without padding.
        scaleup: If false, never enlarge the image.
        stride: Stride used by the ``auto`` padding mode.

    Returns:
        Tuple ``(image, ratio, (dw, dh))`` where ``ratio`` is the
        (width, height) scale factor and ``dw``/``dh`` are the per-side
        paddings before rounding.
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    scale = min(dst_h / src_h, dst_w / src_w)
    if not scaleup:
        scale = min(scale, 1.0)

    ratio = (scale, scale)
    new_unpad = (int(round(src_w * scale)), int(round(src_h * scale)))
    dw = dst_w - new_unpad[0]
    dh = dst_h - new_unpad[1]
    if auto:
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:
        dw, dh = 0.0, 0.0
        new_unpad = (dst_w, dst_h)
        ratio = (dst_w / src_w, dst_h / src_h)

    # Split the padding evenly between the two sides.
    dw, dh = dw / 2, dh / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The +/-0.1 nudges decide which side receives the extra pixel of an odd padding.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return im, ratio, (dw, dh)
57
+
58
def decode_distributions(feat, reg_max=16):
    """Return the expected bin index of a discretised distance distribution.

    ``feat`` holds ``reg_max`` logits along its last axis; they are turned into
    probabilities with ``softmax`` and reduced to the expectation
    ``sum(p_k * k)`` over ``k = 0 .. reg_max - 1``.
    """
    bin_index = np.arange(reg_max)
    weights = softmax(feat, axis=-1)
    return (weights * bin_index).sum(axis=-1)
62
+
63
def preprocess(image_path, input_size):
    """Load an image and build the uint8 network input from it.

    Args:
        image_path: Path of the image to read.
        input_size: Target size forwarded to ``letterbox``.

    Returns:
        Tuple ``(batch, original_shape, image)``: the letterboxed image with a
        leading batch axis, the (height, width) of the image as read, and the
        image itself.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    original = cv2.imread(image_path)
    if original is None:
        raise FileNotFoundError(f"Unable to read image file: {image_path}")

    padded = letterbox(original, input_size, auto=False, stride=32)[0]
    contiguous = np.asarray(np.ascontiguousarray(padded), dtype=np.uint8)
    batch = contiguous[np.newaxis, ...]
    return batch, original.shape[:2], original
73
+
74
def postprocess(outputs, original_shape, input_size, confidence_threshold, nms_threshold, num_classes, reg_max=16):
    """Decode the three detection heads into NMS-filtered detections.

    Each head is read as an NHWC array whose channels are ``4 * reg_max`` box
    distribution logits followed by the class logits; the heads are taken to
    have strides 8, 16 and 32, in that order.

    Box coordinates are mapped back to the original image by undoing the
    letterbox transform applied in ``preprocess`` (uniform scale plus centred
    padding) rather than by an independent x/y stretch, so boxes stay correct
    for non-square images.

    Args:
        outputs: Sequence with at least three head outputs.
        original_shape: (height, width) of the original image.
        input_size: Network input size, interpreted as (height, width) exactly
            as ``letterbox`` interprets it.
        confidence_threshold: Minimum class probability for a candidate.
        nms_threshold: Boxes of the same class overlapping a kept box by more
            than this IoU are suppressed.
        num_classes: Number of class channels per head.
        reg_max: Number of bins per box side.

    Returns:
        List of ``Object`` with ``bbox`` as [x0, y0, width, height] in
        original-image pixels. Detections from every batch element are pooled
        into the one list.
    """
    heads = ((outputs[0], 8), (outputs[1], 16), (outputs[2], 32))
    bbox_channels = 4 * reg_max
    orig_h, orig_w = original_shape[0], original_shape[1]
    input_h, input_w = input_size[0], input_size[1]

    # Rebuild the letterbox geometry: scale factor and the left/top padding
    # actually added (same rounding as letterbox).
    gain = min(input_h / orig_h, input_w / orig_w)
    pad_left = int(round((input_w - int(round(orig_w * gain))) / 2 - 0.1))
    pad_top = int(round((input_h - int(round(orig_h * gain))) / 2 - 0.1))

    box_chunks, score_chunks, label_chunks = [], [], []
    for output, stride in heads:
        output = np.asarray(output)
        _, grid_h, grid_w, channels = output.shape
        # One row per grid cell, batch elements laid out one after another.
        cells = output.reshape(-1, channels)

        class_logits = cells[:, bbox_channels:bbox_channels + num_classes]
        labels = np.argmax(class_logits, axis=1)
        probs = sigmoid(class_logits[np.arange(labels.size), labels])
        selected = np.flatnonzero(probs >= confidence_threshold)
        if selected.size == 0:
            continue

        # Position of each selected cell inside its own feature map.
        cell_index = selected % (grid_h * grid_w)
        rows = cell_index // grid_w
        cols = cell_index % grid_w

        side_logits = cells[selected, :bbox_channels].reshape(-1, 4, reg_max)
        distances = decode_distributions(side_logits, reg_max) * stride  # left, top, right, bottom
        centre_x = (cols + 0.5) * stride
        centre_y = (rows + 0.5) * stride

        x0 = np.clip((centre_x - distances[:, 0] - pad_left) / gain, 0, orig_w - 1)
        y0 = np.clip((centre_y - distances[:, 1] - pad_top) / gain, 0, orig_h - 1)
        x1 = np.clip((centre_x + distances[:, 2] - pad_left) / gain, 0, orig_w - 1)
        y1 = np.clip((centre_y + distances[:, 3] - pad_top) / gain, 0, orig_h - 1)

        box_chunks.append(np.stack([x0, y0, x1 - x0, y1 - y0], axis=1))
        score_chunks.append(probs[selected])
        label_chunks.append(labels[selected])

    if not box_chunks:
        return []

    boxes = np.concatenate(box_chunks).astype(np.float64)
    scores = np.concatenate(score_chunks).astype(np.float64)
    class_ids = np.concatenate(label_chunks)

    final_detections = []
    for cls in np.unique(class_ids):
        members = np.flatnonzero(class_ids == cls)
        cls_boxes = boxes[members]
        cls_scores = scores[members]
        left, top = cls_boxes[:, 0], cls_boxes[:, 1]
        right = left + cls_boxes[:, 2]
        bottom = top + cls_boxes[:, 3]
        areas = cls_boxes[:, 2] * cls_boxes[:, 3]

        # Greedy NMS: keep the best box, drop everything overlapping it too much.
        order = cls_scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            keep.append(best)
            if rest.size == 0:
                break
            inter_w = np.maximum(0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]))
            inter_h = np.maximum(0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]))
            intersection = inter_w * inter_h
            union = areas[best] + areas[rest] - intersection
            # Zero-area boxes (fully clipped) give union == 0; treat their IoU as 0.
            iou = np.divide(intersection, union, out=np.zeros_like(intersection), where=union > 0)
            order = rest[iou <= nms_threshold]

        for idx in keep:
            final_detections.append(Object(
                bbox=cls_boxes[idx].tolist(),
                label=int(cls),
                prob=float(cls_scores[idx]),
            ))
    return final_detections
172
+
173
def main():
    """Run the YOLO11 axmodel on every ``*.jpg`` in a folder and save annotated copies.

    Command-line options select the model, the image folder, the output folder,
    the confidence and NMS thresholds, the input size and the DFL ``reg_max``.
    """
    parser = argparse.ArgumentParser(description="YOLO11 AXEngine Inference")
    parser.add_argument('--model', type=str, default='yolo11s_drone_650.axmodel', help='Model path')
    parser.add_argument('--img_path', type=str, default='./test', help='Image path')
    parser.add_argument('--save_path', type=str, default='./drone_yolo11_res', help='Save path')
    parser.add_argument('--conf', type=float, default=0.3, help='Confidence threshold')
    parser.add_argument('--nms', type=float, default=0.45, help='NMS threshold')
    parser.add_argument('--size', type=int, nargs=2, default=[640, 640], help='Input size W H')
    parser.add_argument('--regmax', type=int, default=16, help='DFL reg_max value')
    args = parser.parse_args()

    session = axe.InferenceSession(args.model)
    input_name = session.get_inputs()[0].name
    output_names = [output.name for output in session.get_outputs()]
    os.makedirs(args.save_path, exist_ok=True)

    # glob order depends on the filesystem; sort so runs are reproducible.
    imgs = sorted(glob.glob(f"{args.img_path}/*.jpg"))
    if not imgs:
        print(f"No .jpg images found in {args.img_path}")
        return

    for idx, img in enumerate(imgs):
        print(f"{idx}/{len(imgs)}: {img}")
        input_tensor, original_shape, original_image = preprocess(img, tuple(args.size))
        outputs = session.run(output_names, {input_name: input_tensor})

        detections = postprocess(
            outputs,
            original_shape,
            tuple(args.size),
            args.conf,
            args.nms,
            len(CLASSES),
            reg_max=args.regmax
        )

        for det in detections:
            score = det.prob
            class_id = det.label
            if class_id >= len(CLASSES):
                label = f"cls{class_id}:{score:.2f}"
            else:
                label = f"{CLASSES[class_id]}:{score:.2f}"
            x, y, w, h = map(int, det.bbox)
            print(f"class: {label}, bbox: [{x}, {y}, {x+w}, {y+h}], score: {score:.2f}")
            cv2.rectangle(original_image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            # Draw the caption above the box, or just inside it when the box
            # touches the top edge and the text would fall outside the image.
            text_y = y - 10 if y >= 20 else y + 15
            cv2.putText(original_image, label, (x, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        cv2.imwrite(f'{args.save_path}/{os.path.basename(img)}', original_image)
        print(f"结果已保存到 {args.save_path}")


if __name__ == '__main__':
    main()
axmodel_infer_yolo26.py ADDED
@@ -0,0 +1,563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axengine as axe
2
+ import cv2
3
+ import numpy as np
4
+ import time
5
+ import glob
6
+ import os
7
+ import argparse
8
+
9
+ names = [
10
+ "Drone"
11
+ ]
12
+
13
def non_max_suppression(
    prediction,
    conf_thres: float = 0.25,
    iou_thres: float = 0.45,
    classes=None,
    agnostic: bool = False,
    multi_label: bool = False,
    labels=(),
    max_det: int = 300,
    nc: int = 0,  # number of classes (optional)
    max_time_img: float = 0.05,
    max_nms: int = 30000,
    max_wh: int = 7680,
    rotated: bool = False,
    end2end: bool = False,
    return_idxs: bool = False,
):
    """Perform non-maximum suppression (NMS) on prediction results using NumPy only.

    Two input layouts are handled:

    * End-to-end (``end2end`` true or last dimension equal to 6): each row is
      ``[x1, y1, x2, y2, conf, cls]``. Rows are only filtered by confidence,
      truncated to ``max_det`` and optionally filtered by ``classes``.
    * Raw: ``(batch, 4 + nc + extra, anchors)`` with xywh boxes. Boxes are
      converted to xyxy, thresholded and passed to ``numpy_nms``.

    The input array is never modified.

    Args:
        prediction: Array (or list/tuple whose first element is the array).
        conf_thres: Confidence threshold in [0, 1].
        iou_thres: IoU threshold in [0, 1] for the raw layout.
        classes: Optional class ids to keep.
        agnostic: If true, boxes of different classes suppress each other.
        multi_label: If true (and more than one class), emit one row per
            (box, class) pair above the threshold.
        labels: Unused; kept for signature compatibility.
        max_det: Maximum number of rows returned per image.
        nc: Number of classes; inferred from the shape when 0.
        max_time_img: Per-image allowance added to the 2 s time limit.
        max_nms: Maximum number of candidates passed to NMS.
        max_wh: Per-class coordinate offset that keeps classes apart in NMS.
        rotated: Boxes stay in xywh(+angle) form in the output. NMS uses their
            axis-aligned extent, which ignores the angle (an approximation).
        end2end: Force the end-to-end layout.
        return_idxs: If true, the raw-layout path also returns, per image, the
            anchor indices of the kept rows as an ``(n, 1)`` int32 array.

    Returns:
        A list with one ``(n, 6 + extra)`` array per image, or
        ``(output, keepi)`` when ``return_idxs`` is true in the raw layout.
    """
    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"
    if isinstance(prediction, (list, tuple)):
        prediction = prediction[0]
    prediction = np.asarray(prediction)

    if classes is not None:
        classes = np.asarray(classes)

    if prediction.shape[-1] == 6 or end2end:  # end-to-end model (BNC, i.e. 1,300,6)
        output = []
        for pred in prediction:
            filtered = pred[pred[:, 4] > conf_thres][:max_det]
            if classes is not None:
                filtered = filtered[np.any(filtered[:, 5:6] == classes, axis=1)]
            output.append(filtered)
        return output

    bs = prediction.shape[0]  # batch size
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    extra = prediction.shape[1] - nc - 4  # number of extra info
    mi = 4 + nc  # mask start index
    multi_label &= nc > 1
    time_limit = 2.0 + max_time_img * bs

    start = time.time()
    output = []
    keepi = []
    for xi in range(bs):
        x = prediction[xi].T  # (anchors, 4 + nc + extra)
        anchor_ids = np.arange(x.shape[0], dtype=np.int32)

        # Boolean indexing copies, so the caller's array is left untouched below.
        candidates = np.max(x[:, 4:mi], axis=1) > conf_thres
        x = x[candidates]
        anchor_ids = anchor_ids[candidates]

        if x.shape[0] == 0:
            output.append(np.zeros((0, 6 + extra), dtype=np.float32))
            keepi.append(np.zeros((0, 1), dtype=np.int32))
            continue

        box = x[:, :4] if rotated else xywh2xyxy(x[:, :4])
        cls = x[:, 4:mi]
        mask = x[:, mi:]

        if multi_label:
            i, j = np.where(cls > conf_thres)
            x = np.concatenate(
                [box[i], cls[i, j][:, None], j[:, None].astype(np.float32), mask[i]], axis=1
            )
            anchor_ids = anchor_ids[i]
        else:
            conf = np.max(cls, axis=1, keepdims=True)
            j = np.argmax(cls, axis=1)[:, None]
            filt = conf[:, 0] > conf_thres
            x = np.concatenate([box, conf, j.astype(np.float32), mask], axis=1)[filt]
            anchor_ids = anchor_ids[filt]

        # Filter by class
        if classes is not None:
            class_mask = np.any(x[:, 5:6] == classes, axis=1)
            x = x[class_mask]
            anchor_ids = anchor_ids[class_mask]

        n = x.shape[0]
        if n == 0:
            output.append(np.zeros((0, 6 + extra), dtype=np.float32))
            keepi.append(np.zeros((0, 1), dtype=np.int32))
            continue

        if n > max_nms:
            strongest = np.argsort(-x[:, 4])[:max_nms]
            x = x[strongest]
            anchor_ids = anchor_ids[strongest]

        # Shifting boxes by class id * max_wh keeps different classes from
        # overlapping, which turns one NMS pass into a per-class NMS.
        offsets = x[:, 5:6] * (0 if agnostic else max_wh)
        nms_boxes = (xywh2xyxy(x[:, :4]) if rotated else x[:, :4]) + offsets
        kept = numpy_nms(nms_boxes, x[:, 4], iou_thres)[:max_det]

        output.append(x[kept])
        keepi.append(anchor_ids[kept].reshape(-1, 1))

        if (time.time() - start) > time_limit:
            print(f"NMS time limit {time_limit:.3f}s exceeded")
            break

    return (output, keepi) if return_idxs else output
148
+
149
+
150
def numpy_nms(boxes, scores, iou_threshold):
    """Greedy non-maximum suppression implemented with NumPy.

    Args:
        boxes: array of shape (N, 4) in format [x1, y1, x2, y2]
        scores: array of shape (N,)
        iou_threshold: boxes whose IoU with a kept box exceeds this are dropped

    Returns:
        int32 array with the indices of the kept boxes, best score first
    """
    if len(boxes) == 0:
        return np.array([], dtype=np.int32)

    left, top, right, bottom = (boxes[:, col] for col in range(4))
    # Widths and heights use the pixel-inclusive "+ 1" convention.
    areas = (right - left + 1) * (bottom - top + 1)

    remaining = np.argsort(-scores)
    kept = []
    while len(remaining) > 0:
        best, others = remaining[0], remaining[1:]
        kept.append(best)
        if len(others) == 0:
            break

        overlap_w = np.maximum(
            0, np.minimum(right[best], right[others]) - np.maximum(left[best], left[others]) + 1
        )
        overlap_h = np.maximum(
            0, np.minimum(bottom[best], bottom[others]) - np.maximum(top[best], top[others]) + 1
        )
        inter = overlap_w * overlap_h
        iou = inter / (areas[best] + areas[others] - inter)

        # Only boxes that do not overlap the winner too much stay in the race.
        remaining = others[iou <= iou_threshold]

    return np.array(kept, dtype=np.int32)
208
+
209
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Scale ``im`` into ``new_shape`` without distortion and pad the remainder.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or one int for a square target.
        color: Colour of the padding border.
        auto: Pad only to the next multiple of ``stride`` instead of the full
            target size.
        scaleFill: When ``auto`` is false, stretch to the target with no padding.
        scaleup: Allow enlarging images smaller than the target.
        stride: Stride used by the ``auto`` mode.

    Returns:
        ``(image, ratio, (dw, dh))``: the padded image, the (width, height)
        scale factors and the unrounded per-side paddings.
    """
    height, width = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    target_h, target_w = new_shape[0], new_shape[1]

    r = min(target_h / height, target_w / width)
    if not scaleup:
        r = min(r, 1.0)

    ratio = (r, r)
    new_unpad = (int(round(width * r)), int(round(height * r)))
    dw = target_w - new_unpad[0]
    dh = target_h - new_unpad[1]
    if auto:
        dw = np.mod(dw, stride)
        dh = np.mod(dh, stride)
    elif scaleFill:
        dw = dh = 0.0
        new_unpad = (target_w, target_h)
        ratio = (target_w / width, target_h / height)

    # Half of the padding goes on each side.
    dw = dw / 2
    dh = dh / 2

    if (width, height) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # Rounding with -0.1 / +0.1 gives the odd pixel to the bottom/right side.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    padded = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
    return padded, ratio, (dw, dh)
238
+
239
def data_process_cv2(frame, input_shape):
    """Read an image file and build the uint8 network input from it.

    Args:
        frame: Path of the image file.
        input_shape: Target size forwarded to ``letterbox``.

    Returns:
        ``(img, im0, org_data)``: the letterboxed image with a leading batch
        axis, the image as read from disk, and a copy of the letterboxed image
        without the batch axis.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``frame``.
    """
    im0 = cv2.imread(frame)
    # cv2.imread signals failure by returning None rather than raising.
    if im0 is None:
        raise FileNotFoundError(f"Unable to read image file: {frame}")

    letterboxed = letterbox(im0, input_shape, auto=False, stride=32)[0]
    org_data = letterboxed.copy()
    img = np.asarray(np.ascontiguousarray(letterboxed), dtype=np.uint8)
    img = np.expand_dims(img, 0)
    return img, im0, org_data
247
+
248
+ # Define xywh2xyxy function for converting bounding box format
249
def xywh2xyxy(x):
    """Convert boxes from [cx, cy, w, h] to [x1, y1, x2, y2].

    The coordinates are taken from the last axis, so the input may be a single
    box ``(4,)``, a list of boxes ``(n, 4)`` or a batch ``(b, n, 4)``. Columns
    beyond the first four are copied unchanged.

    Args:
        x: NumPy array whose last axis holds at least four values.

    Returns:
        A new array of the same shape and dtype; ``x`` is not modified.
    """
    y = x.copy()
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = x[..., 0] - half_w  # left
    y[..., 1] = x[..., 1] - half_h  # top
    y[..., 2] = x[..., 0] + half_w  # right
    y[..., 3] = x[..., 1] + half_h  # bottom
    return y
256
+
257
def xyxy2xywh(x):
    """Convert boxes from [x1, y1, x2, y2] to [cx, cy, w, h].

    (x1, y1) is the top-left corner and (x2, y2) the bottom-right corner. The
    coordinates are taken from the last axis, so single boxes, ``(n, 4)``
    arrays and batched ``(b, n, 4)`` arrays are all accepted.

    Args:
        x: NumPy array whose last axis holds at least four values.

    Returns:
        A new array of the same shape and dtype; ``x`` is not modified.
    """
    y = np.copy(x)
    y[..., 0] = (x[..., 0] + x[..., 2]) / 2  # x center
    y[..., 1] = (x[..., 1] + x[..., 3]) / 2  # y center
    y[..., 2] = x[..., 2] - x[..., 0]  # width
    y[..., 3] = x[..., 3] - x[..., 1]  # height
    return y
265
+
266
def post_process_yolo(det, im, im0, gn, save_path, img_name):
    """Rescale detections to the original image, draw them and save the result.

    Args:
        det: Array of rows ``[x1, y1, x2, y2, conf, cls]`` in the coordinates
            of ``im``; its box columns are rescaled in place.
        im: The letterboxed image the detections refer to.
        im0: The original image; boxes are drawn on it.
        gn: Unused here (the normalised-label export that needed it is not
            implemented).
        save_path: Directory the annotated image is written to.
        img_name: File name (without extension) of the saved ``.jpg``.

    Returns:
        List of ``[x1, y1, x2, y2]`` integer boxes, in drawing order. The list
        is empty, and no file is written, when ``det`` is empty.
    """
    detections = []
    if not len(det):
        return detections

    det[:, :4] = scale_boxes(im.shape[:2], det[:, :4], im0.shape).round()
    colors = Colors()
    res_img = im0
    for *xyxy, conf, cls in reversed(det):
        print("class:",int(cls), "left:%.0f" % xyxy[0],"top:%.0f" % xyxy[1],"right:%.0f" % xyxy[2],"bottom:%.0f" % xyxy[3], "conf:",'{:.0f}%'.format(float(conf)*100))
        detections.append([int(value) for value in xyxy])
        c = int(cls)
        # Fall back to a generic name instead of failing on an unknown class id.
        label = names[c] if 0 <= c < len(names) else f"cls{c}"
        # Keep drawing on the returned image: plot_one_box may hand back a copy.
        res_img = plot_one_box(xyxy, res_img, label=f'{label}:{conf:.2f}', color=colors(c, True), line_thickness=4)

    # Write once, after every box has been drawn.
    cv2.imwrite(f'{save_path}/{img_name}.jpg', res_img)
    return detections
284
+
285
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
    """Map xyxy boxes from a letterboxed image back to the original image.

    Args:
        img1_shape: (height, width) of the letterboxed image. Ignored when
            ``ratio_pad`` is given.
        boxes: Float array whose last axis starts with ``x1, y1, x2, y2``;
            modified in place.
        img0_shape: Shape of the original image, (height, width, ...).
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))`` overriding the
            values otherwise derived from the two shapes.

    Returns:
        ``boxes`` itself, rescaled and clipped to the original image.
    """
    if ratio_pad is None:
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])
        # Reproduce the left/top border letterbox really added: it rounds the
        # resized size first and then rounds the half padding with a -0.1 bias.
        pad_x = round((img1_shape[1] - int(round(img0_shape[1] * gain))) / 2 - 0.1)
        pad_y = round((img1_shape[0] - int(round(img0_shape[0] * gain))) / 2 - 0.1)
    else:
        gain = ratio_pad[0][0]
        pad_x, pad_y = ratio_pad[1][0], ratio_pad[1][1]

    boxes[..., [0, 2]] -= pad_x
    boxes[..., [1, 3]] -= pad_y
    boxes[..., :4] /= gain
    # Clip to the original image bounds.
    boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, img0_shape[1])
    boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, img0_shape[0])
    return boxes
298
+
299
def clip_boxes(boxes, shape):
    """Clip xyxy boxes in place to an image of the given (height, width) shape.

    x coordinates are limited to ``[0, shape[1]]`` and y coordinates to
    ``[0, shape[0]]``. Nothing is returned.
    """
    height, width = shape[0], shape[1]
    for column, upper in ((0, width), (1, height), (2, width), (3, height)):
        boxes[..., column] = np.clip(boxes[..., column], 0, upper)
302
+
303
+
304
class Colors:
    """Fixed palette of 20 colours, addressable by an integer index."""

    def __init__(self):
        """Build ``self.palette`` (list of RGB tuples) and ``self.n`` (its length)."""
        hex_codes = (
            "FF3838", "FF9D97", "FF701F", "FFB21D", "CFD231",
            "48F90A", "92CC17", "3DDB86", "1A9334", "00D4BB",
            "2C99A8", "00C2FF", "344593", "6473FF", "0018EC",
            "8438FF", "520085", "CB38FF", "FF95C8", "FF37C7",
        )
        self.palette = [self.hex2rgb("#" + code) for code in hex_codes]
        self.n = len(self.palette)

    def __call__(self, i, bgr=False):
        """Return palette colour ``i`` (wrapping around), as BGR if ``bgr`` else RGB."""
        rgb = self.palette[int(i) % self.n]
        if bgr:
            return rgb[::-1]
        return rgb

    @staticmethod
    def hex2rgb(h):
        """Convert a ``#RRGGBB`` string to an ``(r, g, b)`` tuple of ints."""
        digits = h[1:]
        return tuple(int(digits[pos:pos + 2], 16) for pos in range(0, 6, 2))
345
+
346
def plot_one_box(x, im, color=None, label=None, line_thickness=3, steps=2, orig_shape=None):
    """Draw one box, and a caption when the label contains a ':', on ``im``.

    Args:
        x: Box as ``[x1, y1, x2, y2]``.
        im: Image to draw on; a contiguous copy is made if it is not C-contiguous.
        color: Colour of the box and of the caption background.
        label: Caption text; it is only drawn when it contains a ``':'``.
        line_thickness: Base thickness from which line and text sizes derive.
        steps: Unused.
        orig_shape: Unused.

    Returns:
        The image that was drawn on (``im`` or its contiguous copy).
    """
    # Ensure image is contiguous
    if not im.flags['C_CONTIGUOUS']:
        im = np.ascontiguousarray(im)

    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=tl*1//3, lineType=cv2.LINE_AA)

    # No caption requested, or the label lacks the "name:score" form.
    if not label or len(label.split(':')) <= 1:
        return im

    tf = max(tl - 1, 1)
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf)[0]
    # Filled background sitting on top of the box's upper-left corner.
    background_corner = top_left[0] + text_w, top_left[1] - text_h - 3
    cv2.rectangle(im, top_left, background_corner, color, -1, cv2.LINE_AA)
    cv2.putText(im, label, (top_left[0], top_left[1] - 2), 0, tl / 6, [225, 255, 255], thickness=tf//2, lineType=cv2.LINE_AA)
    return im
362
+
363
def model_load(model):
    """Open an axengine inference session for ``model``.

    Args:
        model: Path of the ``.axmodel`` file.

    Returns:
        ``(session, output_names)``: the session created with the
        ``AxEngineExecutionProvider`` and the names of all model outputs.
    """
    providers = ['AxEngineExecutionProvider']
    session = axe.InferenceSession(model, providers=providers)
    output_names = [output.name for output in session.get_outputs()]
    return session, output_names
369
+
370
def make_anchors(feats, strides, grid_cell_offset=0.5):
    """Build the anchor points and per-anchor strides for every feature level.

    Args:
        feats: Either a list of arrays whose shape ends in (height, width), or
            an indexable of ``(height, width)`` pairs. ``feats[0].dtype`` sets
            the output dtype.
        strides: One stride per feature level.
        grid_cell_offset: Offset added to each grid index (0.5 = cell centre).

    Returns:
        ``(anchor_points, stride_tensor)`` with shapes ``(N, 2)`` (x, y in grid
        units, row-major per level) and ``(N, 1)``.
    """
    assert feats is not None
    dtype = feats[0].dtype
    feats_is_list = isinstance(feats, list)

    points, stride_columns = [], []
    for level, stride in enumerate(strides):
        if feats_is_list:
            h, w = feats[level].shape[2:]
        else:
            h, w = int(feats[level][0]), int(feats[level][1])
        xs = np.arange(w, dtype=dtype) + grid_cell_offset  # shift x
        ys = np.arange(h, dtype=dtype) + grid_cell_offset  # shift y
        grid_y, grid_x = np.meshgrid(ys, xs, indexing='ij')
        points.append(np.stack((grid_x, grid_y), axis=-1).reshape(-1, 2))
        stride_columns.append(np.full((h * w, 1), stride, dtype=dtype))

    return np.concatenate(points), np.concatenate(stride_columns)
383
+
384
def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
    """Turn (left, top, right, bottom) distances into boxes around anchor points.

    Args:
        distance: Array whose ``dim`` axis holds ``l, t, r, b``.
        anchor_points: Array broadcastable against each half of ``distance``.
        xywh: Return ``[cx, cy, w, h]`` if true, else ``[x1, y1, x2, y2]``.
        dim: Axis along which the four values are stored.

    Returns:
        Array with the four box values concatenated along ``dim``.
    """
    lt, rb = np.split(distance, 2, axis=dim)
    top_left = anchor_points - lt
    bottom_right = anchor_points + rb
    if not xywh:
        return np.concatenate((top_left, bottom_right), axis=dim)  # xyxy bbox

    centre = (top_left + bottom_right) / 2
    size = bottom_right - top_left
    return np.concatenate((centre, size), axis=dim)  # xywh bbox
394
+
395
+
396
class DFL:
    """
    NumPy implementation of the Distribution Focal Loss (DFL) integral module.

    Each box side is given as ``c1`` logits; the module returns the expectation
    of the bin index under the softmax of those logits.
    """

    def __init__(self, c1=16):
        """Initialize with the given number of distribution channels."""
        self.c1 = c1
        # Fixed weights 0..c1-1, playing the role of a frozen 1x1 convolution.
        self.weights = np.arange(c1, dtype=np.float32).reshape(1, c1, 1, 1)

    def __call__(self, x):
        """
        Reduce the per-side distributions to expected distances.

        Args:
            x: Input array of shape (batch, 4 * c1, anchors).

        Returns:
            Array of shape (batch, 4, anchors).
        """
        b, _, a = x.shape

        # (batch, 4, c1, anchors) -> (batch, c1, 4, anchors) so bins sit on axis 1.
        logits = np.transpose(x.reshape(b, 4, self.c1, a), (0, 2, 1, 3))
        # Subtract the maximum first: softmax is unchanged, but exp() cannot
        # overflow and turn the result into NaN.
        logits = logits - np.max(logits, axis=1, keepdims=True)
        exp_logits = np.exp(logits)
        probabilities = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

        # Weighted sum over the bins (equivalent to the fixed-weight convolution).
        expectation = np.sum(self.weights * probabilities, axis=1)
        return expectation.reshape(b, 4, a)
428
+
429
class YOLO26Detector:
    """Runs a YOLO26 axmodel and decodes its three NHWC head outputs."""

    def __init__(self, model_path, imgsz=None):
        """Load the model and set the decoding parameters.

        Args:
            model_path: Path of the ``.axmodel`` file.
            imgsz: Network input size forwarded to ``letterbox``; defaults to
                ``[640, 640]``.
        """
        self.model_path = model_path
        self.session, self.output_names = model_load(self.model_path)
        # Copy so that neither a shared default nor the caller's list is aliased.
        self.imgsz = [640, 640] if imgsz is None else list(imgsz)
        self.stride = [8., 16., 32.]
        self.reg_max = 1
        self.nc = len(names)
        self.nl = len(self.stride)
        self.dfl = DFL(self.reg_max)
        self.max_det = 300

    def postprocess(self, preds: np.ndarray) -> np.ndarray:
        """Select the best-scoring anchors and attach their score and class.

        Args:
            preds (np.ndarray): Raw predictions with shape (batch_size, num_anchors, 4 + nc)

        Returns:
            (np.ndarray): Processed predictions with shape
                (batch_size, min(max_det, num_anchors), 6); each row is
                ``[box (4), score, class]``.
        """
        boxes = preds[:, :, :4]
        scores = preds[:, :, 4:]
        scores_topk, conf, idx = self.get_topk_index(scores, self.max_det)

        # Gather, for every batch element, the boxes of its selected anchors.
        batch_rows = np.arange(boxes.shape[0])[:, None]
        boxes_selected = boxes[batch_rows, idx[:, :, 0].astype(int)]

        return np.concatenate([boxes_selected, scores_topk, conf], axis=-1)

    def get_topk_index(self, scores: np.ndarray, max_det: int) -> tuple:
        """Get the top-k anchors by best class score using NumPy.

        Args:
            scores (np.ndarray): Scores array with shape (batch_size, num_anchors, num_classes).
            max_det (int): Maximum detections per image.

        Returns:
            (tuple): Top scores (float32), class indices (float32) and anchor
                indices (int32), each of shape
                (batch_size, min(max_det, num_anchors), 1).
        """
        batch_size, anchors, _ = scores.shape
        # Never ask for more entries than exist; padding with index 0 would
        # duplicate anchor 0 in the output.
        k = min(max_det, anchors)

        # Best class score of every anchor: shape (batch_size, anchors)
        max_scores = np.max(scores, axis=2)

        output_scores = np.zeros((batch_size, k, 1), dtype=np.float32)
        output_classes = np.zeros((batch_size, k, 1), dtype=np.float32)
        output_indices = np.zeros((batch_size, k, 1), dtype=np.int32)

        for b in range(batch_size):
            # Stable sort keeps equal scores in anchor order.
            topk_indices = np.argsort(-max_scores[b], kind="stable")[:k]
            picked = scores[b, topk_indices]  # shape (k, nc)

            output_scores[b, :, 0] = np.max(picked, axis=1)
            output_classes[b, :, 0] = np.argmax(picked, axis=1)
            output_indices[b, :, 0] = topk_indices

        return output_scores, output_classes, output_indices

    def detect_objects(self, image, save_path, conf_threshold, nms_threshold):
        """Run detection on one image file and save the annotated result.

        Args:
            image: Path of the image file.
            save_path: Directory the annotated image is written to.
            conf_threshold: Minimum score of a reported detection.
            nms_threshold: Forwarded to ``non_max_suppression``.

        Returns:
            ``(detections, im0)``: the list returned by ``post_process_yolo``
            and the original image.
        """
        im, im0, org_data = data_process_cv2(image, self.imgsz)
        # splitext only removes the extension, so dots inside the name survive.
        img_name = os.path.splitext(os.path.basename(image))[0]
        input_name = self.session.get_inputs()[0].name
        x = self.session.run(self.output_names, {input_name: im})
        x = [np.transpose(x[i], (0, 3, 1, 2)) for i in range(self.nl)]  # to nchw
        anchors, strides = (np.transpose(x_arr, (1, 0)) for x_arr in make_anchors(x, self.stride, 0.5))

        box_channels = self.reg_max * 4
        boxes = np.concatenate(
            [x[i][:, :box_channels, :].reshape(1, box_channels, -1) for i in range(self.nl)], axis=-1
        )
        scores = np.concatenate(
            [x[i][:, box_channels:, :].reshape(1, self.nc, -1) for i in range(self.nl)], axis=-1
        )

        if self.reg_max > 1:
            distances = self.dfl(boxes)
        else:  # DFL is not used when every side has a single channel
            distances = boxes
        dbox = dist2bbox(distances, np.expand_dims(anchors, axis=0), xywh=False, dim=1) * strides

        # Sigmoid written on exp(-|s|) so that exp() never overflows.
        scores = scores.astype(np.float32)
        decay = np.exp(-np.abs(scores))
        sigmoid_scores = np.where(scores >= 0, 1.0 / (1 + decay), decay / (1 + decay))

        y = np.concatenate((dbox, sigmoid_scores), axis=1)
        y = y.transpose([0, 2, 1])
        pred = self.postprocess(y)
        pred = non_max_suppression(
            pred,
            conf_threshold,
            nms_threshold,
            None,
            False,
            max_det=self.max_det,
            nc=0,
            end2end=True,
            rotated=False,
            return_idxs=None,
        )
        gn = np.array(org_data.shape)[[1, 0, 1, 0]].astype(np.float32)
        res = post_process_yolo(pred[0], org_data, im0, gn, save_path, img_name)
        return res, im0
542
+
543
+
544
if __name__ == '__main__':
    # Command-line entry point: run the detector on every *.jpg of a folder.
    parser = argparse.ArgumentParser(description="YOLO26 AXEngine Inference")
    parser.add_argument('--model', type=str, default='yolo26s_drone_650_u16.axmodel', help='Model path')
    parser.add_argument('--img_path', type=str, default='./test', help='Image path')
    parser.add_argument('--save_path', type=str, default='./drone_yolo26_res', help='Save path')
    parser.add_argument('--conf', type=float, default=0.3, help='Confidence threshold')
    parser.add_argument('--nms', type=float, default=0.45, help='NMS threshold')
    parser.add_argument('--size', type=int, nargs=2, default=[640, 640], help='Input size W H')
    args = parser.parse_args()

    detector = YOLO26Detector(model_path=args.model, imgsz=args.size)
    img_path = args.img_path
    det_path = args.save_path
    os.makedirs(det_path, exist_ok=True)

    # glob order depends on the filesystem; sort so runs are reproducible.
    imgs = sorted(glob.glob(f"{img_path}/*.jpg"))
    if not imgs:
        print(f"No .jpg images found in {img_path}")
    for idx, img in enumerate(imgs):
        print(f"{idx}/{len(imgs)}: {img}")
        detector.detect_objects(img, det_path, args.conf, args.nms)
drone_yolo11_res/23.jpg ADDED

Git LFS Details

  • SHA256: 525258545243dcff5effc4d35061f5995e13f61a9e1493ba8d9c91c69ac12092
  • Pointer size: 130 Bytes
  • Size of remote file: 40.1 kB
drone_yolo26_res/23.jpg ADDED

Git LFS Details

  • SHA256: bda2994802f1df7bc24fb86c91a035c4699e65dc6ae6126c48c32e5a91771c9a
  • Pointer size: 130 Bytes
  • Size of remote file: 40.4 kB
test/23.jpg ADDED

Git LFS Details

  • SHA256: e0909d98ca629d9d97a2687240351fa7d95a5d97056c7dff6065b06f5aa76bab
  • Pointer size: 130 Bytes
  • Size of remote file: 21.1 kB