i have c++ detection send frame and dectection wit...
Created on: October 15, 2025
Answered using GPT-5 Thinking by Chat01
Created on: October 15, 2025
Answered using GPT-5 Thinking by Chat01
i have c++ detection send frame and dectection with shared memory to python tracker but sometimes i have a problem
write copelete code for both side
and explain clearly
import sys, os, time, math, argparse, mmap, ctypes
import numpy as np
import cv2
from loguru import logger
import torch
import sys
import os
import sys
sys.path.append("/media/rayfan/d/projects/HOP_system/myhop")
from yolox.data.datasets import COCO_CLASSES
from yolox.utils.visualize import plot_tracking
from yolox.tracker.hop_tracker import HOPTracker, pixel_distribution
from track_utils_shm import dbscan_clustering, detection_rate_adjuster
from shm_layout import ShmBlock, RING_SIZE, MAX_DETS
def open_shm(shm_name: str):
# POSIX shm_open یک فایل در /dev/shm میسازد: /dev/shm/<name_without_slash>
path = f"/dev/shm/{shm_name.lstrip('/')}"
fd = os.open(path, os.O_RDWR)
size = ctypes.sizeof(ShmBlock)
mm = mmap.mmap(fd, size, mmap.MAP_SHARED, mmap.PROT_READ | mmap.PROT_WRITE)
os.close(fd)
block = ShmBlock.from_buffer(mm)
return mm, block
def read_slot(block: ShmBlock, frame_id_expected: int, timeout_ms=1000):
t0 = time.time()
# frame_id_expected = 0
while True:
last = block.write_frame
if last >= frame_id_expected:
slot_id = frame_id_expected % RING_SIZE
slot = block.slots[slot_id]
return slot # فقط وقتی slot آماده است برگردان
# else:
# continue
if (time.time() - t0) * 1000 > timeout_ms:
return None
time.sleep(0.001)
def slot_to_tensor(slot):
n = int(slot.num_dets)
if n <= 0:
return None
arr = np.zeros((n, 6), dtype=np.float32)
for i in range(n):
d = slot.dets[i]
arr[i, 0] = d.x1
arr[i, 1] = d.y1
arr[i, 2] = d.x2
arr[i, 3] = d.y2
arr[i, 4] = d.score
arr[i, 5] = float(d.class_id)
return arr
def read_frame(block: ShmBlock, frame_id_expected: int, timeout_ms=1000):
"""
فرض: هر slot یک فریم RGB uint8 با shape (H, W, 3) دارد که در block.slots[slot_id].frame قرار دارد
"""
t0 = time.time()
while True:
last = block.write_frame
if last >= frame_id_expected:
slot_id = frame_id_expected % RING_SIZE
slot = block.slots[slot_id]
# frame = np.frombuffer(slot.frame, dtype=np.uint8)
# frame = np.frombuffer(slot.img, dtype=np.uint8).reshape((HEIGHT_MAX, WIDTH_MAX, 3))
frame = np.frombuffer(slot.img, dtype=np.uint8).reshape((slot.img_h, slot.img_w, 3))
# frame = frame.reshape((block.height, block.width, 3))
# frame = frame.reshape((block.img_h, block.img_w, 3))
textreturn frame, slot if (time.time() - t0) * 1000 > timeout_ms: return None, None time.sleep(0.001)
from ctypes import Structure, c_uint64, c_int32, c_float, c_uint8
WIDTH_MAX = 640
HEIGHT_MAX = 640
MAX_DETS = 1024
RING_SIZE = 8
class Detection(Structure):
pack = 1
fields = [
("x1", c_float),
("y1", c_float),
("x2", c_float),
("y2", c_float),
("score", c_float),
("class_id", c_int32)
]
class Slot(Structure):
pack = 1
fields = [
("frame_id", c_uint64), # شماره فریم
("ready", c_int32), # 0: خالی, 1: آماده خواندن
("num_dets", c_int32), # تعداد دیتکشنهای معتبر
("img_w", c_int32),
("img_h", c_int32),
("fps", c_float),
("dets", Detection * MAX_DETS),
("img", c_uint8 * (HEIGHT_MAX * WIDTH_MAX * 3)) # RGB
]
class ShmBlock(Structure):
pack = 1
fields = [
("write_frame", c_uint64), # آخرین فریم نوشتهشده
("read_frame", c_uint64), # آخرین فریم خواندهشده
("ring_size", c_int32), # = RING_SIZE
("max_dets", c_int32), # = MAX_DETS
("img_w", c_int32),
("img_h", c_int32),
("fps", c_float),
("slots", Slot * RING_SIZE)
]
def main():
parser = argparse.ArgumentParser()
parser.add_argument('-s', "--source", type=str, default="video", help="video or webcam")
# parser.add_argument('-p', "--path", type=str, required=True, help="input video")
parser.add_argument('-p', '--path', default=None, help='path to video file') # <- default=None makes it optional
parser.add_argument("-m", "--model", type=str, default="yolov5", help="kept for compatibility")
parser.add_argument("--save_result", action="store_true")
parser.add_argument("--dynamic", dest="dynamic", action="store_true")
parser.add_argument("--swift", dest="swift", action="store_true")
parser.add_argument("--accurate",dest="accurate",action="store_true", default=True)
#added by me this line
parser.add_argument("--dis_traj", action="store_true", help="display trajectories")
text# tracking args (همان قبلی) parser.add_argument("--track_thresh", type=float, default=0.3) parser.add_argument("--track_buffer", type=int, default=200) #60 parser.add_argument("--match_thresh", type=float, default=0.2) parser.add_argument("--aspect_ratio_thresh", type=float, default=0.25) parser.add_argument('--min_box_area', type=float, default=1) parser.add_argument("--shm", type=str, default="/yolo_shm", help="shared memory name") args = parser.parse_args() shm_mm, shm = open_shm(args.shm) # # ویدیو برای نمایش/ذخیره # cap = cv2.VideoCapture(args.path) # if not cap.isOpened(): # print("Failed to open video") # return # width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) # height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # fps = cap.get(cv2.CAP_PROP_FPS) or shm.fps or 30.0 width = shm.img_w height = shm.img_h # width = 640 # height = 640 fps = shm.fps if args.save_result: fourcc = cv2.VideoWriter_fourcc(*"mp4v") out = cv2.VideoWriter("tracked.mp4", fourcc, fps, (width, height)) else: out = None # انتخاب استراتژی نمونهبرداری (مثل قبل) if args.swift: sampling_strategy = 1 elif args.accurate: sampling_strategy = 2 else: sampling_strategy = 0 detection_rate = 9 # مقدار اولیه if args.path is None: args.path = "no_video" tracker = HOPTracker(args, frame_rate=math.ceil(fps)) light_multi_tracker = cv2.legacy.MultiTracker_create() frame_id = 1 predicted_bbox = [] online_targets = [] light_tracker_id = [] # حلقه اصلی while True: # ok, frame = cap.read() # if not ok: break # slot_id = frame_id % RING_SIZE # slot = shm.slots[slot_id] # frame = np.frombuffer(slot.img, dtype=np.uint8).reshape((HEIGHT_MAX, WIDTH_MAX, 3)) # slot , frame = read_frame(shm, frame_id) # if frame is None: # time.sleep(0.002) # continue # frame, slot = read_frame(shm, frame_id) # if frame is None or slot is None: # time.sleep(0.002) # continue # خواندن frame و slot به ترتیب صحیح (read_frame -> returns (frame, slot)) frame, slot = read_frame(shm, frame_id) # if frame is None or slot is None: if frame is None: print("frame is none") time.sleep(0.002) continue if slot is None: print("slot is none") time.sleep(0.002) continue frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) # # محافظت اضافی (اگر به هر دلیلی frame شیء اشتباهی شد) # if not isinstance(frame, np.ndarray): # logger.error(f"Expected numpy.ndarray for frame but got {type(frame)} — skipping frame {frame_id}") # time.sleep(0.002) # continue # # هر فریم، دیتکشن مربوط به آن را از shm میخوانیم (C++ هر فریم نوشته) # slot = read_slot(shm, frame_id) # if slot is None: # # اگر C++ عقب افتاده، فریم را اسکپ میکنیم یا منتظر میمانیم # time.sleep(0.002) # continue # # تعریف دیتکشن خالی برای تست # slot = type('Slot', (), {})() # ساخت یک آبجکت خالی # slot.num_dets = 0 # slot.dets = [] # Before tracking start_time = time.time() # برای هماهنگی با ساختار قبلی: فقط هر N فریم از دیتکشن استفاده کن if frame_id % 4 == 1: # (قبلاً frame_id % detection_rate == 1) light_multi_tracker.clear() light_multi_tracker = cv2.legacy.MultiTracker_create() # dets = slot_to_tensor(slot) # Nx6 [x1 y1 x2 y2 score class] # if dets is None: # online_im = frame # else: # # detect + track fuse (مثل قبل) # online_targets = tracker.detect_track_fuse( # dets, [height, width, frame], (640, 640) # ) dets = slot_to_tensor(slot) # Nx6 [x1 y1 x2 y2 score class] if dets is None: print("dets is None") online_im = frame else: # تبدیل numpy.ndarray به torch.Tensor روی CPU dets = torch.from_numpy(dets).float().cpu() print("/////////////////////////////////////////////////////////////////") print("dets = ", dets) for row in dets: print([f"{v:.2f}" for v in row]) print("height = ", height) print("width = ", width) print("frame = ", frame) online_targets = tracker.detect_track_fuse( dets, [height, width, frame], (640, 640) ) online_tlwhs, online_ids, online_scores, online_class_id = [], [], [], [] predicted_bbox = []; light_tracker_id = [] for t in online_targets: print("online_targets=", online_targets) t.last_detected_frame = frame_id tlwh = t.tlwh; tid = t.track_id tlbr = np.asarray(tlwh).copy(); tlbr[2:] += tlbr[:2] tlbr = np.append(tlbr, [t.score, t.class_id, tid, *t.mean]) predicted_bbox.append(tlbr) if tlwh[2]*tlwh[3] > args.min_box_area and tlwh[3]/tlwh[2] >= args.aspect_ratio_thresh: hist_b, hist_g, hist_r = pixel_distribution( frame, int(t.tlwh[0]), int(t.tlwh[1]), int(t.tlwh[2]), int(t.tlwh[3]) ) t.color_dist = [hist_b, hist_g, hist_r] # لایتترکر روی باکسهای ایستا if t.mean[4]==0 and t.mean[5]==0 and t.mean[6]==0 and t.mean[7]==0: light_multi_tracker.add(cv2.legacy.TrackerMedianFlow_create(), frame, (int(t.tlwh[0]), int(t.tlwh[1]), int(t.tlwh[2]), int(t.tlwh[3]))) light_tracker_id.append(tid) online_tlwhs.append(tlwh) online_ids.append(tid) online_scores.append(t.score) online_class_id.append(t.class_id) # رندر online_im = plot_tracking( image=frame, tlwhs=online_tlwhs, obj_ids=online_ids, online_class_id=online_class_id, frame_id=frame_id, fps=fps, scores=online_scores, class_names=COCO_CLASSES ) else: # فریمهای میانی: فقط آپدیت کالمن/لایتترکر (مثل ساختار قبلی) if len(predicted_bbox) != 0 and len(online_targets) != 0: if frame_id % max(4, detection_rate) <= 3: light_ok, light_bbox = light_multi_tracker.update(frame) else: light_ok, light_bbox = False, [] predict_bbox = [] for idx, each_track in enumerate(online_targets): if light_ok and each_track.track_id in light_tracker_id and len(light_bbox)!=0: lid = light_tracker_id.index(each_track.track_id) nb = light_bbox[lid] predict_bbox.append([nb[0], nb[1], nb[0]+nb[2], nb[1]+nb[3], predicted_bbox[idx][4], predicted_bbox[idx][5]]) else: new_x = each_track.mean[0] + each_track.mean[4] new_y = each_track.mean[1] + each_track.mean[5] new_a = each_track.mean[2] + each_track.mean[6] new_h = each_track.mean[3] + each_track.mean[7] new_w = new_a * new_h tlwh = [new_x - new_w/2, new_y - new_h/2, new_w, new_h] tlbr = np.asarray(tlwh).copy(); tlbr[2:] += tlbr[:2] tlbr = np.append(tlbr, [predicted_bbox[idx][4], predicted_bbox[idx][5]]) predict_bbox.append(tlbr) predicted_bbox = np.array(predict_bbox, dtype=np.float32) online_targets = tracker.hopping_update( predicted_bbox, [height, width, frame], (640, 640) ) online_tlwhs, online_ids, online_scores, online_class_id = [], [], [], [] predicted_bbox = [] for t in online_targets: tlwh = t.tlwh; tid = t.track_id tlbr = np.asarray(tlwh).copy(); tlbr[2:] += tlbr[:2] tlbr = np.append(tlbr, [t.score, t.class_id, tid, *t.mean]) predicted_bbox.append(tlbr) if tlwh[2]*tlwh[3] > args.min_box_area and tlwh[3]/tlwh[2] >= args.aspect_ratio_thresh: online_tlwhs.append(tlwh); online_ids.append(tid) online_scores.append(t.score); online_class_id.append(t.class_id) online_im = plot_tracking( image=frame, tlwhs=online_tlwhs, obj_ids=online_ids, online_class_id=online_class_id, frame_id=frame_id, fps=fps, scores=online_scores, class_names=COCO_CLASSES ) else: online_im = frame # Add this right after tracking end_time = time.time() tracker_time_ms = (end_time - start_time) * 1000 print(f"Frame {frame_id}: tracker time = {tracker_time_ms:.2f} ms") # نمونهبرداری داینامیک (اختیاری، مثل قبل) if (frame_id % 2) == 0 and len(online_targets) != 0: #5 cluster_dic, cluster_num = dbscan_clustering(online_targets) detection_rate = detection_rate_adjuster(cluster_dic, cluster_num, os.path.basename(args.path), sampling_strategy) # خروجی/نمایش if out is not None: out.write(online_im) else: cv2.imshow("HOPTrack (Python Tracker + C++ TRT Detector)", online_im) if cv2.waitKey(1) & 0xFF in [27, ord('q'), ord('Q')]: break frame_id += 1 if out is not None: out.release() # cap.release() cv2.destroyAllWindows() shm_mm.close()
if name == "main":
main()
do yo want to send you c++ side?
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <cmath>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <cstring>
#include <algorithm>
#include <opencv2/opencv.hpp>
#include "NvInfer.h"
#include "cuda_runtime.h"
#include "shm_layout.h"
#include <numeric> // for std::iota
using namespace nvinfer1;
class Logger : public nvinfer1::ILogger
{
void log(Severity severity, const char* msg) noexcept override
{
// suppress info-level messages
if (severity <= Severity::kWARNING)
std::cout << msg << std::endl;
}
} logger;
// ---------- سادهساز مدیریت CUDA ----------
#define CHECK_CUDA(expr) do { cudaError_t __e = (expr); if (__e != cudaSuccess) {
std::cerr << "CUDA Error: " << cudaGetErrorString(__e) << " @ " << FILE << ":" << LINE << "\n"; exit(1);} } while(0)
struct Args {
std::string path;
std::string trt;
std::string shm_name = "/yolo_shm";
int img = 640;
float conf = 0.25f;
float nms = 0.45f;
bool dynamic = false;
};
static void parse_args(int argc, char** argv, Args& a) {
for (int i=1; i<argc; ++i) {
std::string k = argv[i];
auto next = &{ if(i+1>=argc) {std::cerr<<"Missing value for "<<k<<"\n"; exit(1);} return std::string(argv[++i]); };
if (k=="--path" || k=="-p") a.path = next();
else if (k=="--trt" || k=="-trt") a.trt = next();
else if (k=="--img") a.img = std::stoi(next());
else if (k=="--conf") a.conf = std::stof(next());
else if (k=="--nms") a.nms = std::stof(next());
else if (k=="--dynamic" || k=="--dyn" || k=="--dynamic-shape") a.dynamic = true;
else if (k=="--shm") a.shm_name = next();
else { std::cerr<<"Unknown arg: "<<k<<"\n"; exit(1); }
}
if (a.path.empty() || a.trt.empty()) {
std::cerr<<"Usage: detector_trt_shm --path <video> --trt <engine.trt> [--img 640] [--conf 0.25] [--nms 0.45] [--dynamic] [--shm /yolo_shm]\n";
exit(1);
}
}
// ---------- ابزار Letterbox ----------
struct LetterboxInfo { float scale; int pad_w; int pad_h; int net; };
static cv::Mat letterbox(const cv::Mat& img, int net_size, LetterboxInfo& info) {
int w = img.cols, h = img.rows; info.net = net_size;
float r = std::min((float)net_size / h, (float)net_size / w);
int new_unpad_w = std::round(w * r);
int new_unpad_h = std::round(h * r);
cv::Mat resized; cv::resize(img, resized, cv::Size(new_unpad_w, new_unpad_h), 0, 0, cv::INTER_LINEAR);
int dw = net_size - new_unpad_w;
int dh = net_size - new_unpad_h;
info.scale = r;
info.pad_w = dw/2;
info.pad_h = dh/2;
cv::Mat out(net_size, net_size, CV_8UC3, cv::Scalar(114,114,114));
resized.copyTo(out(cv::Rect(info.pad_w, info.pad_h, resized.cols, resized.rows)));
return out;
}
static inline float iou(const cv::Rect2f& a, const cv::Rect2f& b) {
float inter = (a & b).area();
float uni = a.area() + b.area() - inter + 1e-6f;
return inter / uni;
}
struct Det { cv::Rect2f box; float score; int cls; };
// NMS ساده
static std::vector<Det> nms(const std::vector<Det>& dets, float iou_thr) {
std::vector<int> idxs(dets.size());
std::iota(idxs.begin(), idxs.end(), 0);
std::sort(idxs.begin(), idxs.end(), [&](int i, int j){ return dets[i].score > dets[j].score; });
std::vector<Det> keep;
std::vector<char> removed(dets.size(), 0);
for (size_t _i=0; _i<idxs.size(); ++_i) {
int i = idxs[_i]; if (removed[i]) continue;
keep.push_back(dets[i]);
for (size_t _j=_i+1; _j<idxs.size(); ++_j) {
int j = idxs[_j]; if (removed[j]) continue;
if (iou(dets[i].box, dets[j].box) > iou_thr) removed[j] = 1;
}
}
return keep;
}
// دیسریالایز TRT
static ICudaEngine* load_engine(const std::string& path, IRuntime*& runtime) {
std::ifstream f(path, std::ios::binary);
if(!f) { std::cerr<<"Cannot open engine: "<<path<<"\n"; exit(1); }
f.seekg(0, std::ios::end); size_t sz = f.tellg(); f.seekg(0, std::ios::beg);
std::vector<char> buf(sz); f.read(buf.data(), sz);
textruntime = nvinfer1::createInferRuntime(logger); if (!runtime) { std::cerr<<"Failed to create runtime\n"; exit(1); } auto* eng = runtime->deserializeCudaEngine(buf.data(), sz); if (!eng) { std::cerr<<"Failed to deserialize engine\n"; exit(1); } return eng;
}
// کمکی: پیدا کردن اندیس ورودی/خروجی
static int find_binding_index(ICudaEngine* engine, bool input=true) {
for (int i=0;i<engine->getNbIOTensors();++i) {
const char* name = engine->getIOTensorName(i);
TensorIOMode mode = engine->getTensorIOMode(name);
if ( (input && mode==TensorIOMode::kINPUT) || (!input && mode==TensorIOMode::kOUTPUT) ) {
return i;
}
}
return -1;
}
// پارس کردن یخروجیهای متداول YOLOv5 (دو حالت معروف)
static void parse_yolov5_output(const float* out, const Dims& dims, float conf_thr, int net,
const LetterboxInfo& lb, int img_w, int img_h,
std::vector<Det>& out_dets)
{
// حالت A: (1, N, 85) یا (N, 85)
int nb = 1, N = 0, C = 0;
if (dims.nbDims==3) { nb = dims.d[0]; N = dims.d[1]; C = dims.d[2]; }
else if (dims.nbDims==2) { N = dims.d[0]; C = dims.d[1]; }
else { // حالت B: (1, 25200, 7) یا (M, 6)
// تلاش برای حالت عمومی
}
std::cout << "Output dims: nbDims=" << dims.nbDims;
for (int i=0;i<dims.nbDims;i++) std::cout << " " << dims.d[i];
std::cout << std::endl;
textif (C >= 6 && N > 0) { for (int i=0;i<N;i++) { const float* p = out + i*C; float x = p[0], y = p[1], w = p[2], h = p[3]; float obj = p[4]; // پیدا کردن بیشینه کلاس int best_cls = -1; float best_conf = 0.f; for (int k=5;k<C;k++) { float c = p[k]; if (c > best_conf) { best_conf = c; best_cls = k-5; } } float score = obj * best_conf; if (score < conf_thr) continue; // xywh → x1y1x2y2 در فضای شبکه float x1 = x - w/2.f; float y1 = y - h/2.f; float x2 = x + w/2.f; float y2 = y + h/2.f; // برگرداندن به تصویر اصلی (حذف پدینگ و اسکیل برعکس) x1 -= lb.pad_w; x2 -= lb.pad_w; y1 -= lb.pad_h; y2 -= lb.pad_h; x1 /= lb.scale; x2 /= lb.scale; y1 /= lb.scale; y2 /= lb.scale; // کِلَمپ به محدوده تصویر x1 = std::max(0.f, std::min((float)img_w-1.f, x1)); y1 = std::max(0.f, std::min((float)img_h-1.f, y1)); x2 = std::max(0.f, std::min((float)img_w-1.f, x2)); y2 = std::max(0.f, std::min((float)img_h-1.f, y2)); out_dets.push_back({ cv::Rect2f(cv::Point2f(x1,y1), cv::Point2f(x2,y2)), score, best_cls }); } return; } // حالت جایگزین: اگر خروجی مستقیم [x1,y1,x2,y2,score,cls] بود if (C==6 || C==7) { for (int i=0;i<N;i++) { const float* p = out + i*C; float x1=p[0], y1=p[1], x2=p[2], y2=p[3], score=p[4]; int cls = (int)std::round(p[5]); if (score < conf_thr) continue; // اسکیل برعکس (احتمالاً لازم نیست اگر دکدر در TRT انجام شده) out_dets.push_back({ cv::Rect2f(cv::Point2f(x1,y1), cv::Point2f(x2,y2)), score, cls }); } return; } // اگر هیچکدام نبود: // برای سادگی، اینجا خروجی ناشناخته تلقی میشود. std::cerr << "[WARN] Unrecognized output shape; please adapt parser.\n";
}
static size_t shm_size() { return sizeof(ShmBlock); }
int main(int argc, char** argv) {
Args args; parse_args(argc, argv, args);
text// --- Shared Memory --- int fd = shm_open(args.shm_name.c_str(), O_CREAT | O_RDWR, 0666); if (fd < 0) { perror("shm_open"); return 1; } if (ftruncate(fd, shm_size()) < 0) { perror("ftruncate"); return 1; } void* base = mmap(nullptr, shm_size(), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (base == MAP_FAILED) { perror("mmap"); return 1; } auto* shm = reinterpret_cast<ShmBlock*>(base); std::memset(shm, 0, sizeof(ShmBlock)); shm->ring_size = RING_SIZE; shm->max_dets = MAX_DETS; // --- OpenCV video --- cv::VideoCapture cap(args.path); if (!cap.isOpened()) { std::cerr<<"Cannot open video: "<<args.path<<"\n"; return 1; } int img_w = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int img_h = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); float fps = (float)cap.get(cv::CAP_PROP_FPS); shm->img_w = img_w; shm->img_h = img_h; shm->fps = fps; // --- TensorRT --- IRuntime* runtime=nullptr; ICudaEngine* engine = load_engine(args.trt, runtime); // اینجا پیام موفقیت اضافه شود std::cout << "TensorRT engine loaded successfully from " << args.trt << "\n"; IExecutionContext* ctx = engine->createExecutionContext(); if (!ctx) { std::cerr<<"Failed to create TRT context\n"; return 1; } // پیدا کردن نامها/ایندکس IO int in_idx = find_binding_index(engine, true); int out_idx = find_binding_index(engine, false); const char* in_name = engine->getIOTensorName(in_idx); const char* out_name = engine->getIOTensorName(out_idx); // تنظیم شکل ورودی برای داینامیک if (args.dynamic) { Dims inDims; inDims.nbDims = 4; inDims.d[0]=1; inDims.d[1]=3; inDims.d[2]=args.img; inDims.d[3]=args.img; if (!ctx->setInputShape(in_name, inDims)) { std::cerr<<"setInputShape failed\n"; return 1; } } // اندازه بافرها Dims inDims = ctx->getTensorShape(in_name); Dims outDims= ctx->getTensorShape(out_name); size_t in_size = 1; for (int i=0;i<inDims.nbDims;i++) in_size *= inDims.d[i]; size_t out_size = 1; for (int i=0;i<outDims.nbDims;i++) out_size *= outDims.d[i]; // بافرهای GPU/CPU float* d_in=nullptr; float* d_out=nullptr; CHECK_CUDA(cudaMalloc(&d_in, in_size * sizeof(float))); CHECK_CUDA(cudaMalloc(&d_out, out_size * sizeof(float))); std::vector<float> h_out(out_size); // استریم cudaStream_t stream; CHECK_CUDA(cudaStreamCreate(&stream)); // اجرای حلقه uint64_t frame_id = 0; while (true) { cv::Mat frame; if (!cap.read(frame)) break; LetterboxInfo lb; cv::Mat lbimg = letterbox(frame, args.img, lb); // BGR->RGB, HWC->CHW, float32/255 cv::Mat img_f; lbimg.convertTo(img_f, CV_32FC3, 1.0/255.0); std::vector<float> chw(3 * args.img * args.img); int hw = args.img * args.img; std::vector<cv::Mat> chs(3); for (int i=0;i<3;i++) chs[i] = cv::Mat(args.img, args.img, CV_32FC1, chw.data()+i*hw); cv::split(img_f, chs); // کپی به GPU CHECK_CUDA(cudaMemcpyAsync(d_in, chw.data(), chw.size()*sizeof(float), cudaMemcpyHostToDevice, stream)); // bind ctx->setTensorAddress(in_name, d_in); ctx->setTensorAddress(out_name, d_out); // اجرا if (!ctx->enqueueV3(stream)) { std::cerr<<"enqueueV3 failed\n"; break; } // خروجی به CPU CHECK_CUDA(cudaMemcpyAsync(h_out.data(), d_out, h_out.size()*sizeof(float), cudaMemcpyDeviceToHost, stream)); CHECK_CUDA(cudaStreamSynchronize(stream)); // پارس خروجی std::vector<Det> dets; parse_yolov5_output(h_out.data(), outDims, args.conf, args.img, lb, img_w, img_h, dets); // NMS dets = nms(dets, args.nms); /* // 🔹 اینجا کد رسم و نمایش اضافه کنید for (const auto& d : dets) { cv::rectangle(frame, d.box, cv::Scalar(0, 255, 0), 2); std::string label = "cls:" + std::to_string(d.cls) + " conf:" + cv::format("%.2f", d.score); int baseLine; cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine); cv::rectangle(frame, cv::Point((int)d.box.x, (int)d.box.y - labelSize.height - 2), cv::Point((int)d.box.x + labelSize.width, (int)d.box.y), cv::Scalar(0, 255, 0), cv::FILLED); cv::putText(frame, label, cv::Point((int)d.box.x, (int)d.box.y - 2), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0,0,0), 1); } */ cv::imshow("Detections", frame); if (cv::waitKey(1) == 27) break; // ESC برای خروج // نوشتن در شِردمموری frame_id++; int slot_id = frame_id % RING_SIZE; Slot& slot = shm->slots[slot_id]; slot.ready = 0; // قفل ساده slot.frame_id = frame_id; slot.img_w = img_w; slot.img_h = img_h; slot.fps = fps; int M = std::min((int)dets.size(), MAX_DETS); slot.num_dets = M; for (int i=0;i<M;i++) { slot.dets[i].x1 = dets[i].box.x; slot.dets[i].y1 = dets[i].box.y; slot.dets[i].x2 = dets[i].box.x + dets[i].box.width; slot.dets[i].y2 = dets[i].box.y + dets[i].box.height; slot.dets[i].score = dets[i].score; slot.dets[i].class_id = dets[i].cls; } // 🔹 اینجا قبل از slot.ready = 1 اضافه کنید cv::Mat rgb; cv::cvtColor(frame, rgb, cv::COLOR_BGR2RGB); cv::Mat resized; cv::resize(rgb, resized, cv::Size(WIDTH_MAX, HEIGHT_MAX)); std::memcpy(slot.img, resized.data, WIDTH_MAX * HEIGHT_MAX * 3); //cv::imshow("DebugFrame", resized); //cv::waitKey(1);
// یا ذخیره موقت
textslot.ready = 1; shm->write_frame = frame_id; // چاپ نتایج Detection std::cout << "Frame " << frame_id << " - Detections: " << M << std::endl; for (int i=0;i<M;i++) { std::cout << " [" << i << "] " << "cls=" << slot.dets[i].class_id << " conf=" << slot.dets[i].score << " box=(" << slot.dets[i].x1 << "," << slot.dets[i].y1 << ")-(" << slot.dets[i].x2 << "," << slot.dets[i].y2 << ")" << std::endl; } } // آزادسازی cudaStreamDestroy(stream); cudaFree(d_in); cudaFree(d_out); // آزادسازی TensorRT objects - روش جدید delete ctx; delete engine; delete runtime; munmap(base, shm_size()); close(fd); shm_unlink(args.shm_name.c_str()); return 0;
}
You’re running into two classic shared-memory pitfalls:
Shape mismatch: C++ writes a fixed WIDTH_MAX×HEIGHT_MAX
image (640×640 in your snippet) but sets slot.img_w/h
to the original video size (e.g. 1920×1080). Your Python then tries to reshape HEIGHT_MAX*WIDTH_MAX*3
bytes into (img_h,img_w,3)
→ wrong size & occasional crashes/garbage.
Race conditions: The Python reader ignores slot.ready
and slot.frame_id
, returns a slot that may still be in write, and never sets slot.ready=0
after reading. That leads to tearing / inconsistent detections.
Below is a robust, complete pair (C++ producer + Python consumer) plus a shared layout that fixes both problems:
ready
states (0=FREE, 1=WRITING, 2=READY).ready=2
.ready=2 && frame_id
match, copies only the used bytes, then sets ready=0
.img_w/img_h
accordingly so Python always reshapes the correct number of bytes.img_w*img_h*3
before reshape and copies to avoid reuse by the writer while tracking.shm_layout.h
cpp#pragma once #include <cstdint> #define WIDTH_MAX 1920 // capacity of the image buffer in SHM #define HEIGHT_MAX 1080 #define MAX_DETS 1024 #define RING_SIZE 8 // Slot.ready states enum : int32_t { SLOT_FREE = 0, SLOT_WRITING = 1, SLOT_READY = 2 }; #pragma pack(push, 1) struct Detection { float x1, y1, x2, y2; // in the coordinate system of the *stored image* float score; int32_t class_id; }; struct Slot { uint64_t frame_id; // monotonically increasing int32_t ready; // 0=FREE,1=WRITING,2=READY int32_t num_dets; int32_t img_w; // width of stored RGB image int32_t img_h; // height of stored RGB image float fps; Detection dets[MAX_DETS]; // detections for this frame // RGB image bytes. We only use the first (img_w*img_h*3) bytes each frame. uint8_t img[WIDTH_MAX * HEIGHT_MAX * 3]; }; struct ShmBlock { uint64_t write_frame; // last frame_id written (for reader progress) uint64_t read_frame; // last frame_id fully read (for writer stats) int32_t ring_size; // = RING_SIZE int32_t max_dets; // = MAX_DETS // nominal camera/stream info (may match or exceed per-slot img_w/h) int32_t nominal_w; int32_t nominal_h; float nominal_fps; Slot slots[RING_SIZE]; }; #pragma pack(pop)
shm_layout.py
python# shm_layout.py import ctypes as C WIDTH_MAX = 1920 HEIGHT_MAX = 1080 MAX_DETS = 1024 RING_SIZE = 8 SLOT_FREE, SLOT_WRITING, SLOT_READY = 0, 1, 2 class Detection(C.Structure): _pack_ = 1 _fields_ = [ ("x1", C.c_float), ("y1", C.c_float), ("x2", C.c_float), ("y2", C.c_float), ("score", C.c_float), ("class_id", C.c_int32), ] class Slot(C.Structure): _pack_ = 1 _fields_ = [ ("frame_id", C.c_uint64), ("ready", C.c_int32), ("num_dets", C.c_int32), ("img_w", C.c_int32), ("img_h", C.c_int32), ("fps", C.c_float), ("dets", Detection * MAX_DETS), ("img", C.c_uint8 * (WIDTH_MAX * HEIGHT_MAX * 3)), ] class ShmBlock(C.Structure): _pack_ = 1 _fields_ = [ ("write_frame", C.c_uint64), ("read_frame", C.c_uint64), ("ring_size", C.c_int32), ("max_dets", C.c_int32), ("nominal_w", C.c_int32), ("nominal_h", C.c_int32), ("nominal_fps", C.c_float), ("slots", Slot * RING_SIZE), ]
detector_trt_shm.cpp
This is your code, tightened for SHM correctness and image-size consistency. (Keeps your TRT bits; focus is on how we write the slot.)
cpp#include <iostream> #include <fstream> #include <vector> #include <string> #include <cmath> #include <fcntl.h> #include <sys/mman.h> #include <sys/stat.h> #include <unistd.h> #include <cstring> #include <algorithm> #include <atomic> #include <numeric> #include <opencv2/opencv.hpp> #include "NvInfer.h" #include "cuda_runtime.h" #include "shm_layout.h" using namespace nvinfer1; struct Logger : ILogger { void log(Severity s, const char* msg) noexcept override { if (s <= Severity::kWARNING) std::cout << msg << std::endl; } } gLogger; #define CHECK_CUDA(x) do{auto e=(x); if(e!=cudaSuccess){ \ std::cerr<<"CUDA: "<<cudaGetErrorString(e)<<" @ "<<__FILE__<<":"<<__LINE__<<"\n"; std::exit(1);} }while(0) struct Args { std::string path; std::string trt; std::string shm_name = "/yolo_shm"; int img = 640; float conf = 0.25f; float nms = 0.45f; bool dynamic = false; }; static void parse_args(int argc, char** argv, Args& a) { auto next = [&](int& i){ if (i+1>=argc){ std::cerr<<"Missing value for "<<argv[i]<<"\n"; std::exit(1);} return std::string(argv[++i]); }; for (int i=1;i<argc;++i){ std::string k=argv[i]; if (k=="--path"||k=="-p") a.path=next(i); else if (k=="--trt"||k=="-trt") a.trt=next(i); else if (k=="--img") a.img=std::stoi(next(i)); else if (k=="--conf") a.conf=std::stof(next(i)); else if (k=="--nms") a.nms=std::stof(next(i)); else if (k=="--dynamic"||k=="--dyn"||k=="--dynamic-shape") a.dynamic=true; else if (k=="--shm") a.shm_name=next(i); else { std::cerr<<"Unknown arg "<<k<<"\n"; std::exit(1); } } if (a.path.empty() || a.trt.empty()){ std::cerr<<"Usage: detector_trt_shm --path <video> --trt <engine.trt> [--img 640] [--conf 0.25] [--nms 0.45] [--dynamic] [--shm /yolo_shm]\n"; std::exit(1); } } // --- trivial letterbox for TRT input --- struct LetterboxInfo { float scale; int pad_w; int pad_h; int net; }; static cv::Mat letterbox(const cv::Mat& img, int net, LetterboxInfo& info) { int w=img.cols, h=img.rows; info.net=net; float r=std::min(net/(float)h, net/(float)w); int nw = std::round(w*r), nh = std::round(h*r); cv::Mat resized; cv::resize(img, resized, cv::Size(nw,nh)); int dw = net - nw, dh = net - nh; info.scale=r; info.pad_w=dw/2; info.pad_h=dh/2; cv::Mat out(net, net, CV_8UC3, cv::Scalar(114,114,114)); resized.copyTo(out(cv::Rect(info.pad_w, info.pad_h, resized.cols, resized.rows))); return out; } // (IoU + NMS helpers) static inline float iou(const cv::Rect2f& a, const cv::Rect2f& b){ float inter=(a&b).area(); float uni=a.area()+b.area()-inter+1e-6f; return inter/uni; } struct Det { cv::Rect2f box; float score; int cls; }; static std::vector<Det> nms(const std::vector<Det>& ds, float thr){ std::vector<int> idx(ds.size()); std::iota(idx.begin(), idx.end(), 0); std::sort(idx.begin(), idx.end(), [&](int i,int j){return ds[i].score>ds[j].score;}); std::vector<Det> keep; std::vector<char> rm(ds.size(),0); for(size_t a=0;a<idx.size();++a){int i=idx[a]; if(rm[i])continue; keep.push_back(ds[i]); for(size_t b=a+1;b<idx.size();++b){int j=idx[b]; if(rm[j])continue; if(iou(ds[i].box,ds[j].box)>thr) rm[j]=1;}} return keep; } // Parse typical YOLOv5 head output (x,y,w,h,obj, +classes...), *returning boxes in the coordinate system of the stored image*. static void parse_yolov5_output(const float* out, const Dims& dims, float conf_thr, const LetterboxInfo& lb, int src_w, int src_h, std::vector<Det>& out_dets) { // dims like (1, N, C) or (N, C) int N=0, C=0; if (dims.nbDims==3){ N=dims.d[1]; C=dims.d[2]; } else if (dims.nbDims==2){ N=dims.d[0]; C=dims.d[1]; } if (N<=0 || C<6) { std::cerr<<"[WARN] unexpected output dims\n"; return; } for (int i=0;i<N;i++){ const float* p = out + i*C; float x=p[0], y=p[1], w=p[2], h=p[3], obj=p[4]; int best_cls=-1; float best_c=0.f; for (int k=5;k<C;k++){ if (p[k]>best_c){ best_c=p[k]; best_cls=k-5; } } float score = obj * best_c; if (score < conf_thr) continue; // xywh (net) -> x1y1x2y2 (net) float x1 = x - w/2.f, y1 = y - h/2.f; float x2 = x + w/2.f, y2 = y + h/2.f; // Undo letterbox to original source space x1 -= lb.pad_w; x2 -= lb.pad_w; y1 -= lb.pad_h; y2 -= lb.pad_h; x1 /= lb.scale; y1 /= lb.scale; x2 /= lb.scale; y2 /= lb.scale; // clamp x1 = std::max(0.f, std::min((float)src_w-1.f, x1)); y1 = std::max(0.f, std::min((float)src_h-1.f, y1)); x2 = std::max(0.f, std::min((float)src_w-1.f, x2)); y2 = std::max(0.f, std::min((float)src_h-1.f, y2)); out_dets.push_back({ cv::Rect2f({x1,y1},{x2,y2}), score, best_cls }); } } static ICudaEngine* load_engine(const std::string& path, IRuntime*& rt){ std::ifstream f(path, std::ios::binary); if (!f){ std::cerr<<"Cannot open engine "<<path<<"\n"; std::exit(1); } f.seekg(0,std::ios::end); size_t sz=f.tellg(); f.seekg(0,std::ios::beg); std::vector<char> buf(sz); f.read(buf.data(), sz); rt = createInferRuntime(gLogger); if (!rt){ std::cerr<<"createInferRuntime failed\n"; std::exit(1); } auto* eng = rt->deserializeCudaEngine(buf.data(), sz); if (!eng){ std::cerr<<"deserializeCudaEngine failed\n"; std::exit(1); } return eng; } static size_t shm_size(){ return sizeof(ShmBlock); } int main(int argc, char** argv){ Args args; parse_args(argc, argv, args); // --- SHM --- int fd = shm_open(args.shm_name.c_str(), O_CREAT|O_RDWR, 0666); if (fd<0){ perror("shm_open"); return 1; } if (ftruncate(fd, shm_size())<0){ perror("ftruncate"); return 1; } void* base = mmap(nullptr, shm_size(), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); if (base==MAP_FAILED){ perror("mmap"); return 1; } auto* shm = reinterpret_cast<ShmBlock*>(base); std::memset(shm, 0, sizeof(ShmBlock)); shm->ring_size = RING_SIZE; shm->max_dets = MAX_DETS; // --- Video --- cv::VideoCapture cap(args.path); if (!cap.isOpened()){ std::cerr<<"Cannot open "<<args.path<<"\n"; return 1; } int src_w = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int src_h = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); float fps = (float)(cap.get(cv::CAP_PROP_FPS) > 0 ? cap.get(cv::CAP_PROP_FPS) : 30.0); shm->nominal_w = src_w; shm->nominal_h = src_h; shm->nominal_fps = fps; // --- TRT --- IRuntime* rt=nullptr; ICudaEngine* eng=load_engine(args.trt, rt); std::unique_ptr<IExecutionContext> ctx(eng->createExecutionContext()); int inIdx=-1, outIdx=-1; for (int i=0;i<eng->getNbIOTensors();++i){ const char* name = eng->getIOTensorName(i); if (eng->getTensorIOMode(name)==TensorIOMode::kINPUT) inIdx=i; else outIdx=i; } const char* inName = eng->getIOTensorName(inIdx); const char* outName = eng->getIOTensorName(outIdx); if (args.dynamic){ Dims d; d.nbDims=4; d.d[0]=1; d.d[1]=3; d.d[2]=args.img; d.d[3]=args.img; if (!ctx->setInputShape(inName, d)){ std::cerr<<"setInputShape failed\n"; return 1; } } Dims inD = ctx->getTensorShape(inName); Dims outD= ctx->getTensorShape(outName); size_t inCount=1, outCount=1; for (int i=0;i<inD.nbDims;i++) inCount *= (size_t)inD.d[i]; for (int i=0;i<outD.nbDims;i++) outCount *= (size_t)outD.d[i]; float *dIn=nullptr, *dOut=nullptr; CHECK_CUDA(cudaMalloc(&dIn, inCount * sizeof(float))); CHECK_CUDA(cudaMalloc(&dOut, outCount* sizeof(float))); std::vector<float> hOut(outCount); cudaStream_t stream; CHECK_CUDA(cudaStreamCreate(&stream)); uint64_t frame_id=0; while (true){ cv::Mat frame_bgr; if(!cap.read(frame_bgr)) break; // Prepare TRT input LetterboxInfo lb; cv::Mat lbimg = letterbox(frame_bgr, args.img, lb); cv::Mat img_f; lbimg.convertTo(img_f, CV_32FC3, 1.0/255.0); std::vector<float> chw(3*args.img*args.img); int hw=args.img*args.img; std::vector<cv::Mat> chs{ cv::Mat(args.img,args.img,CV_32FC1, chw.data()+0*hw), cv::Mat(args.img,args.img,CV_32FC1, chw.data()+1*hw), cv::Mat(args.img,args.img,CV_32FC1, chw.data()+2*hw) }; cv::split(img_f, chs); CHECK_CUDA(cudaMemcpyAsync(dIn, chw.data(), chw.size()*sizeof(float), cudaMemcpyHostToDevice, stream)); ctx->setTensorAddress(inName, dIn); ctx->setTensorAddress(outName, dOut); if (!ctx->enqueueV3(stream)){ std::cerr<<"enqueueV3 failed\n"; break; } CHECK_CUDA(cudaMemcpyAsync(hOut.data(), dOut, hOut.size()*sizeof(float), cudaMemcpyDeviceToHost, stream)); CHECK_CUDA(cudaStreamSynchronize(stream)); // Parse detections to *original* frame coordinates std::vector<Det> dets; dets.reserve(512); parse_yolov5_output(hOut.data(), outD, args.conf, lb, src_w, src_h, dets); dets = nms(dets, args.nms); // ---- Write to SHM (with backpressure) ---- frame_id++; int slot_id = (int)(frame_id % RING_SIZE); Slot& slot = shm->slots[slot_id]; // If reader is slow, wait for slot to become free (drop frames if you prefer: just skip waiting). while (__atomic_load_n(&slot.ready, __ATOMIC_ACQUIRE) == SLOT_READY) { // simple sleep to avoid busy-wait burn usleep(1000); } // Mark WRITING __atomic_store_n(&slot.ready, SLOT_WRITING, __ATOMIC_RELEASE); slot.frame_id = frame_id; slot.fps = fps; // Copy image (prefer original size if fits) int out_w = frame_bgr.cols; int out_h = frame_bgr.rows; cv::Mat rgb; cv::cvtColor(frame_bgr, rgb, cv::COLOR_BGR2RGB); if (out_w > WIDTH_MAX || out_h > HEIGHT_MAX) { // Resize to fit capacity, adjust dets accordingly double sx = (double)WIDTH_MAX / out_w; double sy = (double)HEIGHT_MAX / out_h; cv::Mat resized; cv::resize(rgb, resized, cv::Size((int)(out_w*sx), (int)(out_h*sy))); out_w = resized.cols; out_h = resized.rows; // scale dets for (auto& d : dets){ d.box.x *= (float)sx; d.box.y *= (float)sy; d.box.width *= (float)sx; d.box.height *= (float)sy; } std::memcpy(slot.img, resized.data, (size_t)out_w*out_h*3); } else { std::memcpy(slot.img, rgb.data, (size_t)out_w*out_h*3); } slot.img_w = out_w; slot.img_h = out_h; // Copy detections (clamped to MAX_DETS) int M = std::min((int)dets.size(), MAX_DETS); slot.num_dets = M; for (int i=0;i<M;i++){ slot.dets[i].x1 = dets[i].box.x; slot.dets[i].y1 = dets[i].box.y; slot.dets[i].x2 = dets[i].box.x + dets[i].box.width; slot.dets[i].y2 = dets[i].box.y + dets[i].box.height; slot.dets[i].score = dets[i].score; slot.dets[i].class_id= dets[i].cls; } // Publish __atomic_thread_fence(__ATOMIC_SEQ_CST); __atomic_store_n(&slot.ready, SLOT_READY, __ATOMIC_RELEASE); __atomic_store_n(&shm->write_frame, frame_id, __ATOMIC_RELEASE); // (Optional debug) // std::cout<<"Frame "<<frame_id<<" dets="<<M<<" img="<<out_w<<"x"<<out_h<<"\n"; if (cv::waitKey(1)==27) break; } cudaStreamDestroy(stream); cudaFree(dIn); cudaFree(dOut); eng->destroy(); rt->destroy(); munmap(base, shm_size()); close(fd); shm_unlink(args.shm_name.c_str()); return 0; }
Build (example):
bashg++ -std=c++17 detector_trt_shm.cpp -o detector_trt_shm \ `pkg-config --cflags --libs opencv4` \ -L/path/to/TensorRT/lib -lnvinfer -lcudart -lpthread -lrt
Run:
bash./detector_trt_shm --path input.mp4 --trt yolov5.engine --img 640 --conf 0.25 --nms 0.45 --shm /yolo_shm
tracker_shm.py
This shows a correct reader you can drop into your project. It reads frames + dets from SHM safely and returns frame (BGR)
, dets (Nx6 float32: x1,y1,x2,y2,score,class)
.
If you want to keep your full HOPTracker loop, just replace your
open_shm
,read_frame
, andslot_to_tensor
with the ones below and keep the rest of your logic.
python# tracker_shm.py import os, time, mmap, argparse, ctypes as C import numpy as np import cv2 from shm_layout import ShmBlock, Slot, Detection, RING_SIZE, MAX_DETS, SLOT_READY, SLOT_FREE def open_shm(name: str): """ Posix shm_open wrapper: Python side opens the file path /dev/shm/<name-without-leading-slash> and mmaps the whole ShmBlock. """ path = f"/dev/shm/{name.lstrip('/')}" fd = os.open(path, os.O_RDWR) size = C.sizeof(ShmBlock) mm = mmap.mmap(fd, size, mmap.MAP_SHARED, mmap.PROT_READ | mmap.PROT_WRITE) os.close(fd) block = ShmBlock.from_buffer(mm) return mm, block def read_slot_blocking(block: ShmBlock, frame_id_expected: int, timeout_ms=1000): """ Waits for the specific frame_id slot to become READY. Returns the Slot or None on timeout. """ t0 = time.time() while True: last = block.write_frame if last >= frame_id_expected: sid = frame_id_expected % RING_SIZE slot = block.slots[sid] if slot.ready == SLOT_READY and slot.frame_id == frame_id_expected: return slot # if writer skipped ahead and overwrote the expected slot, jump forward if last - frame_id_expected >= RING_SIZE: frame_id_expected = last if (time.time() - t0)*1000 > timeout_ms: return None time.sleep(0.0005) def slot_to_numpy_frame(slot: Slot) -> np.ndarray: """ Returns a *copy* of the BGR frame. Copying is essential so we can mark the slot FREE immediately. """ w, h = int(slot.img_w), int(slot.img_h) nbytes = w*h*3 # slice only the used bytes before reshape: buf = np.frombuffer(slot.img, dtype=np.uint8, count=nbytes) rgb = buf.reshape((h, w, 3)).copy() # copy to detach from SHM bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR) return bgr def slot_to_dets(slot: Slot) -> np.ndarray: n = int(slot.num_dets) if n <= 0: return np.zeros((0,6), dtype=np.float32) arr = np.zeros((n,6), dtype=np.float32) for i in range(n): d = slot.dets[i] arr[i,0] = d.x1; arr[i,1] = d.y1; arr[i,2] = d.x2; arr[i,3] = d.y2 arr[i,4] = d.score; arr[i,5] = float(d.class_id) return arr def main(): ap = argparse.ArgumentParser() ap.add_argument("--shm", type=str, default="/yolo_shm") ap.add_argument("--show", action="store_true") args = ap.parse_args() mm, shm = open_shm(args.shm) width = shm.nominal_w height = shm.nominal_h fps = shm.nominal_fps if shm.nominal_fps>0 else 30.0 print(f"Connected to SHM. Nominal stream {width}x{height} @ {fps:.2f}fps") frame_id = 1 try: while True: slot = read_slot_blocking(shm, frame_id, timeout_ms=2000) if slot is None: print("Timeout waiting for frame", frame_id) # skip to newest if writer is alive latest = shm.write_frame if latest > frame_id: frame_id = latest else: break continue frame = slot_to_numpy_frame(slot) dets = slot_to_dets(slot) # mark slot FREE and advance read pointer slot.ready = SLOT_FREE shm.read_frame = frame_id # TODO: integrate with your tracker here: # - Convert dets to torch if needed # - Call HOPTracker.detect_track_fuse or hopping_update # For demo we just draw boxes: for (x1,y1,x2,y2,score,cls) in dets: cv2.rectangle(frame, (int(x1),int(y1)), (int(x2),int(y2)), (0,255,0), 2) cv2.putText(frame, f"{int(cls)}:{score:.2f}", (int(x1),int(y1)-4), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,255,0), 1) if args.show: cv2.imshow("Python tracker (reader)", frame) if cv2.waitKey(1) & 0xFF in [27, ord('q'), ord('Q')]: break frame_id += 1 finally: try: cv2.destroyAllWindows() except: pass mm.close() if __name__ == "__main__": main()
Where to plug into your HOPTracker loop
In your existing script, keep your YOLOX/HOPTracker code as-is, but replace your open_shm
, read_frame
, slot_to_tensor
implementations with the versions above:
slot_to_numpy_frame(slot)
to get frame
(BGR).slot_to_dets(slot)
to get dets
as Nx6
float32
. If your tracker expects a torch.Tensor
, just:
pythonimport torch dets_torch = torch.from_numpy(dets).float().cpu()
pythonslot.ready = SLOT_FREE shm.read_frame = frame_id
img_w*img_h*3
bytes, which exactly match what the writer stored.ready=2
only after all bytes & metadata are written; the reader copies then sets ready=0
. Both sides use a sequence fence so field writes aren’t reordered by the compiler/CPU..copy()
in slot_to_numpy_frame
and defer slot.ready=SLOT_FREE
until you’re done using that frame. The current version copies for safety.slot.img_w=slot.img_h=640
and skip the scaling code. The tracker & overlays will use the net space consistently. (The above code uses original-space by default for better visual fidelity.)WIDTH_MAX/HEIGHT_MAX
in both header & Python mirror (e.g., 3840×2160). Remember that memory = RING_SIZE * WIDTH_MAX * HEIGHT_MAX * 3
bytes (plus dets), so plan accordingly.If you want, paste your current track_utils_shm.py
+ HOPTracker
callsites and I’ll drop these functions into them for you so it’s one cohesive script.