کد زیر کاملا درست کار میکند اما من احتیاج دارم که...

Created on: September 3, 2025

Answered using GPT-5 Thinking by Chat01

Question

کد زیر کاملا درست کار میکند
اما من احتیاج دارم که تصاویر frame ها را با pointer از c++ به python بفرستم با sharedmemory و هیچگونه copy اتفاق نیوفتد نکته اصلی این است
پس هدف اصلی این است که بدون کپی کردن frame انرا برای python بفرستد
makefile را هم بنویس و بگو چگونه کد هارا اجرا کنم

به بقیه ساختار کد دست نزن
کد نهایی را کامل بنویس

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======

# posix_ipc is third-party (named POSIX semaphores); fail fast with an
# install hint if it is missing. shared_memory is stdlib and cannot fail here.
try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== نام‌ها باید با C++ یکی باشند =====

# IPC object names — must match the C++ side byte-for-byte.
FRAME_SHM_NAME = "/shm_yolo_frames"     # frames + detections segment (C++ writes)
TRACKS_SHM_NAME = "/shm_tracks"         # track results segment (Python writes)
SEM_FRAME_READY = "/sem_frame_ready"    # posted by C++ after a frame is in SHM
SEM_TRACKS_READY = "/sem_tracks_ready"  # posted by Python after tracks are in SHM

# Fixed capacities of the arrays inside the shared segments (must match C++).
MAX_DETS = 512
MAX_TRACKS = 512

# Structures (LE)

# Binary layouts (little-endian, no padding) shared with the C++ side.
HDR_FMT = "<iiiii"    # width, height, channels, frame_id, num_dets
# NOTE(review): the C++ FrameHeader (#pragma pack(1)) carries an extra trailing
# `bool frame_ready` byte, so sizeof(FrameHeader) == 21 while HDR_SIZE == 20 —
# every offset after the header would be off by one byte. Confirm and align the
# ABI on both sides before relying on the layout below.
DET_FMT = "<fffffi"   # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii"   # num_tracks, frame_id
TRK_FMT = "<ifffffi"  # track_id, x, y, w, h, conf, class_id

HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)

def open_or_create_semaphores():
    """Open (creating if absent) the two named POSIX semaphores shared with C++.

    Both semaphores start at 0, so each side blocks until the other posts.

    Returns:
        (sem_frame_ready, sem_tracks_ready) tuple of posix_ipc.Semaphore.
    """
    print("---------- open_or_create_semaphores ----------")
    print("start")
    started = time.time()
    semaphores = tuple(
        posix_ipc.Semaphore(sem_name, flags=posix_ipc.O_CREAT, initial_value=0)
        for sem_name in (SEM_FRAME_READY, SEM_TRACKS_READY)
    )
    elapsed_ms = (time.time() - started) * 1000
    print("time : ", elapsed_ms)
    print("end")
    return semaphores

def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Attach to an existing POSIX shared-memory segment, polling until it appears.

    The C++ producer creates the segment; this process may start first, so we
    retry until the deadline. Uses time.monotonic() so the timeout is immune
    to wall-clock adjustments (the original used time.time()).

    Args:
        name: segment name (must match the C++ side).
        timeout_s: overall deadline in seconds.
        retry_s: sleep between attach attempts, in seconds.

    Returns:
        An attached multiprocessing.shared_memory.SharedMemory.

    Raises:
        TimeoutError: if the segment did not appear before the deadline.
    """
    deadline = time.monotonic() + timeout_s
    last_err = None
    while time.monotonic() < deadline:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            # Not created yet — remember the error for exception chaining.
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامه‌ی C++ را اجرا کرده‌اید.") from last_err

def main():
    """Consumer loop: read frames + YOLO detections from C++ via POSIX shared
    memory, run Deep SORT, and write the resulting tracks into a second
    shared segment.

    Per-frame handshake: C++ posts `sem_frame_ready` after writing a frame;
    Python posts `sem_tracks_ready` after writing the tracks back.
    """
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()

    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    buf_f = memoryview(shm_frame.buf)   # frames + detections segment (read side)
    buf_t = memoryview(shm_tracks.buf)  # tracks segment (write side)
    print("✅ SharedMemory وصل شد.")

    # Deep SORT — fast and stable settings
    tracker = DeepSort(
        max_age=40,
        n_init=3,              # confirm tracks quickly
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,              # frames arrive in OpenCV BGR order
        max_cosine_distance=0.3,
        polygon=False
    )
    print("✅ DeepSort initialised with ReID.")

    # Pre-compiled codecs for the shared binary layouts.
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)

    # stats
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()

    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")

    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        # torch.cuda.synchronize()  # if using PyTorch
        sem_frame_ready.acquire()   # block until C++ has published a frame
        t_start = time.time()

        # header
        width, height, channels, frame_id, num_dets = header_struct.unpack_from(buf_f, 0)

        # clean shutdown: C++ signals end-of-stream with frame_id == -1
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break

        # detections: fixed-capacity array right after the header
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))

        # frame pixels follow the (always MAX_DETS-sized) detection array.
        # The memoryview slice + np.frombuffer does not copy: the ndarray
        # aliases the shared-memory bytes directly.
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        img_nbytes = width * height * channels
        img_bytes = buf_f[img_offset: img_offset + img_nbytes]
        frame = np.frombuffer(img_bytes, dtype=np.uint8).reshape((height, width, channels))
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)

        # track
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1

        # collect only confirmed, freshly-updated tracks
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                # fall back: convert left/top/right/bottom to tlwh
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r-l), float(b-tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])   # most recent detection confidence
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")

        # write results into shm_tracks
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()   # tell C++ the tracks are ready

        # per-frame report (tracker output + timing)
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")

    # cleanup: release the memoryviews before closing the segments
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")

    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")

# Script entry point. The transcript's markdown rendering stripped the double
# underscores from __name__/__main__, which would raise NameError; restored.
if __name__ == "__main__":
    main()

// main_ipc_final.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>

// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

// IPC object names — must match the Python side byte-for-byte.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";    // frames + detections (C++ writes)
static const char* SHM_TRACKS_NAME = "/shm_tracks";        // track results (Python writes)
static const char* SEM_FRAME_READY = "/sem_frame_ready";   // posted after a frame is written
static const char* SEM_TRACKS_READY = "/sem_tracks_ready"; // posted by Python after tracks

// Fixed capacities of the arrays inside the shared segments (match Python).
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;

// Binary layouts shared with Python; pack(1) guarantees no padding bytes.
#pragma pack(push, 1)
// Per-frame header at offset 0 of the frames segment.
struct FrameHeader {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;   // -1 signals end-of-stream to Python
    int32_t num_dets;
    //added by me
    // NOTE(review): this extra byte makes sizeof(FrameHeader) == 21 under
    // pack(1), while the Python side unpacks "<iiiii" (20 bytes) — every
    // offset after the header is then off by one byte. Confirm and align the
    // ABI on both sides (e.g. read it as "B" in Python) before using it.
    bool frame_ready;
    //added by me
};
// One YOLO detection: top-left x/y + width/height in pixels.
struct SharedDet {
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
// Header of the tracks segment, written by Python.
struct TracksHeader {
    int32_t num_tracks;
    int32_t frame_id;
};
// One confirmed track record, written by Python.
struct TrackRec {
    int32_t track_id;
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)

// YOLOv8 ONNX detector via OpenCV DNN: loads the model, runs inference, and
// decodes/NMS-filters the raw output into pixel-space detections.
class YOLOv8Detector {
public:
    // Tick captured at construction; drawDetections() prints the elapsed time
    // relative to this, i.e. cumulative time since the detector was created.
    int64 t2 = cv::getTickCount();

    // Load the ONNX model, prefer the CUDA backend (fall back to CPU), and
    // read the class-name list. Working buffers are pre-reserved to avoid
    // per-frame reallocation.
    YOLOv8Detector(const std::string& model_path, const std::string& class_file_path,
                   float conf_thresh = 0.30f, float nms_thresh = 0.4f)
        : conf_threshold(conf_thresh), nms_threshold(nms_thresh),
          input_width(640), input_height(640) {
        std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl;
        net = cv::dnn::readNet(model_path);
        try {
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
            std::cout << "✅ Using GPU (CUDA) backend\n";
        } catch (...) {
            // No CUDA support in this OpenCV build — run on CPU.
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
            std::cout << "⚠️ Using CPU backend\n";
        }
        loadClassNames(class_file_path);
        boxes.reserve(1000); confidences.reserve(1000); class_ids.reserve(1000);
        outputs.reserve(10);
        std::cout << "✅ YOLOv8 model loaded successfully!\n";
        std::cout << "📊 Classes loaded: " << class_names.size() << "\n";
    }

    // One detection in original-frame pixel coordinates.
    struct Detection {
        cv::Rect bbox;
        float confidence;
        int class_id;
        std::string class_name;
    };

    // Run one forward pass on `frame`; reports pure inference time through the
    // out-parameter and returns the NMS-filtered detections.
    std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) {
        auto t0 = std::chrono::high_resolution_clock::now();
        boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear();
        // Resize to 640x640, swap BGR->RGB, scale pixels to [0,1].
        cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height),
                               cv::Scalar(0,0,0), true, false, CV_32F);
        net.setInput(blob);
        net.forward(outputs, net.getUnconnectedOutLayersNames());
        auto t1 = std::chrono::high_resolution_clock::now();
        inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
        return postProcess(frame, outputs);
    }

    // Draw pink boxes + labels on a copy of `frame`. Also prints cumulative
    // time since construction (relative to member t2).
    cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) {
        cv::Mat result = frame.clone();
        // pink (BGR): RGB(255,105,180) -> BGR(180,105,255)
        const cv::Scalar det_color(180,105,255);
        for (const auto& d : detections) {
            cv::rectangle(result, d.bbox, det_color, 2);
            char lab[128];
            std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f);
            int base=0;
            cv::Size sz = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base);
            // Filled label background above the box, then white text on top.
            cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6},
                          {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED);
            cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1);
        }
        int64 t3 = cv::getTickCount();
        double time = (t3 - t2) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl;
        std::cout << "time : " << time << std::endl;
        std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl;
        return result;
    }

private:
    //int64 t4 = cv::getTickCount();

    cv::dnn::Net net;
    std::vector<std::string> class_names;
    float conf_threshold, nms_threshold;
    int input_width, input_height;     // network input size (640x640)
    cv::Mat blob;                      // reused input blob
    std::vector<cv::Mat> outputs;      // reused raw network outputs
    std::vector<cv::Rect> boxes;       // reused pre-NMS scratch buffers
    std::vector<float> confidences;
    std::vector<int> class_ids;

    // Read one class name per line; fall back to a single "obj" label.
    void loadClassNames(const std::string& class_file_path) {
        std::ifstream file(class_file_path);
        std::string line;
        if (file.is_open()) {
            while (std::getline(file, line)) class_names.push_back(line);
            file.close();
        } else {
            std::cout << "⚠️ Class file not found. Using fallback labels.\n";
            class_names = {"obj"};
        }
    }

    // Decode the raw output tensor ([1, 4+num_classes, num_dets], laid out
    // attribute-major so element (i, j) is data[i + j*num_dets]), rescale to
    // frame pixels, threshold by confidence, then apply NMS.
    std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) {
        std::vector<Detection> detections;
        detections.reserve(100);
        // Scale factors from network input space back to the original frame.
        float x_factor = frame.cols / static_cast<float>(input_width);
        float y_factor = frame.rows / static_cast<float>(input_height);
        for (const auto& out : outputs) {
            const float* data = (float*)out.data;
            int num_classes = out.size[1] - 4;   // first 4 rows are cx,cy,w,h
            int num_dets = out.size[2];
            for (int i = 0; i < num_dets; ++i) {
                float cx = data[i];
                float cy = data[i + num_dets];
                float w = data[i + 2*num_dets];
                float h = data[i + 3*num_dets];
                // Best class score for this candidate.
                float best = 0.f; int best_id = 0;
                for (int j = 4; j < 4 + num_classes; ++j) {
                    float conf = data[i + j*num_dets];
                    if (conf > best) { best = conf; best_id = j - 4; }
                }
                if (best >= conf_threshold) {
                    // Center-format -> top-left format, rescaled to frame pixels.
                    int x1 = (int)((cx - w/2) * x_factor);
                    int y1 = (int)((cy - h/2) * y_factor);
                    int ww = (int)(w * x_factor);
                    int hh = (int)(h * y_factor);
                    boxes.emplace_back(x1, y1, ww, hh);
                    confidences.push_back(best);
                    class_ids.push_back(best_id);
                }
            }
        }
        std::vector<int> idx;
        cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx);
        detections.reserve(idx.size());
        for (int k : idx) {
            Detection d;
            d.bbox = boxes[k];
            d.confidence = confidences[k];
            d.class_id = class_ids[k];
            d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ?
                           class_names[d.class_id] : "Unknown";
            detections.push_back(std::move(d));
        }
        return detections;
    }
    //int64 t5 = cv::getTickCount();
    //double time_private = (t4 - t5) * 1000.0 / cv::getTickFrequency();
    //std::cout << "---------- class YOLOv8Detector private start ----------" << std::endl;
    //std::cout << "time : " << time_private << std::endl;
    //std::cout << "---------- class YOLOv8Detector private end ----------" << std::endl;

};

// Producer side: decode video, run YOLOv8, publish frame + detections into
// POSIX shared memory, wait for the Python tracker's reply, and visualize.
int main(int argc, char* argv[]) {
    int64 t6 = cv::getTickCount();

    if (argc < 3) {
        std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n";
        return -1;
    }
    std::string model_path = argv[1];
    std::string video_path = argv[2];
    std::string class_file = (argc>3)? argv[3] : "";

    try {
        YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); // lower threshold for stability

        cv::VideoCapture cap(video_path);
        if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; }
        cap.set(cv::CAP_PROP_BUFFERSIZE, 1);

        int fps = (int)cap.get(cv::CAP_PROP_FPS);
        int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
        int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
        int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
        int channels = 3;
        std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n";

        // ===== Shared memory =====
        // Frames segment layout: [FrameHeader][MAX_DETS x SharedDet][pixels].
        size_t frame_bytes = (size_t)width * height * channels;
        size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes;
        int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; }
        if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; }
        void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0);
        if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; }

        // Tracks segment layout: [TracksHeader][MAX_TRACKS x TrackRec].
        size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec);
        int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; }
        if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; }
        void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0);
        if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; }

        // Both semaphores start at 0 (nothing published yet).
        sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0);
        sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0);
        if (sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; }

        cv::VideoWriter writer;
        bool use_writer = false;
        try {
            writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height));
            use_writer = writer.isOpened();
        } catch (...) {
            std::cout << "⚠️ Video writer failed, continuing.\n";
        }

        cv::Mat frame;
        int frame_id = 0;
        double total_det_ms = 0.0;
        auto t0_wall = std::chrono::high_resolution_clock::now();

        // SHM section pointers (header / detection array / pixel area).
        auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames);
        auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader));
        auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet));

        const cv::Scalar det_color(180,105,255);  // pink
        const cv::Scalar trk_color(230,216,173);  // light blue (BGR)
        const cv::Scalar trk_text_color(0,0,0);

        // SHM overhead (computed and printed once)
        size_t shm_total = shm_frame_size + shm_tracks_size;
        std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size
                  << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n";

        int64 t7 = cv::getTickCount();
        double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- main before while start ----------" << std::endl;
        std::cout << "time : " << time_main << std::endl;
        std::cout << "---------- main before while end ----------" << std::endl;

        while (true) {
            int64 t8 = cv::getTickCount();
            cap >> frame;
            if (frame.empty()) break;
            // memcpy below assumes a contiguous pixel buffer.
            if (!frame.isContinuous()) frame = frame.clone();

            double det_ms = 0.0;
            auto dets = detector.detect(frame, det_ms);
            total_det_ms += det_ms;

            // Publish header + detections + pixels into SHM.
            hdr->width = width; hdr->height = height; hdr->channels = 3;
            hdr->frame_id = frame_id;
            hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS);
            for (int i = 0; i < hdr->num_dets; ++i) {
                const auto& d = dets[i];
                det_arr[i].x = (float)d.bbox.x;
                det_arr[i].y = (float)d.bbox.y;
                det_arr[i].w = (float)d.bbox.width;
                det_arr[i].h = (float)d.bbox.height;
                det_arr[i].conf = d.confidence;
                det_arr[i].class_id = d.class_id;
            }
            std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize());

            auto t_py_start = std::chrono::high_resolution_clock::now();
            sem_post(sem_frame_ready);

            // Wait for the tracker's reply (bounded so a dead peer can't hang us).
            struct timespec timeout;
            clock_gettime(CLOCK_REALTIME, &timeout);
            timeout.tv_sec += 2; // a little generous
            int sem_result = sem_timedwait(sem_tracks_ready, &timeout);

            double trk_ms = 0.0;
            std::vector<TrackRec> tracks;
            if (sem_result != 0) {
                std::cout << "⚠️ Timeout waiting for Python tracker\n";
            } else {
                auto t_py_end = std::chrono::high_resolution_clock::now();
                trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count();
                auto* th = reinterpret_cast<TracksHeader*>(shm_tracks);
                auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader));
                int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS);
                tracks.assign(tr, tr + ntracks);   // copy out of SHM before drawing
            }

            // Draw: detections first (pink), then tracks (light blue + ID).
            cv::Mat vis = detector.drawDetections(frame, dets);
            for (const auto& tk : tracks) {
                cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y),
                             (int)std::round(tk.w),(int)std::round(tk.h));
                box &= cv::Rect(0,0,width,height);   // clip to the frame
                if (box.width<=0 || box.height<=0) continue;
                cv::rectangle(vis, box, trk_color, 2);
                char lab[128];
                std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id);
                int base=0;
                cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base);
                // Keep the label inside the image when the box touches the top edge.
                int ly = std::max(ts.height + 6, box.y);
                cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED);
                cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2);
            }

            char filename[256];
            std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id);
            cv::imwrite(filename, vis);

            // Per-frame report: det, trk and combined latency.
            double total_ms = det_ms + trk_ms;
            std::cout << "CPP frame=" << std::setw(5) << frame_id
                      << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms"
                      << " trk=" << trk_ms << "ms"
                      << " total=" << total_ms << "ms"
                      << " dets=" << dets.size()
                      << " tracks=" << tracks.size() << "\n";

            if (use_writer) writer.write(vis);
            cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis);
            char k = (char)cv::waitKey(1);
            if (k=='q' || k==27) break;
            ++frame_id;

            int64 t9 = cv::getTickCount();
            double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency();
            std::cout << "---------- main while start ----------" << std::endl;
            std::cout << "time : " << time_main_while << std::endl;
            std::cout << "---------- main e while end ----------" << std::endl;
        }

        // Shutdown: frame_id == -1 tells Python to exit its loop cleanly.
        auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames);
        hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0;
        hdr_end->frame_id=-1; hdr_end->num_dets=0;
        sem_post(sem_frame_ready);

        cap.release();
        if (use_writer) writer.release();
        cv::destroyAllWindows();

        // Timing summary.
        auto t1_wall = std::chrono::high_resolution_clock::now();
        double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count();
        double avg_det = total_det_ms / std::max(1, frame_id);
        std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det
                  << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n";

        // cleanup
        munmap(shm_frames, shm_frame_size);
        munmap(shm_tracks, shm_tracks_size);
        close(shm_fd_frames);
        close(shm_fd_tracks);
        sem_close(sem_frame_ready);
        sem_close(sem_tracks_ready);
        // ⚠️ Suggestion: defer the unlinks until after Python has finished,
        // otherwise a still-attached Python may lose the names early.
        shm_unlink(SHM_FRAME_NAME);
        shm_unlink(SHM_TRACKS_NAME);
        sem_unlink(SEM_FRAME_READY);
        sem_unlink(SEM_TRACKS_READY);
    } catch (const std::exception& e) {
        std::cerr << "❌ Exception: " << e.what() << "\n";
        return -1;
    }
    return 0;

}

// Build: g++ main_ipc_optimized.cpp -o yolo_deepsort_ipc_optimized $(pkg-config --cflags --libs opencv4)
// Run:   ./yolo_deepsort_ipc_optimized /home/rayfan/Documents/cpp_projects/shared_optimize/yolov8n.onnx /media/rayfan/d/videos/9600+6400/output.avi /home/rayfan/Documents/cpp_projects/shared_optimize/coco.names

Thought

Considering zero-copy approach

I need to modify the code to send frame images from C++ to Python using shared memory without copying. I’m seeing that the current code works but might involve unnecessary slicing. Python’s zero-copy is good for using shared memory, though questions about sending pointers and creating instructions still need attention. Code structure and shared memory naming should sync up well.

Addressing zero-copy requirement

They don’t want changes to the rest of the structure, but want the final code to pass a pointer to frames using shared memory without copying. The current C++ code uses memcpy to copy the frame into shared memory. I need to find a way to capture directly into shared memory using the cv::Mat header referencing img_ptr, but there’s still the question of how VideoCapture::read interacts with this setup.

Ensuring synchronization and zero-copy

It turns out that writing into the Mat buffer is zero-copy because it's just decoding into the shared memory pointer, not copying. However, if they're running YOLO on it, I need to ensure synchronization since both C++ and Python are accessing it. They’ve got semaphores in place for that! Now, in Python, the goal is to avoid copying, using np.ndarray referencing the shared memory directly.

Avoiding unnecessary copy

The slicing creates a new memoryview that still references the same buffer, which works fine for np.frombuffer. The issue lies with DeepSort in Python, which might copy data. In C++, to avoid the copy, we would capture directly into shared memory and change how YOLO handles the frame. We can directly operate on the shared memory frame and create a separate cv::Mat for visualization.

Zero-copy frame transfer plan

For zero-copy, I'll modify the C++ code so that after mapping the shared memory, a cv::Mat header directly references the shared memory pointer, eliminating memcpy. The Python side will access the shared memory using np.ndarray without copying. We'll keep the header layout as is to avoid alignment issues. Despite some uncertainties around the 'bool' field, the aim is to efficiently transfer frames without altering too much.

Updating code for zero-copy

To implement zero-copy, we’ll adjust the code to have C++ write frames directly into shared memory without any unnecessary memcpy. The Python side will work with np.ndarray using shared memory, ensuring there's no copy. We’ll also maintain the structure as requested, with minimal changes. The Makefile will handle compilation, and I’ll provide clear "how to run" steps with necessary dependencies. There's a minor issue with a header mismatch, but the plan is to work around it and align Python's header size with C++ without major revisions.

Adjusting header struct

I'm updating the header parsing in Python for consistency while keeping changes minimal. The current mismatch between Python and C++ headers causes alignment issues. I'll adjust Python's header struct (HDR_FMT) to include a padding byte and ensure the offsets align correctly. The update keeps the structure similar but ensures compatibility with C++.

Thought for 3m 11s

Answer

خیلی خب — هدف اینه که «فریمِ تصویر» واقعا Zero-Copy بین ++C و Python جابه‌جا بشه.
کاری که کردم:

  1. در ++C به‌جای memcpy، یک cv::Mat مستقیما روی بافرِ Shared Memory می‌سازیم و همان را با VideoCapture::read پُر می‌کنیم. یعنی خود دیکُدِ ویدیو مستقیم می‌رود داخل SHM (بدون کپی اضافی).

  2. در Python هم numpy.ndarray را با offset روی همان بافر می‌سازیم (نه کپی).

  3. برای یکی‌بودن ABI، همان فیلد bool frame_ready که در هدر ++C داشتید را در پایتون هم لحاظ کردم (به‌صورت uint8) تا اندازهٔ هدر در هر دو طرف دقیقاً یکی باشد.

🔧 بقیهٔ ساختار برنامه را دست نزدم؛ فقط همان نقاط لازم برای Zero-Copy را تغییر دادم و یک Makefile هم اضافه کردم.


✅ نسخهٔ نهایی (Zero-Copy)

tracker_py.py (Python)

python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Zero-copy consumer: reads frames + detections written by C++ into POSIX
shared memory, runs Deep SORT, and writes tracks back. The frame ndarray is
built directly on the shared buffer (np.ndarray with buffer/offset) — no copy.
"""
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======
try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== Names must match the C++ side exactly =====
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"

MAX_DETS = 512
MAX_TRACKS = 512

# Structures (LE)
# Important: the C++ FrameHeader carries a trailing `bool frame_ready`;
# it is read here as uint8 so the header size matches exactly on both sides.
HDR_FMT = "<iiiiiB"  # width, height, channels, frame_id, num_dets, frame_ready(u8)
DET_FMT = "<fffffi"  # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii"  # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id

HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)

def open_or_create_semaphores():
    """Open (creating if absent) the two named semaphores shared with C++."""
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready

def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Attach to a shared-memory segment, retrying until the C++ side creates it."""
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامه‌ی C++ را اجرا کرده‌اید.") from last_err

def main():
    """Per-frame loop: wait for sem_frame_ready, read header/dets, build a
    zero-copy ndarray over the shared pixels, track, write tracks, post
    sem_tracks_ready."""
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()

    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")

    # Deep SORT
    tracker = DeepSort(
        max_age=40,
        n_init=3,
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False
    )
    print("✅ DeepSort initialised with ReID.")

    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)

    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()

    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")

    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        sem_frame_ready.acquire()
        t_start = time.time()

        # ----- header -----
        width, height, channels, frame_id, num_dets, frame_ready = header_struct.unpack_from(buf_f, 0)

        # clean shutdown: C++ signals end-of-stream with frame_id == -1
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break

        # ----- detections (view over the same shm) -----
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))

        # ----- frame: Zero-Copy -----
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        # No copy is performed: this array aliases the shm bytes directly.
        frame = np.ndarray(
            shape=(height, width, channels),
            dtype=np.uint8,
            buffer=buf_f,
            offset=img_offset
        )
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)

        # ----- track -----
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1

        # collect only confirmed, freshly-updated tracks
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                # fall back: convert left/top/right/bottom to tlwh
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r-l), float(b-tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")

        # write results into shm_tracks
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()

        # per-frame report
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")

    # cleanup
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")

    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")

if __name__ == "__main__":
    main()

main_ipc_final.cpp (C++)

cpp
// main_ipc_final.cpp #include <opencv2/opencv.hpp> #include <opencv2/dnn.hpp> #include <iostream> #include <fstream> #include <vector> #include <string> #include <chrono> #include <iomanip> #include <cstring> // ==== POSIX IPC ==== #include <fcntl.h> #include <sys/mman.h> #include <sys/stat.h> #include <semaphore.h> #include <unistd.h> static const char* SHM_FRAME_NAME = "/shm_yolo_frames"; static const char* SHM_TRACKS_NAME = "/shm_tracks"; static const char* SEM_FRAME_READY = "/sem_frame_ready"; static const char* SEM_TRACKS_READY = "/sem_tracks_ready"; static const int MAX_DETS = 512; static const int MAX_TRACKS = 512; #pragma pack(push, 1) struct FrameHeader { int32_t width; int32_t height; int32_t channels; int32_t frame_id; int32_t num_dets; bool frame_ready; // همان چیزی که در پایتون هم با B خوانده می‌شود }; struct SharedDet { float x, y, w, h; // tlwh float conf; int32_t class_id; }; struct TracksHeader { int32_t num_tracks; int32_t frame_id; }; struct TrackRec { int32_t track_id; float x, y, w, h; // tlwh float conf; int32_t class_id; }; #pragma pack(pop) class YOLOv8Detector { public: int64 t2 = cv::getTickCount(); YOLOv8Detector(const std::string& model_path, const std::string& class_file_path, float conf_thresh = 0.30f, float nms_thresh = 0.4f) : conf_threshold(conf_thresh), nms_threshold(nms_thresh), input_width(640), input_height(640) { std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl; net = cv::dnn::readNet(model_path); try { net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); std::cout << "✅ Using GPU (CUDA) backend\n"; } catch (...) 
{ net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); std::cout << "⚠️ Using CPU backend\n"; } loadClassNames(class_file_path); boxes.reserve(1000); confidences.reserve(1000); class_ids.reserve(1000); outputs.reserve(10); std::cout << "✅ YOLOv8 model loaded successfully!\n"; std::cout << "📊 Classes loaded: " << class_names.size() << "\n"; } struct Detection { cv::Rect bbox; float confidence; int class_id; std::string class_name; }; std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) { auto t0 = std::chrono::high_resolution_clock::now(); boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear(); cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height), cv::Scalar(0,0,0), true, false, CV_32F); net.setInput(blob); net.forward(outputs, net.getUnconnectedOutLayersNames()); auto t1 = std::chrono::high_resolution_clock::now(); inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); return postProcess(frame, outputs); } cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) { cv::Mat result = frame.clone(); const cv::Scalar det_color(180,105,255); for (const auto& d : detections) { cv::rectangle(result, d.bbox, det_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f); int base=0; cv::Size sz = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base); cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6}, {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED); cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1); } int64 t3 = cv::getTickCount(); double time = (t3 - t2) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl; std::cout << "time : " << time << std::endl; std::cout << "---------- class YOLOv8Detector 
public end ----------" << std::endl; return result; } private: cv::dnn::Net net; std::vector<std::string> class_names; float conf_threshold, nms_threshold; int input_width, input_height; cv::Mat blob; std::vector<cv::Mat> outputs; std::vector<cv::Rect> boxes; std::vector<float> confidences; std::vector<int> class_ids; void loadClassNames(const std::string& class_file_path) { std::ifstream file(class_file_path); std::string line; if (file.is_open()) { while (std::getline(file, line)) class_names.push_back(line); file.close(); } else { std::cout << "⚠️ Class file not found. Using fallback labels.\n"; class_names = {"obj"}; } } std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) { std::vector<Detection> detections; detections.reserve(100); float x_factor = frame.cols / static_cast<float>(input_width); float y_factor = frame.rows / static_cast<float>(input_height); for (const auto& out : outputs) { const float* data = (float*)out.data; int num_classes = out.size[1] - 4; int num_dets = out.size[2]; for (int i = 0; i < num_dets; ++i) { float cx = data[i]; float cy = data[i + num_dets]; float w = data[i + 2*num_dets]; float h = data[i + 3*num_dets]; float best = 0.f; int best_id = 0; for (int j = 4; j < 4 + num_classes; ++j) { float conf = data[i + j*num_dets]; if (conf > best) { best = conf; best_id = j - 4; } } if (best >= conf_threshold) { int x1 = (int)((cx - w/2) * x_factor); int y1 = (int)((cy - h/2) * y_factor); int ww = (int)(w * x_factor); int hh = (int)(h * y_factor); boxes.emplace_back(x1, y1, ww, hh); confidences.push_back(best); class_ids.push_back(best_id); } } } std::vector<int> idx; cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx); detections.reserve(idx.size()); for (int k : idx) { Detection d; d.bbox = boxes[k]; d.confidence = confidences[k]; d.class_id = class_ids[k]; d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ? 
class_names[d.class_id] : "Unknown"; detections.push_back(std::move(d)); } return detections; } }; int main(int argc, char* argv[]) { int64 t6 = cv::getTickCount(); if (argc < 3) { std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n"; return -1; } std::string model_path = argv[1]; std::string video_path = argv[2]; std::string class_file = (argc>3)? argv[3] : ""; try { YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); cv::VideoCapture cap(video_path); if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; } cap.set(cv::CAP_PROP_BUFFERSIZE, 1); int fps = (int)cap.get(cv::CAP_PROP_FPS); int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT); int channels = 3; std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n"; // ===== Shared memory ===== size_t frame_bytes = (size_t)width * height * channels; size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes; int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; } if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; } void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0); if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; } size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec); int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; } if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; } void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0); if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; } sem_t* sem_frame_ready = 
sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0); sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0); if (sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; } cv::VideoWriter writer; bool use_writer = false; try { writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height)); use_writer = writer.isOpened(); } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; } // پویینترهای SHM auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames); auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader)); auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet)); const cv::Scalar det_color(180,105,255); const cv::Scalar trk_color(230,216,173); const cv::Scalar trk_text_color(0,0,0); // ماتریسِ فریم مستقیماً روی بافر SHM (Zero-Copy) cv::Mat frame_shm(height, width, CV_8UC3, img_ptr); size_t shm_total = shm_frame_size + shm_tracks_size; std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n"; int64 t7 = cv::getTickCount(); double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main before while start ----------" << std::endl; std::cout << "time : " << time_main << std::endl; std::cout << "---------- main before while end ----------" << std::endl; int frame_id = 0; double total_det_ms = 0.0; auto t0_wall = std::chrono::high_resolution_clock::now(); while (true) { int64 t8 = cv::getTickCount(); // ⚡ دیکود مستقیم داخل SHM (بدون memcpy) bool ok = cap.read(frame_shm); if (!ok || frame_shm.empty()) break; double det_ms = 0.0; auto dets = detector.detect(frame_shm, det_ms); total_det_ms += det_ms; // نوشتن به SHM (header + dets) — تصویر از قبل در img_ptr است hdr->width = width; hdr->height = height; hdr->channels = 3; 
hdr->frame_id = frame_id; hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS); hdr->frame_ready = true; for (int i = 0; i < hdr->num_dets; ++i) { const auto& d = dets[i]; det_arr[i].x = (float)d.bbox.x; det_arr[i].y = (float)d.bbox.y; det_arr[i].w = (float)d.bbox.width; det_arr[i].h = (float)d.bbox.height; det_arr[i].conf = d.confidence; det_arr[i].class_id = d.class_id; } // به Python بگو فریم حاضر است auto t_py_start = std::chrono::high_resolution_clock::now(); sem_post(sem_frame_ready); // منتظر پاسخ ترکر struct timespec timeout; clock_gettime(CLOCK_REALTIME, &timeout); timeout.tv_sec += 2; int sem_result = sem_timedwait(sem_tracks_ready, &timeout); double trk_ms = 0.0; std::vector<TrackRec> tracks; if (sem_result != 0) { std::cout << "⚠️ Timeout waiting for Python tracker\n"; } else { auto t_py_end = std::chrono::high_resolution_clock::now(); trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count(); auto* th = reinterpret_cast<TracksHeader*>(shm_tracks); auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader)); int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS); tracks.assign(tr, tr + ntracks); } // رسم: det (صورتی) + tracks (آبی کمرنگ) cv::Mat vis = detector.drawDetections(frame_shm, dets); for (const auto& tk : tracks) { cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y), (int)std::round(tk.w),(int)std::round(tk.h)); box &= cv::Rect(0,0,width,height); if (box.width<=0 || box.height<=0) continue; cv::rectangle(vis, box, trk_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id); int base=0; cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base); int ly = std::max(ts.height + 6, box.y); cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED); cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2); } char filename[256]; std::snprintf(filename, 
sizeof(filename), "frames/frame_%05d.jpg", frame_id); cv::imwrite(filename, vis); double total_ms = det_ms + trk_ms; std::cout << "CPP frame=" << std::setw(5) << frame_id << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms" << " trk=" << trk_ms << "ms" << " total=" << total_ms << "ms" << " dets=" << dets.size() << " tracks=" << tracks.size() << "\n"; if (use_writer) writer.write(vis); cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis); char k = (char)cv::waitKey(1); if (k=='q' || k==27) break; ++frame_id; int64 t9 = cv::getTickCount(); double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main while start ----------" << std::endl; std::cout << "time : " << time_main_while << std::endl; std::cout << "---------- main e while end ----------" << std::endl; } // پایان: سیگنال خاتمه به Python auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames); hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0; hdr_end->frame_id=-1; hdr_end->num_dets=0; hdr_end->frame_ready=false; sem_post(sem_frame_ready); cap.release(); if (use_writer) writer.release(); cv::destroyAllWindows(); auto t1_wall = std::chrono::high_resolution_clock::now(); double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count(); double avg_det = total_det_ms / std::max(1, frame_id); std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n"; // cleanup munmap(shm_frames, shm_frame_size); munmap(shm_tracks, shm_tracks_size); close(shm_fd_frames); close(shm_fd_tracks); sem_close(sem_frame_ready); sem_close(sem_tracks_ready); // ⚠️ پیشنهاد: unlink را بگذارید بعد از اتمام Python. shm_unlink(SHM_FRAME_NAME); shm_unlink(SHM_TRACKS_NAME); sem_unlink(SEM_FRAME_READY); sem_unlink(SEM_TRACKS_READY); } catch (const std::exception& e) { std::cerr << "❌ Exception: " << e.what() << "\n"; return -1; } return 0; }

Makefile

makefile
# Makefile for the zero-copy C++ <-> Python SHM pipeline.
# Fix: declare `all` and `clean` as .PHONY so stray files with those
# names cannot shadow the targets.
CXX ?= g++
PKGCONFIG ?= pkg-config

# OpenCV flags resolved via pkg-config (opencv4).
OPENCV_CFLAGS := $(shell $(PKGCONFIG) --cflags opencv4)
OPENCV_LIBS   := $(shell $(PKGCONFIG) --libs opencv4)

CXXFLAGS += -O3 -std=c++17 -DNDEBUG $(OPENCV_CFLAGS)
# -lrt for shm_open/sem_open, -pthread for the POSIX semaphore API.
LDFLAGS  += $(OPENCV_LIBS) -pthread -lrt

TARGET = yolo_deepsort_ipc
SRC    = main_ipc_final.cpp

.PHONY: all clean

all: $(TARGET)

$(TARGET): $(SRC)
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)

clean:
	rm -f $(TARGET)

طرز اجرا (گام‌به‌گام)

  1. پیش‌نیازها (Ubuntu/Debian):
bash
sudo apt-get update
sudo apt-get install -y build-essential pkg-config libopencv-dev
python3 -m pip install --upgrade pip
python3 -m pip install posix_ipc numpy opencv-python-headless deep_sort_realtime torch torchvision torchaudio
  2. کامپایل C++:
bash
make
  3. ساخت پوشهٔ خروجی فریم‌ها (اختیاری):
bash
mkdir -p frames
  4. اجرا:
  • اول برنامهٔ C++ را اجرا کنید (یا برعکس؛ پایتون خودش تا ۵ دقیقه برای SHM صبر می‌کند):
bash
./yolo_deepsort_ipc /path/to/yolov8n.onnx /path/to/video.mp4 /path/to/coco.names
  • سپس پایتون:
bash
python3 tracker_py.py

اگر GPU CUDA ندارید مشکلی نیست؛ کُد خودکار روی CPU می‌افتد.


چرا الآن واقعاً Zero-Copy است؟

  • C++: با

    cpp
    cv::Mat frame_shm(height, width, CV_8UC3, img_ptr); cap.read(frame_shm);

    دادهِ تصویر مستقیماً داخل بافرِ Shared Memory دیکُد می‌شود. هیچ memcpy اضافه‌ای نداریم.

  • Python: با

    python
    frame = np.ndarray((h,w,c), dtype=np.uint8, buffer=buf_f, offset=img_offset)

    آرایهٔ NumPy صرفاً نمایش (view) روی همان بافرِ SHM است؛ نه کپی.

تنها کپیِ اختیاری، همان frame.clone() در تابع رسمِ C++ برای Visualization است (برای اینکه روی تصویر خامِ SHM خط نکشیم). این به pipeline بین C++ ↔ Python مربوط نیست.

موفق باشید! اگر بخواهید، می‌توانم نسخهٔ lock-free با حلقهٔ چندبافر و بدون صبر روی semaphore هم براتون آماده کنم.

Share this Q&A