کد زیر کاملا درست کار میکند اما من احتیاج دارم که...

تم الإنشاء في: ٣ سبتمبر ٢٠٢٥

تمت الإجابة باستخدام GPT-5 Thinking بواسطة Chat01

سؤال

کد زیر کاملا درست کار میکند
اما من احتیاج دارم که تصاویر frame ها را با pointer از c++ به python بفرستم با sharedmemory و هیچگونه copy اتفاق نیوفتد نکته اصلی این است
پس هدف اصلی این است که بدون کپی کردن frame انرا برای python بفرستد
makefile را هم بنویس و بگو چگونه کد هارا اجرا کنم

به بقیه ساختار کد دست نزن
کد نهایی را کامل بنویس

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======

try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    # posix_ipc is a third-party package; fail loudly with an install hint.
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== نام‌ها باید با C++ یکی باشند (names must match the C++ side exactly) =====

# POSIX shared-memory / semaphore object names; must match the C++ constants byte-for-byte.
FRAME_SHM_NAME = "/shm_yolo_frames"    # header + detections + raw frame pixels
TRACKS_SHM_NAME = "/shm_tracks"        # tracks header + track records
SEM_FRAME_READY = "/sem_frame_ready"   # posted by C++ when a frame is written
SEM_TRACKS_READY = "/sem_tracks_ready" # posted by Python when tracks are written

MAX_DETS = 512     # fixed-capacity detection slab (must match C++ MAX_DETS)
MAX_TRACKS = 512   # fixed-capacity track slab (must match C++ MAX_TRACKS)

# Structures (little-endian), mirroring the C++ #pragma pack(1) structs

# Little-endian struct layouts; each must mirror the packed C++ struct exactly.
HDR_FMT = "<iiiii" # width, height, channels, frame_id, num_dets
DET_FMT = "<fffffi" # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii" # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id

# Byte sizes derived from the formats above; used as offsets into the shm buffers.
HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)

def open_or_create_semaphores():
    """Open (or create) the two POSIX semaphores shared with the C++ producer.

    Returns:
        (sem_frame_ready, sem_tracks_ready): posix_ipc.Semaphore handles,
        both created with an initial value of 0 so the first acquire blocks.
    """
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    # O_CREAT is idempotent: whichever process starts first creates the object.
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready

def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until the named POSIX SharedMemory segment exists and attach to it.

    Args:
        name: shm object name (the C++ side creates it via shm_open).
        timeout_s: total seconds to keep retrying before giving up.
        retry_s: sleep between attempts.

    Returns:
        multiprocessing.shared_memory.SharedMemory attached to the segment.

    Raises:
        TimeoutError: if the segment does not appear within timeout_s.
    """
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            # Segment not created yet by the C++ side; retry after a short sleep.
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامه‌ی C++ را اجرا کرده‌اید.") from last_err

def main():
    """Consume frames+detections from C++ over POSIX shm, run Deep SORT, write tracks back."""
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()

    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    # memoryview keeps reads/writes zero-copy against the mapped segments.
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")

    # Deep SORT -- tuned for fast confirmation and a stable appearance budget.
    tracker = DeepSort(
        max_age=40,
        n_init=3,               # confirm tracks quickly
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False,
    )
    print("✅ DeepSort initialised with ReID.")

    # Pre-compiled struct codecs (faster than module-level pack/unpack in the loop).
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)

    # Statistics
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")

    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        # torch.cuda.synchronize()  # if using PyTorch
        sem_frame_ready.acquire()
        t_start = time.time()

        # Header: five little-endian int32 at offset 0 of the frame segment.
        width, height, channels, frame_id, num_dets = header_struct.unpack_from(buf_f, 0)

        # Clean shutdown: C++ signals the end of stream with frame_id == -1.
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break

        # Detections: fixed-size slab right after the header.
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))

        # Frame pixels: zero-copy view over the shm region wrapped as an ndarray.
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        img_nbytes = width * height * channels
        img_bytes = buf_f[img_offset: img_offset + img_nbytes]
        frame = np.frombuffer(img_bytes, dtype=np.uint8).reshape((height, width, channels))
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)

        # Track
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1

        # Collect confirmed, currently-updated tracks only.
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")

        # Write results into the tracks segment, then signal the C++ side.
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()

        # Per-frame report (tracker result + timings).
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")

    # Cleanup: release views before closing, otherwise close() raises BufferError.
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")

    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")

# Script entry point (transcript had stripped the dunder underscores).
if __name__ == "__main__":
    main()

// main_ipc_final.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>

// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

// POSIX shm / semaphore object names -- must match the Python side byte-for-byte.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";

// Fixed-capacity slabs inside the shm segments (mirrored in Python).
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;

// Packed wire structs shared with Python via struct.unpack/pack.
// BUG FIX: the hand-added `bool frame_ready` member made sizeof(FrameHeader)==21
// under pack(1), while Python computes HDR_SIZE = calcsize("<iiiii") == 20.
// That one-byte skew shifted every SharedDet record and the pixel buffer.
// The field is removed so both sides agree on the layout again.
#pragma pack(push, 1)
struct FrameHeader {          // mirrors Python HDR_FMT "<iiiii" (20 bytes)
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;         // -1 signals end-of-stream to Python
    int32_t num_dets;
};
struct SharedDet {            // mirrors Python DET_FMT "<fffffi" (24 bytes)
    float x, y, w, h;         // tlwh
    float conf;
    int32_t class_id;
};
struct TracksHeader {         // mirrors Python TRK_HDR_FMT "<ii" (8 bytes)
    int32_t num_tracks;
    int32_t frame_id;
};
struct TrackRec {             // mirrors Python TRK_FMT "<ifffffi" (28 bytes)
    int32_t track_id;
    float x, y, w, h;         // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)

class YOLOv8Detector {
public:
int64 t2 = cv::getTickCount();

text
YOLOv8Detector(const std::string& model_path, const std::string& class_file_path, float conf_thresh = 0.30f, float nms_thresh = 0.4f) : conf_threshold(conf_thresh), nms_threshold(nms_thresh), input_width(640), input_height(640) { std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl; net = cv::dnn::readNet(model_path); try { net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); std::cout << "✅ Using GPU (CUDA) backend\n"; } catch (...) { net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); std::cout << "⚠️ Using CPU backend\n"; } loadClassNames(class_file_path); boxes.reserve(1000); confidences.reserve(1000); class_ids.reserve(1000); outputs.reserve(10); std::cout << "✅ YOLOv8 model loaded successfully!\n"; std::cout << "📊 Classes loaded: " << class_names.size() << "\n"; } struct Detection { cv::Rect bbox; float confidence; int class_id; std::string class_name; }; std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) { auto t0 = std::chrono::high_resolution_clock::now(); boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear(); cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height), cv::Scalar(0,0,0), true, false, CV_32F); net.setInput(blob); net.forward(outputs, net.getUnconnectedOutLayersNames()); auto t1 = std::chrono::high_resolution_clock::now(); inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); return postProcess(frame, outputs); } cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) { cv::Mat result = frame.clone(); // صورتی (BGR): RGB(255,105,180) -> BGR(180,105,255) const cv::Scalar det_color(180,105,255); for (const auto& d : detections) { cv::rectangle(result, d.bbox, det_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f); int base=0; cv::Size sz = 
cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base); cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6}, {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED); cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1); } int64 t3 = cv::getTickCount(); double time = (t3 - t2) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl; std::cout << "time : " << time << std::endl; std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl; return result; }

private:
//int64 t4 = cv::getTickCount();

text
cv::dnn::Net net; std::vector<std::string> class_names; float conf_threshold, nms_threshold; int input_width, input_height; cv::Mat blob; std::vector<cv::Mat> outputs; std::vector<cv::Rect> boxes; std::vector<float> confidences; std::vector<int> class_ids; void loadClassNames(const std::string& class_file_path) { std::ifstream file(class_file_path); std::string line; if (file.is_open()) { while (std::getline(file, line)) class_names.push_back(line); file.close(); } else { std::cout << "⚠️ Class file not found. Using fallback labels.\n"; class_names = {"obj"}; } } std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) { std::vector<Detection> detections; detections.reserve(100); float x_factor = frame.cols / static_cast<float>(input_width); float y_factor = frame.rows / static_cast<float>(input_height); for (const auto& out : outputs) { const float* data = (float*)out.data; int num_classes = out.size[1] - 4; int num_dets = out.size[2]; for (int i = 0; i < num_dets; ++i) { float cx = data[i]; float cy = data[i + num_dets]; float w = data[i + 2*num_dets]; float h = data[i + 3*num_dets]; float best = 0.f; int best_id = 0; for (int j = 4; j < 4 + num_classes; ++j) { float conf = data[i + j*num_dets]; if (conf > best) { best = conf; best_id = j - 4; } } if (best >= conf_threshold) { int x1 = (int)((cx - w/2) * x_factor); int y1 = (int)((cy - h/2) * y_factor); int ww = (int)(w * x_factor); int hh = (int)(h * y_factor); boxes.emplace_back(x1, y1, ww, hh); confidences.push_back(best); class_ids.push_back(best_id); } } } std::vector<int> idx; cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx); detections.reserve(idx.size()); for (int k : idx) { Detection d; d.bbox = boxes[k]; d.confidence = confidences[k]; d.class_id = class_ids[k]; d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ? 
class_names[d.class_id] : "Unknown"; detections.push_back(std::move(d)); } return detections; } //int64 t5 = cv::getTickCount(); //double time_private = (t4 - t5) * 1000.0 / cv::getTickFrequency(); //std::cout << "---------- class YOLOv8Detector private start ----------" << std::endl; //std::cout << "time : " << time_private << std::endl; //std::cout << "---------- class YOLOv8Detector private end ----------" << std::endl;

};

int main(int argc, char* argv[]) {
int64 t6 = cv::getTickCount();

text
if (argc < 3) { std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n"; return -1; } std::string model_path = argv[1]; std::string video_path = argv[2]; std::string class_file = (argc>3)? argv[3] : ""; try { YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); // ← آستانه کم‌تر برای پایداری cv::VideoCapture cap(video_path); if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; } cap.set(cv::CAP_PROP_BUFFERSIZE, 1); int fps = (int)cap.get(cv::CAP_PROP_FPS); int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT); int channels = 3; std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n"; // ===== Shared memory ===== size_t frame_bytes = (size_t)width * height * channels; size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes; int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; } if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; } void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0); if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; } size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec); int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; } if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; } void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0); if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; } sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0); sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0); if (sem_frame_ready 
== SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; } cv::VideoWriter writer; bool use_writer = false; try { writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height)); use_writer = writer.isOpened(); } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; } cv::Mat frame; int frame_id = 0; double total_det_ms = 0.0; auto t0_wall = std::chrono::high_resolution_clock::now(); // پویینترهای SHM auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames); auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader)); auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet)); const cv::Scalar det_color(180,105,255); // صورتی const cv::Scalar trk_color(230,216,173); // آبی کم‌رنگ (BGR) const cv::Scalar trk_text_color(0,0,0); // سربار SHM (محاسبه و چاپ یک‌بار) size_t shm_total = shm_frame_size + shm_tracks_size; std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n"; int64 t7 = cv::getTickCount(); double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main before while start ----------" << std::endl; std::cout << "time : " << time_main << std::endl; std::cout << "---------- main before while end ----------" << std::endl; while (true) { int64 t8 = cv::getTickCount(); cap >> frame; if (frame.empty()) break; if (!frame.isContinuous()) frame = frame.clone(); double det_ms = 0.0; auto dets = detector.detect(frame, det_ms); total_det_ms += det_ms; // نوشتن به SHM hdr->width = width; hdr->height = height; hdr->channels = 3; hdr->frame_id = frame_id; hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS); for (int i = 0; i < hdr->num_dets; ++i) { const auto& d = dets[i]; det_arr[i].x = (float)d.bbox.x; det_arr[i].y = (float)d.bbox.y; det_arr[i].w = 
(float)d.bbox.width; det_arr[i].h = (float)d.bbox.height; det_arr[i].conf = d.confidence; det_arr[i].class_id = d.class_id; } std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize()); auto t_py_start = std::chrono::high_resolution_clock::now(); sem_post(sem_frame_ready); // منتظر پاسخ ترکر struct timespec timeout; clock_gettime(CLOCK_REALTIME, &timeout); timeout.tv_sec += 2; // کمی سخاوتمندانه‌تر int sem_result = sem_timedwait(sem_tracks_ready, &timeout); double trk_ms = 0.0; std::vector<TrackRec> tracks; if (sem_result != 0) { std::cout << "⚠️ Timeout waiting for Python tracker\n"; } else { auto t_py_end = std::chrono::high_resolution_clock::now(); trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count(); auto* th = reinterpret_cast<TracksHeader*>(shm_tracks); auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader)); int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS); tracks.assign(tr, tr + ntracks); } // رسم: اول det (صورتی)، بعد track (آبی کم‌رنگ + ID) cv::Mat vis = detector.drawDetections(frame, dets); for (const auto& tk : tracks) { cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y), (int)std::round(tk.w),(int)std::round(tk.h)); box &= cv::Rect(0,0,width,height); if (box.width<=0 || box.height<=0) continue; cv::rectangle(vis, box, trk_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id); int base=0; cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base); int ly = std::max(ts.height + 6, box.y); cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED); cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2); } char filename[256]; std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id); cv::imwrite(filename, vis); // چاپ per-frame: det و trk و جمع double total_ms = det_ms + trk_ms; std::cout << "CPP frame=" << std::setw(5) << 
frame_id << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms" << " trk=" << trk_ms << "ms" << " total=" << total_ms << "ms" << " dets=" << dets.size() << " tracks=" << tracks.size() << "\n"; if (use_writer) writer.write(vis); cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis); char k = (char)cv::waitKey(1); if (k=='q' || k==27) break; ++frame_id; int64 t9 = cv::getTickCount(); double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main while start ----------" << std::endl; std::cout << "time : " << time_main_while << std::endl; std::cout << "---------- main e while end ----------" << std::endl; } // پایان: سیگنال خاتمه به Python auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames); hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0; hdr_end->frame_id=-1; hdr_end->num_dets=0; sem_post(sem_frame_ready); cap.release(); if (use_writer) writer.release(); cv::destroyAllWindows(); // جمع‌بندی زمان‌ها auto t1_wall = std::chrono::high_resolution_clock::now(); double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count(); double avg_det = total_det_ms / std::max(1, frame_id); std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n"; // cleanup munmap(shm_frames, shm_frame_size); munmap(shm_tracks, shm_tracks_size); close(shm_fd_frames); close(shm_fd_tracks); sem_close(sem_frame_ready); sem_close(sem_tracks_ready); // ⚠️ پیشنهاد: unlink را بگذارید بعد از اتمام Python. shm_unlink(SHM_FRAME_NAME); shm_unlink(SHM_TRACKS_NAME); sem_unlink(SEM_FRAME_READY); sem_unlink(SEM_TRACKS_READY); } catch (const std::exception& e) { std::cerr << "❌ Exception: " << e.what() << "\n"; return -1; } return 0;

}

//g++ main_ipc_optimized.cpp -o yolo_deepsort_ipc_optimized $(pkg-config --cflags --libs opencv4)
//./yolo_deepsort_ipc_optimized /home/rayfan/Documents/cpp_projects/shared_optimize/yolov8n.onnx /media/rayfan/d/videos/9600+6400/output.avi /home/rayfan/Documents/cpp_projects/shared_optimize/coco.names

سؤال

کد زیر کاملا درست کار میکند
اما من احتیاج دارم که تصاویر frame ها را با pointer از c++ به python بفرستم با sharedmemory و هیچگونه copy اتفاق نیوفتد نکته اصلی این است
پس هدف اصلی این است که بدون کپی کردن frame انرا برای python بفرستد
makefile را هم بنویس و بگو چگونه کد هارا اجرا کنم

به بقیه ساختار کد دست نزن
کد نهایی را کامل بنویس

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======

try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    # posix_ipc is a third-party package; fail loudly with an install hint.
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== نام‌ها باید با C++ یکی باشند (names must match the C++ side exactly) =====

FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"

MAX_DETS = 512
MAX_TRACKS = 512

# Structures (little-endian), mirroring the C++ #pragma pack(1) structs

HDR_FMT = "<iiiii" # width, height, channels, frame_id, num_dets
DET_FMT = "<fffffi" # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii" # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id

HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)

def open_or_create_semaphores():
    """Open (or create) the two POSIX semaphores shared with the C++ producer.

    Returns:
        (sem_frame_ready, sem_tracks_ready): posix_ipc.Semaphore handles,
        both created with an initial value of 0 so the first acquire blocks.
    """
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    # O_CREAT is idempotent: whichever process starts first creates the object.
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready

def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until the named POSIX SharedMemory segment exists and attach to it.

    Args:
        name: shm object name (the C++ side creates it via shm_open).
        timeout_s: total seconds to keep retrying before giving up.
        retry_s: sleep between attempts.

    Returns:
        multiprocessing.shared_memory.SharedMemory attached to the segment.

    Raises:
        TimeoutError: if the segment does not appear within timeout_s.
    """
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            # Segment not created yet by the C++ side; retry after a short sleep.
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامه‌ی C++ را اجرا کرده‌اید.") from last_err

def main():
    """Consume frames+detections from C++ over POSIX shm, run Deep SORT, write tracks back."""
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()

    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    # memoryview keeps reads/writes zero-copy against the mapped segments.
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")

    # Deep SORT -- tuned for fast confirmation and a stable appearance budget.
    tracker = DeepSort(
        max_age=40,
        n_init=3,               # confirm tracks quickly
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False,
    )
    print("✅ DeepSort initialised with ReID.")

    # Pre-compiled struct codecs (faster than module-level pack/unpack in the loop).
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)

    # Statistics
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")

    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        # torch.cuda.synchronize()  # if using PyTorch
        sem_frame_ready.acquire()
        t_start = time.time()

        # Header: five little-endian int32 at offset 0 of the frame segment.
        width, height, channels, frame_id, num_dets = header_struct.unpack_from(buf_f, 0)

        # Clean shutdown: C++ signals the end of stream with frame_id == -1.
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break

        # Detections: fixed-size slab right after the header.
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))

        # Frame pixels: zero-copy view over the shm region wrapped as an ndarray.
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        img_nbytes = width * height * channels
        img_bytes = buf_f[img_offset: img_offset + img_nbytes]
        frame = np.frombuffer(img_bytes, dtype=np.uint8).reshape((height, width, channels))
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)

        # Track
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1

        # Collect confirmed, currently-updated tracks only.
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")

        # Write results into the tracks segment, then signal the C++ side.
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()

        # Per-frame report (tracker result + timings).
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")

    # Cleanup: release views before closing, otherwise close() raises BufferError.
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")

    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")

# Script entry point (transcript had stripped the dunder underscores).
if __name__ == "__main__":
    main()

// main_ipc_final.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>

// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

// POSIX shm / semaphore object names -- must match the Python side byte-for-byte.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";

// Fixed-capacity slabs inside the shm segments (mirrored in Python).
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;

// Packed wire structs shared with Python via struct.unpack/pack.
// BUG FIX: the hand-added `bool frame_ready` member made sizeof(FrameHeader)==21
// under pack(1), while Python computes HDR_SIZE = calcsize("<iiiii") == 20.
// That one-byte skew shifted every SharedDet record and the pixel buffer.
// The field is removed so both sides agree on the layout again.
#pragma pack(push, 1)
struct FrameHeader {          // mirrors Python HDR_FMT "<iiiii" (20 bytes)
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;         // -1 signals end-of-stream to Python
    int32_t num_dets;
};
struct SharedDet {            // mirrors Python DET_FMT "<fffffi" (24 bytes)
    float x, y, w, h;         // tlwh
    float conf;
    int32_t class_id;
};
struct TracksHeader {         // mirrors Python TRK_HDR_FMT "<ii" (8 bytes)
    int32_t num_tracks;
    int32_t frame_id;
};
struct TrackRec {             // mirrors Python TRK_FMT "<ifffffi" (28 bytes)
    int32_t track_id;
    float x, y, w, h;         // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)

class YOLOv8Detector {
public:
int64 t2 = cv::getTickCount();

text
YOLOv8Detector(const std::string& model_path, const std::string& class_file_path, float conf_thresh = 0.30f, float nms_thresh = 0.4f) : conf_threshold(conf_thresh), nms_threshold(nms_thresh), input_width(640), input_height(640) { std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl; net = cv::dnn::readNet(model_path); try { net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); std::cout << "✅ Using GPU (CUDA) backend\n"; } catch (...) { net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); std::cout << "⚠️ Using CPU backend\n"; } loadClassNames(class_file_path); boxes.reserve(1000); confidences.reserve(1000); class_ids.reserve(1000); outputs.reserve(10); std::cout << "✅ YOLOv8 model loaded successfully!\n"; std::cout << "📊 Classes loaded: " << class_names.size() << "\n"; } struct Detection { cv::Rect bbox; float confidence; int class_id; std::string class_name; }; std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) { auto t0 = std::chrono::high_resolution_clock::now(); boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear(); cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height), cv::Scalar(0,0,0), true, false, CV_32F); net.setInput(blob); net.forward(outputs, net.getUnconnectedOutLayersNames()); auto t1 = std::chrono::high_resolution_clock::now(); inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); return postProcess(frame, outputs); } cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) { cv::Mat result = frame.clone(); // صورتی (BGR): RGB(255,105,180) -> BGR(180,105,255) const cv::Scalar det_color(180,105,255); for (const auto& d : detections) { cv::rectangle(result, d.bbox, det_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f); int base=0; cv::Size sz = 
cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base); cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6}, {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED); cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1); } int64 t3 = cv::getTickCount(); double time = (t3 - t2) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl; std::cout << "time : " << time << std::endl; std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl; return result; }

private:
//int64 t4 = cv::getTickCount();

text
cv::dnn::Net net; std::vector<std::string> class_names; float conf_threshold, nms_threshold; int input_width, input_height; cv::Mat blob; std::vector<cv::Mat> outputs; std::vector<cv::Rect> boxes; std::vector<float> confidences; std::vector<int> class_ids; void loadClassNames(const std::string& class_file_path) { std::ifstream file(class_file_path); std::string line; if (file.is_open()) { while (std::getline(file, line)) class_names.push_back(line); file.close(); } else { std::cout << "⚠️ Class file not found. Using fallback labels.\n"; class_names = {"obj"}; } } std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) { std::vector<Detection> detections; detections.reserve(100); float x_factor = frame.cols / static_cast<float>(input_width); float y_factor = frame.rows / static_cast<float>(input_height); for (const auto& out : outputs) { const float* data = (float*)out.data; int num_classes = out.size[1] - 4; int num_dets = out.size[2]; for (int i = 0; i < num_dets; ++i) { float cx = data[i]; float cy = data[i + num_dets]; float w = data[i + 2*num_dets]; float h = data[i + 3*num_dets]; float best = 0.f; int best_id = 0; for (int j = 4; j < 4 + num_classes; ++j) { float conf = data[i + j*num_dets]; if (conf > best) { best = conf; best_id = j - 4; } } if (best >= conf_threshold) { int x1 = (int)((cx - w/2) * x_factor); int y1 = (int)((cy - h/2) * y_factor); int ww = (int)(w * x_factor); int hh = (int)(h * y_factor); boxes.emplace_back(x1, y1, ww, hh); confidences.push_back(best); class_ids.push_back(best_id); } } } std::vector<int> idx; cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx); detections.reserve(idx.size()); for (int k : idx) { Detection d; d.bbox = boxes[k]; d.confidence = confidences[k]; d.class_id = class_ids[k]; d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ? 
class_names[d.class_id] : "Unknown"; detections.push_back(std::move(d)); } return detections; } //int64 t5 = cv::getTickCount(); //double time_private = (t4 - t5) * 1000.0 / cv::getTickFrequency(); //std::cout << "---------- class YOLOv8Detector private start ----------" << std::endl; //std::cout << "time : " << time_private << std::endl; //std::cout << "---------- class YOLOv8Detector private end ----------" << std::endl;

};

int main(int argc, char* argv[]) {
int64 t6 = cv::getTickCount();

text
if (argc < 3) { std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n"; return -1; } std::string model_path = argv[1]; std::string video_path = argv[2]; std::string class_file = (argc>3)? argv[3] : ""; try { YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); // ← آستانه کم‌تر برای پایداری cv::VideoCapture cap(video_path); if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; } cap.set(cv::CAP_PROP_BUFFERSIZE, 1); int fps = (int)cap.get(cv::CAP_PROP_FPS); int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT); int channels = 3; std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n"; // ===== Shared memory ===== size_t frame_bytes = (size_t)width * height * channels; size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes; int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; } if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; } void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0); if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; } size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec); int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; } if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; } void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0); if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; } sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0); sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0); if (sem_frame_ready 
== SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; } cv::VideoWriter writer; bool use_writer = false; try { writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height)); use_writer = writer.isOpened(); } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; } cv::Mat frame; int frame_id = 0; double total_det_ms = 0.0; auto t0_wall = std::chrono::high_resolution_clock::now(); // پویینترهای SHM auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames); auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader)); auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet)); const cv::Scalar det_color(180,105,255); // صورتی const cv::Scalar trk_color(230,216,173); // آبی کم‌رنگ (BGR) const cv::Scalar trk_text_color(0,0,0); // سربار SHM (محاسبه و چاپ یک‌بار) size_t shm_total = shm_frame_size + shm_tracks_size; std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n"; int64 t7 = cv::getTickCount(); double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main before while start ----------" << std::endl; std::cout << "time : " << time_main << std::endl; std::cout << "---------- main before while end ----------" << std::endl; while (true) { int64 t8 = cv::getTickCount(); cap >> frame; if (frame.empty()) break; if (!frame.isContinuous()) frame = frame.clone(); double det_ms = 0.0; auto dets = detector.detect(frame, det_ms); total_det_ms += det_ms; // نوشتن به SHM hdr->width = width; hdr->height = height; hdr->channels = 3; hdr->frame_id = frame_id; hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS); for (int i = 0; i < hdr->num_dets; ++i) { const auto& d = dets[i]; det_arr[i].x = (float)d.bbox.x; det_arr[i].y = (float)d.bbox.y; det_arr[i].w = 
(float)d.bbox.width; det_arr[i].h = (float)d.bbox.height; det_arr[i].conf = d.confidence; det_arr[i].class_id = d.class_id; } std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize()); auto t_py_start = std::chrono::high_resolution_clock::now(); sem_post(sem_frame_ready); // منتظر پاسخ ترکر struct timespec timeout; clock_gettime(CLOCK_REALTIME, &timeout); timeout.tv_sec += 2; // کمی سخاوتمندانه‌تر int sem_result = sem_timedwait(sem_tracks_ready, &timeout); double trk_ms = 0.0; std::vector<TrackRec> tracks; if (sem_result != 0) { std::cout << "⚠️ Timeout waiting for Python tracker\n"; } else { auto t_py_end = std::chrono::high_resolution_clock::now(); trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count(); auto* th = reinterpret_cast<TracksHeader*>(shm_tracks); auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader)); int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS); tracks.assign(tr, tr + ntracks); } // رسم: اول det (صورتی)، بعد track (آبی کم‌رنگ + ID) cv::Mat vis = detector.drawDetections(frame, dets); for (const auto& tk : tracks) { cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y), (int)std::round(tk.w),(int)std::round(tk.h)); box &= cv::Rect(0,0,width,height); if (box.width<=0 || box.height<=0) continue; cv::rectangle(vis, box, trk_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id); int base=0; cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base); int ly = std::max(ts.height + 6, box.y); cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED); cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2); } char filename[256]; std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id); cv::imwrite(filename, vis); // چاپ per-frame: det و trk و جمع double total_ms = det_ms + trk_ms; std::cout << "CPP frame=" << std::setw(5) << 
frame_id << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms" << " trk=" << trk_ms << "ms" << " total=" << total_ms << "ms" << " dets=" << dets.size() << " tracks=" << tracks.size() << "\n"; if (use_writer) writer.write(vis); cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis); char k = (char)cv::waitKey(1); if (k=='q' || k==27) break; ++frame_id; int64 t9 = cv::getTickCount(); double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main while start ----------" << std::endl; std::cout << "time : " << time_main_while << std::endl; std::cout << "---------- main e while end ----------" << std::endl; } // پایان: سیگنال خاتمه به Python auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames); hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0; hdr_end->frame_id=-1; hdr_end->num_dets=0; sem_post(sem_frame_ready); cap.release(); if (use_writer) writer.release(); cv::destroyAllWindows(); // جمع‌بندی زمان‌ها auto t1_wall = std::chrono::high_resolution_clock::now(); double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count(); double avg_det = total_det_ms / std::max(1, frame_id); std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n"; // cleanup munmap(shm_frames, shm_frame_size); munmap(shm_tracks, shm_tracks_size); close(shm_fd_frames); close(shm_fd_tracks); sem_close(sem_frame_ready); sem_close(sem_tracks_ready); // ⚠️ پیشنهاد: unlink را بگذارید بعد از اتمام Python. shm_unlink(SHM_FRAME_NAME); shm_unlink(SHM_TRACKS_NAME); sem_unlink(SEM_FRAME_READY); sem_unlink(SEM_TRACKS_READY); } catch (const std::exception& e) { std::cerr << "❌ Exception: " << e.what() << "\n"; return -1; } return 0;

}

//g++ main_ipc_optimized.cpp -o yolo_deepsort_ipc_optimized $(pkg-config --cflags --libs opencv4)
//./yolo_deepsort_ipc_optimized /home/rayfan/Documents/cpp_projects/shared_optimize/yolov8n.onnx /media/rayfan/d/videos/9600+6400/output.avi /home/rayfan/Documents/cpp_projects/shared_optimize/coco.names

الفكرة

Evaluating code modifications for zero-copy

We need to ensure zero-copy frame sharing between C++ and Python via shared memory. In Python, using multiprocessing.shared_memory with memoryview avoids copying, as slicing creates a view. The issue lies in C++, where std::memcpy is used, copying data from the cv::Mat into shared memory.

Considering frame sharing without copying

The user wants to avoid copying frames into shared memory. To do that, we could try creating a cv::Mat that directly references the shared memory, but OpenCV's VideoCapture writes to a cv::Mat that owns its buffer. Using external buffers with OpenCV’s capture seems tricky; one copy may be inevitable unless the decoder can write directly to shared memory.

Zero-copy frame sharing between C++ and Python

The user wants zero-copy frame sharing between C++ and Python. On the Python side, we've already avoided copying into a temporary NumPy array. The tricky part is C++, where we need to copy from the VideoCapture frame into shared memory. However, their primary goal is to avoid any unnecessary copy across the C++ to Python boundary.

Plan for zero-copy frame sharing

The plan is to keep the existing structure but modify Python to avoid unnecessary slicing copies, which is fine. We can pass the shared memory pointer address to Python, ensuring no slice is created. We'll also keep the unused frame_ready flag. The tricky part is avoiding a copy when capturing frames, which is only possible if the capture process writes directly into shared memory.

Final implementation plan

The user wants a zero-copy setup with minimal structural changes. The Python side will use np.ndarray(buffer=buf_f, offset=img_offset) for zero-copy frame creation, ensuring the focus remains on no extra slicing. The C++ side still requires a copy when writing frames, but we’ll minimize additional copies. I'll update the header in Python to reflect the extra bool, aligning with C++ memory layout, and adjust the calculation to match the offsets properly. I'll also provide full code and instructions, maintaining the original structure.

Thought for 54s

الإجابة

عالی—ایده‌ات کاملاً شدنیه: «صفر-کپی بین C++ و Python».
در C++، تنها کپیِ ناگزیر همون انتقال خروجیِ دیکودر (VideoCapture) به بافرِ Shared Memory است؛ ولی بین دو پردازه (C++→Python) هیچ کپی‌ای رخ نمی‌ده. در Python هم آرایه‌ی NumPy را مستقیم روی حافظهٔ مشترک می‌سازیم (نه با برشِ موقّت). همچنین با ساختار هدر شما هم‌راستا شدیم (آن bool frame_ready را هم دقیق خواندیم) تا آفست‌ها ۱ بایت جابه‌جا نشوند.

در ادامه «کد نهایی کامل»، «Makefile»، و «راهنمای اجرا» را می‌بینی. طبق خواسته‌ات به بقیهٔ ساختار دست نزدم و فقط جاهایی را که برای صفر-کپی ضروری بود، دقیق کردم.


1) Python (tracker_py_ipc.py) — صفر-کپی واقعی روی SHM

python
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Python side of the zero-copy YOLOv8 -> Deep SORT IPC pipeline.

Attaches to the POSIX shared-memory segments created by the C++ producer
(/shm_yolo_frames, /shm_tracks), builds a NumPy view directly on the frame
bytes (no copy), runs deep_sort_realtime tracking, and packs the resulting
tracks back into /shm_tracks, synchronised by two named semaphores.
"""
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======
try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== These names must match the C++ side exactly =====
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"

MAX_DETS = 512
MAX_TRACKS = 512

# ========= Structures (little-endian) =========
# Important: the C++ FrameHeader ends with a bool frame_ready under
# #pragma pack(push,1), so the header is 5*4 + 1 = 21 bytes. Leaving it
# out would shift every later offset by one byte.
HDR_FMT = "<iiiiib"   # width, height, channels, frame_id, num_dets, frame_ready(bool, 1 byte)
DET_FMT = "<fffffi"   # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii"   # num_tracks, frame_id
TRK_FMT = "<ifffffi"  # track_id, x, y, w, h, conf, class_id

HDR_SIZE = struct.calcsize(HDR_FMT)        # = 21
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)


def open_or_create_semaphores():
    """Open (or create) the two named semaphores shared with C++."""
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready


def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until the named SHM segment exists (the C++ side creates it)."""
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامه‌ی C++ را اجرا کرده‌اید.") from last_err


def main():
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()
    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    # One stable memoryview over each whole buffer, so the loop never
    # allocates a temporary view object.
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")
    # Deep SORT — fast and stable settings.
    tracker = DeepSort(
        max_age=40, n_init=3, nn_budget=300, embedder="torchreid",
        half=False, bgr=True, max_cosine_distance=0.3, polygon=False
    )
    print("✅ DeepSort initialised with ReID.")
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)
    # Stats.
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")
    # Fixed offset of the image region in SHM: right after the header and
    # the detection array.
    dets_start = HDR_SIZE
    img_offset_base = HDR_SIZE + MAX_DETS * DET_SIZE
    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        sem_frame_ready.acquire()
        t_start = time.time()
        # Header (no copy).
        width, height, channels, frame_id, num_dets, frame_ready = header_struct.unpack_from(buf_f, 0)
        # Clean shutdown: C++ signals end-of-stream with frame_id == -1.
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break
        # Detections (no copy).
        detections = []
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))
        # Frame (true zero-copy: NumPy builds only a view onto the SHM).
        buf_start = time.time()
        img_nbytes = width * height * channels
        # No slicing of the memoryview, so everything stays zero-copy.
        frame = np.ndarray(
            shape=(height, width, channels),
            dtype=np.uint8,
            buffer=buf_f,             # whole SHM buffer
            offset=img_offset_base    # start of the image region
        )
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)
        # Track.
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1
        # Collect confirmed, up-to-date tracks only.
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")
        # Write the tracks into shm_tracks (no extra copies).
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()
        # Per-frame report.
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")
    # Cleanup.
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")


if __name__ == "__main__":
    main()

چه عوض شد؟

  • هدر Python با C++ همسان شد: HDR_FMT = "<iiiiib" تا آن bool حساب شود و آفست تصویر دقیق باشد.
  • فریم دقیقاً «صفر-کپی» ساخته می‌شود:
    python
    frame = np.ndarray((h,w,c), dtype=np.uint8, buffer=buf_f, offset=img_offset_base)
    هیچ slice یا frombuffer واسطی که احتمال کپی بدهد استفاده نشده است.
  • باقی ساختار و لاگ‌ها دست‌نخورده ماندند.

2) C++ (main_ipc_final.cpp) — بدون تغییر ساختاری، فقط ست‌کردن frame_ready

ساختار شما خوب بود. تنها یک نکتهٔ کوچک: همگام با Python جدید، مقدار frame_ready را (برای وضوح) ست می‌کنیم. بقیه عین قبل است.

cpp
// main_ipc_final.cpp
//
// YOLOv8 detector (OpenCV DNN) that publishes each decoded frame plus its
// detections into POSIX shared memory and waits for a Python DeepSORT tracker
// process to write tracks back into a second shared-memory segment.
// Hand-off protocol per frame:
//   1. write header + detections + pixel data into /shm_yolo_frames
//   2. sem_post(/sem_frame_ready)
//   3. sem_timedwait(/sem_tracks_ready, 2s) then read /shm_tracks
// NOTE(review): all SHM/semaphore names and the packed struct layouts below
// must stay byte-identical to the Python side's struct format strings
// (including the trailing 1-byte bool in FrameHeader) — verify against the
// Python reader.
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>
// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

// Names shared with the Python process (must match exactly).
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";

// Fixed-capacity arrays inside the SHM segments; extra entries are dropped.
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;

// Packed (no padding) so the byte layout matches the Python struct formats.
#pragma pack(push, 1)
struct FrameHeader {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;   // -1 signals end-of-stream to the consumer
    int32_t num_dets;
    bool frame_ready;   // 1 byte (because of pack(1))
};
struct SharedDet {
    float x, y, w, h;   // tlwh
    float conf;
    int32_t class_id;
};
struct TracksHeader {
    int32_t num_tracks;
    int32_t frame_id;
};
struct TrackRec {
    int32_t track_id;
    float x, y, w, h;   // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)

// Runs a YOLOv8 ONNX model via cv::dnn and post-processes raw outputs into
// NMS-filtered detections in original-frame pixel coordinates.
class YOLOv8Detector {
public:
    // Tick-count baseline used by drawDetections() for elapsed-time logging.
    int64 t2 = cv::getTickCount();

    // model_path: ONNX model file; class_file_path: one class name per line
    // (falls back to a single "obj" label if missing). Tries CUDA first,
    // falls back to the CPU/OpenCV backend.
    // NOTE(review): setPreferableBackend does not throw on unsupported
    // backends in all OpenCV builds — the CPU fallback may only take effect
    // at first forward(); confirm with the target OpenCV version.
    YOLOv8Detector(const std::string& model_path, const std::string& class_file_path,
                   float conf_thresh = 0.30f, float nms_thresh = 0.4f)
        : conf_threshold(conf_thresh), nms_threshold(nms_thresh),
          input_width(640), input_height(640) {
        std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl;
        net = cv::dnn::readNet(model_path);
        try {
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
            std::cout << "✅ Using GPU (CUDA) backend\n";
        } catch (...)
        {
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
            std::cout << "⚠️ Using CPU backend\n";
        }
        loadClassNames(class_file_path);
        // Pre-reserve scratch buffers to avoid per-frame reallocations.
        boxes.reserve(1000);
        confidences.reserve(1000);
        class_ids.reserve(1000);
        outputs.reserve(10);
        std::cout << "✅ YOLOv8 model loaded successfully!\n";
        std::cout << "📊 Classes loaded: " << class_names.size() << "\n";
    }

    // One final, NMS-filtered detection in frame pixel coordinates.
    struct Detection {
        cv::Rect bbox;
        float confidence;
        int class_id;
        std::string class_name;
    };

    // Runs one forward pass on `frame`; writes the blob+forward time (ms)
    // into inference_time_ms and returns post-processed detections.
    std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) {
        auto t0 = std::chrono::high_resolution_clock::now();
        boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear();
        // Letterbox-free resize to 640x640, scale to [0,1], BGR->RGB swap.
        cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height),
                               cv::Scalar(0,0,0), true, false, CV_32F);
        net.setInput(blob);
        net.forward(outputs, net.getUnconnectedOutLayersNames());
        auto t1 = std::chrono::high_resolution_clock::now();
        inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
        return postProcess(frame, outputs);
    }

    // Returns a copy of `frame` with detection boxes and labels drawn on it;
    // also logs elapsed time since construction (t2 baseline).
    cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) {
        cv::Mat result = frame.clone();
        const cv::Scalar det_color(180,105,255);
        for (const auto& d : detections) {
            cv::rectangle(result, d.bbox, det_color, 2);
            char lab[128];
            std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f);
            int base=0;
            cv::Size sz = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base);
            // Filled label background above the box, then white text on top.
            cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6},
                          {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED);
            cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1);
        }
        int64 t3 = cv::getTickCount();
        double time = (t3 - t2) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl;
        std::cout << "time : " << time << std::endl;
        std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl;
        return result;
    }

private:
    cv::dnn::Net net;
    std::vector<std::string> class_names;
    float conf_threshold, nms_threshold;
    int input_width, input_height;
    // Reused per-frame scratch buffers (cleared in detect()).
    cv::Mat blob;
    std::vector<cv::Mat> outputs;
    std::vector<cv::Rect> boxes;
    std::vector<float> confidences;
    std::vector<int> class_ids;

    // Loads one class name per line; falls back to {"obj"} if the file is
    // missing or unreadable.
    void loadClassNames(const std::string& class_file_path) {
        std::ifstream file(class_file_path);
        std::string line;
        if (file.is_open()) {
            while (std::getline(file, line)) class_names.push_back(line);
            file.close();
        } else {
            std::cout << "⚠️ Class file not found. Using fallback labels.\n";
            class_names = {"obj"};
        }
    }

    // Decodes raw YOLOv8 output (layout [1, 4+num_classes, num_dets]: rows are
    // cx,cy,w,h then per-class scores), thresholds by confidence, scales boxes
    // back to frame coordinates, and applies NMS.
    std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) {
        std::vector<Detection> detections;
        detections.reserve(100);
        // Scale factors from network input size back to the original frame.
        float x_factor = frame.cols / static_cast<float>(input_width);
        float y_factor = frame.rows / static_cast<float>(input_height);
        for (const auto& out : outputs) {
            const float* data = (float*)out.data;
            int num_classes = out.size[1] - 4;
            int num_dets = out.size[2];
            for (int i = 0; i < num_dets; ++i) {
                // Channel-major layout: attribute j of candidate i is at
                // data[i + j*num_dets].
                float cx = data[i];
                float cy = data[i + num_dets];
                float w = data[i + 2*num_dets];
                float h = data[i + 3*num_dets];
                float best = 0.f; int best_id = 0;
                for (int j = 4; j < 4 + num_classes; ++j) {
                    float conf = data[i + j*num_dets];
                    if (conf > best) { best = conf; best_id = j - 4; }
                }
                if (best >= conf_threshold) {
                    // Center-format -> top-left format, scaled to frame pixels.
                    int x1 = (int)((cx - w/2) * x_factor);
                    int y1 = (int)((cy - h/2) * y_factor);
                    int ww = (int)(w * x_factor);
                    int hh = (int)(h * y_factor);
                    boxes.emplace_back(x1, y1, ww, hh);
                    confidences.push_back(best);
                    class_ids.push_back(best_id);
                }
            }
        }
        std::vector<int> idx;
        cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx);
        detections.reserve(idx.size());
        for (int k : idx) {
            Detection d;
            d.bbox = boxes[k];
            d.confidence = confidences[k];
            d.class_id = class_ids[k];
            d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ?
                class_names[d.class_id] : "Unknown";
            detections.push_back(std::move(d));
        }
        return detections;
    }
};

// Usage: ./prog <model.onnx> <video.mp4> [coco.names]
// Opens the video, creates/maps both SHM segments and both semaphores, then
// per frame: detect -> publish frame+dets to SHM -> signal -> wait (2s) for
// tracks -> draw tracks -> save frames/frame_%05d.jpg. On EOF it publishes a
// frame_id=-1 header as the termination signal and unlinks all IPC objects.
int main(int argc, char* argv[]) {
    int64 t6 = cv::getTickCount();
    if (argc < 3) {
        std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n";
        return -1;
    }
    std::string model_path = argv[1];
    std::string video_path = argv[2];
    std::string class_file = (argc>3)? argv[3] : "";
    try {
        YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f);
        cv::VideoCapture cap(video_path);
        if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; }
        cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
        int fps = (int)cap.get(cv::CAP_PROP_FPS);
        int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
        int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
        int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
        int channels = 3;
        std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n";

        // ===== Shared memory =====
        // Frame segment layout: [FrameHeader][SharedDet x MAX_DETS][pixels].
        // The Python side builds its ndarray directly on the pixel region at
        // the same offsets, so any layout change must be mirrored there.
        size_t frame_bytes = (size_t)width * height * channels;
        size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes;
        int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; }
        if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; }
        void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0);
        if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; }
        // Tracks segment layout: [TracksHeader][TrackRec x MAX_TRACKS].
        size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec);
        int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; }
        if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; }
        void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0);
        if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; }

        // Both semaphores start at 0: frame_ready is posted by us, tracks_ready
        // by the Python tracker.
        sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0);
        sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0);
        if (sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; }

        // Optional annotated-video writer; failure is non-fatal.
        cv::VideoWriter writer;
        bool use_writer = false;
        try {
            writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height));
            use_writer = writer.isOpened();
        } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; }

        cv::Mat frame;
        int frame_id = 0;
        double total_det_ms = 0.0;
        auto t0_wall = std::chrono::high_resolution_clock::now();
        // Fixed pointers into the frame segment's three regions.
        auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames);
        auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader));
        auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet));
        const cv::Scalar trk_color(230,216,173);
        const cv::Scalar trk_text_color(0,0,0);
        size_t shm_total = shm_frame_size + shm_tracks_size;
        std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size
                  << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n";
        int64 t7 = cv::getTickCount();
        double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- main before while start ----------" << std::endl;
        std::cout << "time : " << time_main << std::endl;
        std::cout << "---------- main before while end ----------" << std::endl;

        while (true) {
            int64 t8 = cv::getTickCount();
            cap >> frame;
            if (frame.empty()) break;
            // memcpy below assumes a single contiguous pixel buffer.
            if (!frame.isContinuous()) frame = frame.clone();
            double det_ms = 0.0;
            auto dets = detector.detect(frame, det_ms);
            total_det_ms += det_ms;
            // Fill header + detections (capped at MAX_DETS).
            hdr->width = width;
            hdr->height = height;
            hdr->channels = 3;
            hdr->frame_id = frame_id;
            hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS);
            for (int i = 0; i < hdr->num_dets; ++i) {
                const auto& d = dets[i];
                det_arr[i].x = (float)d.bbox.x;
                det_arr[i].y = (float)d.bbox.y;
                det_arr[i].w = (float)d.bbox.width;
                det_arr[i].h = (float)d.bbox.height;
                det_arr[i].conf = d.confidence;
                det_arr[i].class_id = d.class_id;
            }
            // Unavoidable copy from the decoder buffer into SHM (the
            // cross-process hand-off itself is zero-copy).
            std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize());
            // Signal the consumer.
            hdr->frame_ready = true;
            auto t_py_start = std::chrono::high_resolution_clock::now();
            sem_post(sem_frame_ready);
            // Wait (max 2 s) for the tracker's reply.
            struct timespec timeout;
            clock_gettime(CLOCK_REALTIME, &timeout);
            timeout.tv_sec += 2;
            int sem_result = sem_timedwait(sem_tracks_ready, &timeout);
            double trk_ms = 0.0;
            std::vector<TrackRec> tracks;
            if (sem_result != 0) {
                std::cout << "⚠️ Timeout waiting for Python tracker\n";
            } else {
                auto t_py_end = std::chrono::high_resolution_clock::now();
                trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count();
                // Copy the tracks out of SHM before we start the next frame.
                auto* th = reinterpret_cast<TracksHeader*>(shm_tracks);
                auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader));
                int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS);
                tracks.assign(tr, tr + ntracks);
            }
            // Visualization (optional).
            cv::Mat vis = frame.clone();
            for (const auto& tk : tracks) {
                cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y),
                             (int)std::round(tk.w),(int)std::round(tk.h));
                // Clip to the frame; skip degenerate boxes.
                box &= cv::Rect(0,0,width,height);
                if (box.width<=0 || box.height<=0) continue;
                cv::rectangle(vis, box, trk_color, 2);
                char lab[128];
                std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id);
                int base=0;
                cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base);
                // Keep the label inside the frame even for boxes near the top.
                int ly = std::max(ts.height + 6, box.y);
                cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED);
                cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2);
            }
            // Sample per-frame output (requires an existing "frames/" dir).
            char filename[256];
            std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id);
            cv::imwrite(filename, vis);
            double total_ms = det_ms + trk_ms;
            std::cout << "CPP frame=" << std::setw(5) << frame_id
                      << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms"
                      << " trk=" << trk_ms << "ms"
                      << " total=" << total_ms << "ms"
                      << " dets=" << dets.size()
                      << " tracks=" << tracks.size() << "\n";
            ++frame_id;
            int64 t9 = cv::getTickCount();
            double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency();
            std::cout << "---------- main while start ----------" << std::endl;
            std::cout << "time : " << time_main_while << std::endl;
            std::cout << "---------- main e while end ----------" << std::endl;
        }

        // End of stream: publish the termination header (frame_id = -1) and
        // wake the consumer one last time so it can exit.
        auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames);
        hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0;
        hdr_end->frame_id=-1; hdr_end->num_dets=0; hdr_end->frame_ready = false;
        sem_post(sem_frame_ready);
        cap.release();
        if (use_writer) writer.release();
        cv::destroyAllWindows();
        auto t1_wall = std::chrono::high_resolution_clock::now();
        double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count();
        double avg_det = total_det_ms / std::max(1, frame_id);
        std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det
                  << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n";
        // cleanup
        munmap(shm_frames, shm_frame_size);
        munmap(shm_tracks, shm_tracks_size);
        close(shm_fd_frames);
        close(shm_fd_tracks);
        sem_close(sem_frame_ready);
        sem_close(sem_tracks_ready);
        // ⚠️ Normally unlink only after Python has finished; kept here to
        // match the current pattern of this codebase.
        shm_unlink(SHM_FRAME_NAME);
        shm_unlink(SHM_TRACKS_NAME);
        sem_unlink(SEM_FRAME_READY);
        sem_unlink(SEM_TRACKS_READY);
    } catch (const std::exception& e) {
        std::cerr << "❌ Exception: " << e.what() << "\n";
        return -1;
    }
    return 0;
}

نکته: اگر روزی خواستی کپی داخل C++ را هم حذف کنی، باید دیکودر (FFmpeg/کدک GPU) بتواند مستقیم در بافر img_ptr بنویسد (API با بافر کاربر). VideoCapture این قابلیت را ندارد؛ بنابراین فعلاً همین یک کپی ناگزیر باقی می‌ماند، ولی مرز بین پردازه‌ها کاملاً صفر-کپی است.


3) Makefile

makefile
# Makefile CXX := g++ CXXFLAGS := -O3 -std=c++17 -Wall -Wextra OPENCV_CFLAGS := $(shell pkg-config --cflags opencv4) OPENCV_LIBS := $(shell pkg-config --libs opencv4) # برای shm/semaphore روی لینوکس LIBS := -pthread -lrt TARGET := yolo_deepsort_ipc SRC := main_ipc_final.cpp all: $(TARGET) $(TARGET): $(SRC) $(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o $@ $^ $(OPENCV_LIBS) $(LIBS) clean: rm -f $(TARGET) .PHONY: all clean

4) روش اجرا

  1. پیش‌نیازها (Ubuntu/Debian مثال):
bash
sudo apt-get update
sudo apt-get install -y build-essential pkg-config libopencv-dev python3-opencv python3-pip
python3 -m pip install posix_ipc deep-sort-realtime torch torchvision --extra-index-url https://download.pytorch.org/whl/cu121
  2. ساخت پوشه خروجی فریم‌ها (Optional):
bash
mkdir -p frames
  3. کامپایل C++:
bash
make   # خروجی: ./yolo_deepsort_ipc
  4. اجرای Python (ترکر): در یک ترمینال:
bash
python3 tracker_py_ipc.py
  5. اجرای C++ (دیتکتور + فرستندهٔ SHM): در ترمینال دیگر:
bash
./yolo_deepsort_ipc /path/to/yolov8n.onnx /path/to/video.mp4 /path/to/coco.names

ترتیب اجرا مهم نیست؛ Python اگر SharedMemory را پیدا نکند، تا ۵ دقیقه منتظر می‌ماند.
اگر خطای دسترسی به /dev/shm داشتی، با sudo اجرا نکن؛ زیرا نام‌های POSIX با کاربر متفاوت می‌شوند.


خلاصهٔ تغییرات کلیدی برای «صفر-کپی» Python

  • هم‌تراز کردن دقیق هدر با C++ (حساب کردن bool).
  • ساخت مستقیم np.ndarray روی بافر SHM با buffer و offset (بدون slice/کپی):
    python
    frame = np.ndarray((h,w,c), dtype=np.uint8, buffer=buf_f, offset=img_offset_base)
  • هیچ img_bytes = buf_f[...] و np.frombuffer(...) واسطی که احتمال کپی دهد نداریم.

اگر خواستی همین رو به نسخهٔ GPU (cuda/cudacodec) یا ffmpeg تغییر بدی تا همان کپی داخل C++ هم حذف شود، خبر بده تا یک پیاده‌سازی Producer که مستقیم در SHM می‌نویسد برات آماده کنم.

مشاركة هذا السؤال والجواب