کد زیر کاملا درست کار میکند اما من احتیاج دارم که...
Créé le : 3 septembre 2025
Répondu en utilisant GPT-5 Thinking par Chat01
Créé le : 3 septembre 2025
Répondu en utilisant GPT-5 Thinking par Chat01
کد زیر کاملا درست کار میکند
اما من احتیاج دارم که تصاویر frameها را با pointer از C++ به Python با shared memory بفرستم و هیچگونه copy اتفاق نیفتد؛ نکتهٔ اصلی همین است.
پس هدف اصلی این است که بدون کپی کردن frame انرا برای python بفرستد
makefile را هم بنویس و بگو چگونه کد هارا اجرا کنم
به بقیه ساختار کد دست نزن
کد نهایی را کامل بنویس
#!/usr/bin/env python3
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch
try:
import posix_ipc
from multiprocessing import shared_memory
except ModuleNotFoundError:
print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
raise
# ----- POSIX IPC object names: must match the C++ producer exactly -----
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"
# Fixed-capacity record slots reserved in shared memory (same values as C++).
MAX_DETS = 512
MAX_TRACKS = 512
# Little-endian wire formats mirroring the #pragma pack(1) C++ structs.
# NOTE(review): the C++ FrameHeader also ends with a `bool frame_ready` under
# pack(1), so its on-wire size is 21 bytes, one more than this 20-byte format —
# confirm which layout the producer actually writes before relying on HDR_SIZE.
HDR_FMT = "<iiiii" # width, height, channels, frame_id, num_dets
DET_FMT = "<fffffi" # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii" # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id
# Pre-computed byte sizes used for offset arithmetic into the shared buffers.
HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)
def open_or_create_semaphores():
    """Open (or create) the two named POSIX semaphores shared with C++.

    Returns:
        tuple: ``(sem_frame_ready, sem_tracks_ready)`` — ``posix_ipc.Semaphore``
        handles, created with an initial value of 0 if they do not exist yet.
    """
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    # O_CREAT without O_EXCL: attach if the C++ process created them first.
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready
def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until a shared-memory segment named *name* exists, then attach.

    Args:
        name: POSIX shared-memory name (as created by the C++ side).
        timeout_s: total seconds to keep retrying before giving up.
        retry_s: sleep between attach attempts.

    Returns:
        multiprocessing.shared_memory.SharedMemory: the attached segment.

    Raises:
        TimeoutError: if the segment does not appear within *timeout_s*;
        the last FileNotFoundError is chained as the cause.
    """
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            # Segment not created yet — remember the error and retry.
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامهی C++ را اجرا کردهاید.") from last_err
def main():
    """Consume frames + YOLO detections from C++ shared memory, run Deep SORT,
    and publish confirmed tracks back through a second shared-memory segment.

    Wire protocol (little-endian, matching the packed C++ structs):
      frame segment : FrameHeader | MAX_DETS * SharedDet | H*W*C uint8 image
      tracks segment: TracksHeader | MAX_TRACKS * TrackRec
    Synchronisation: sem_frame_ready (C++ -> Python), sem_tracks_ready (back).
    A ``frame_id`` of -1 in the header signals a clean shutdown.
    """
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()
    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")
    # Deep SORT — fast and stable settings.
    tracker = DeepSort(
        max_age=40,
        n_init=3,               # confirm tracks quickly
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False
    )
    print("✅ DeepSort initialised with ReID.")
    # FIX: the C++ FrameHeader is #pragma pack(1) and ends with a trailing
    # `bool frame_ready`, so the on-wire header is 21 bytes — not the 20 of
    # the module-level HDR_FMT. Using HDR_SIZE here would shift every
    # detection/image offset by one byte; use a matching local format.
    header_struct = struct.Struct("<iiiiiB")
    hdr_size = header_struct.size
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)
    # stats
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")
    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        sem_frame_ready.acquire()
        t_start = time.time()
        # header
        width, height, channels, frame_id, num_dets, _ready = header_struct.unpack_from(buf_f, 0)
        # clean shutdown signal from the C++ producer
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break
        # detections
        detections = []
        dets_start = hdr_size
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))
        # frame: zero-copy ndarray view directly over the shared buffer
        # (no slice, no frombuffer intermediate — nothing is copied).
        buf_start = time.time()
        img_offset = hdr_size + MAX_DETS * DET_SIZE
        frame = np.ndarray(
            shape=(height, width, channels),
            dtype=np.uint8,
            buffer=buf_f,
            offset=img_offset,
        )
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)
        # track
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1
        # collect confirmed, freshly-updated tracks only
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")
        # write results into shm_tracks
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()
        # per-frame report (tracker result + timings)
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")
    # cleanup — best-effort: views may still be exported by `frame`
    try:
        buf_f.release()
        buf_t.release()
        shm_frame.close()
        shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")
# Script entry point. FIX: the guard had lost its dunder underscores
# (`if name == "main":`), which raises NameError / never runs `main()`.
if __name__ == "__main__":
    main()
// main_ipc_final.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>
// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>
// Names of the POSIX IPC objects; must match the Python consumer exactly.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";
// Fixed record capacity reserved in shared memory for detections / tracks.
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;
// Byte-packed so the layouts match the struct format strings in Python.
#pragma pack(push, 1)
// Header at offset 0 of the frame segment, followed by MAX_DETS SharedDet
// records and then the raw image bytes.
struct FrameHeader {
int32_t width;
int32_t height;
int32_t channels;
int32_t frame_id;
int32_t num_dets;
// Trailing flag; under pack(1) this makes sizeof(FrameHeader) 21 bytes,
// so the Python reader's header format must include this extra byte.
bool frame_ready;
};
// One detection record: top-left x/y, width/height, confidence, class index.
struct SharedDet {
float x, y, w, h; // tlwh
float conf;
int32_t class_id;
};
// Header of the tracks segment, followed by MAX_TRACKS TrackRec records.
struct TracksHeader {
int32_t num_tracks;
int32_t frame_id;
};
// One track record written back by the Python Deep SORT side.
struct TrackRec {
int32_t track_id;
float x, y, w, h; // tlwh
float conf;
int32_t class_id;
};
#pragma pack(pop)
class YOLOv8Detector {
public:
int64 t2 = cv::getTickCount();
textYOLOv8Detector(const std::string& model_path, const std::string& class_file_path, float conf_thresh = 0.30f, float nms_thresh = 0.4f) : conf_threshold(conf_thresh), nms_threshold(nms_thresh), input_width(640), input_height(640) { std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl; net = cv::dnn::readNet(model_path); try { net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); std::cout << "✅ Using GPU (CUDA) backend\n"; } catch (...) { net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); std::cout << "⚠️ Using CPU backend\n"; } loadClassNames(class_file_path); boxes.reserve(1000); confidences.reserve(1000); class_ids.reserve(1000); outputs.reserve(10); std::cout << "✅ YOLOv8 model loaded successfully!\n"; std::cout << "📊 Classes loaded: " << class_names.size() << "\n"; } struct Detection { cv::Rect bbox; float confidence; int class_id; std::string class_name; }; std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) { auto t0 = std::chrono::high_resolution_clock::now(); boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear(); cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height), cv::Scalar(0,0,0), true, false, CV_32F); net.setInput(blob); net.forward(outputs, net.getUnconnectedOutLayersNames()); auto t1 = std::chrono::high_resolution_clock::now(); inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); return postProcess(frame, outputs); } cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) { cv::Mat result = frame.clone(); // صورتی (BGR): RGB(255,105,180) -> BGR(180,105,255) const cv::Scalar det_color(180,105,255); for (const auto& d : detections) { cv::rectangle(result, d.bbox, det_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f); int base=0; cv::Size sz 
= cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base); cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6}, {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED); cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1); } int64 t3 = cv::getTickCount(); double time = (t3 - t2) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl; std::cout << "time : " << time << std::endl; std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl; return result; }
private:
//int64 t4 = cv::getTickCount();
textcv::dnn::Net net; std::vector<std::string> class_names; float conf_threshold, nms_threshold; int input_width, input_height; cv::Mat blob; std::vector<cv::Mat> outputs; std::vector<cv::Rect> boxes; std::vector<float> confidences; std::vector<int> class_ids; void loadClassNames(const std::string& class_file_path) { std::ifstream file(class_file_path); std::string line; if (file.is_open()) { while (std::getline(file, line)) class_names.push_back(line); file.close(); } else { std::cout << "⚠️ Class file not found. Using fallback labels.\n"; class_names = {"obj"}; } } std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) { std::vector<Detection> detections; detections.reserve(100); float x_factor = frame.cols / static_cast<float>(input_width); float y_factor = frame.rows / static_cast<float>(input_height); for (const auto& out : outputs) { const float* data = (float*)out.data; int num_classes = out.size[1] - 4; int num_dets = out.size[2]; for (int i = 0; i < num_dets; ++i) { float cx = data[i]; float cy = data[i + num_dets]; float w = data[i + 2*num_dets]; float h = data[i + 3*num_dets]; float best = 0.f; int best_id = 0; for (int j = 4; j < 4 + num_classes; ++j) { float conf = data[i + j*num_dets]; if (conf > best) { best = conf; best_id = j - 4; } } if (best >= conf_threshold) { int x1 = (int)((cx - w/2) * x_factor); int y1 = (int)((cy - h/2) * y_factor); int ww = (int)(w * x_factor); int hh = (int)(h * y_factor); boxes.emplace_back(x1, y1, ww, hh); confidences.push_back(best); class_ids.push_back(best_id); } } } std::vector<int> idx; cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx); detections.reserve(idx.size()); for (int k : idx) { Detection d; d.bbox = boxes[k]; d.confidence = confidences[k]; d.class_id = class_ids[k]; d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ? 
class_names[d.class_id] : "Unknown"; detections.push_back(std::move(d)); } return detections; } //int64 t5 = cv::getTickCount(); //double time_private = (t4 - t5) * 1000.0 / cv::getTickFrequency(); //std::cout << "---------- class YOLOv8Detector private start ----------" << std::endl; //std::cout << "time : " << time_private << std::endl; //std::cout << "---------- class YOLOv8Detector private end ----------" << std::endl;
};
int main(int argc, char* argv[]) {
int64 t6 = cv::getTickCount();
textif (argc < 3) { std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n"; return -1; } std::string model_path = argv[1]; std::string video_path = argv[2]; std::string class_file = (argc>3)? argv[3] : ""; try { YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); // ← آستانه کمتر برای پایداری cv::VideoCapture cap(video_path); if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; } cap.set(cv::CAP_PROP_BUFFERSIZE, 1); int fps = (int)cap.get(cv::CAP_PROP_FPS); int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT); int channels = 3; std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n"; // ===== Shared memory ===== size_t frame_bytes = (size_t)width * height * channels; size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes; int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; } if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; } void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0); if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; } size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec); int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; } if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; } void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0); if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; } sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0); sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0); if 
(sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; } cv::VideoWriter writer; bool use_writer = false; try { writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height)); use_writer = writer.isOpened(); } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; } cv::Mat frame; int frame_id = 0; double total_det_ms = 0.0; auto t0_wall = std::chrono::high_resolution_clock::now(); // پویینترهای SHM auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames); auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader)); auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet)); const cv::Scalar det_color(180,105,255); // صورتی const cv::Scalar trk_color(230,216,173); // آبی کمرنگ (BGR) const cv::Scalar trk_text_color(0,0,0); // سربار SHM (محاسبه و چاپ یکبار) size_t shm_total = shm_frame_size + shm_tracks_size; std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n"; int64 t7 = cv::getTickCount(); double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main before while start ----------" << std::endl; std::cout << "time : " << time_main << std::endl; std::cout << "---------- main before while end ----------" << std::endl; while (true) { int64 t8 = cv::getTickCount(); cap >> frame; if (frame.empty()) break; if (!frame.isContinuous()) frame = frame.clone(); double det_ms = 0.0; auto dets = detector.detect(frame, det_ms); total_det_ms += det_ms; // نوشتن به SHM hdr->width = width; hdr->height = height; hdr->channels = 3; hdr->frame_id = frame_id; hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS); for (int i = 0; i < hdr->num_dets; ++i) { const auto& d = dets[i]; det_arr[i].x = (float)d.bbox.x; det_arr[i].y = (float)d.bbox.y; 
det_arr[i].w = (float)d.bbox.width; det_arr[i].h = (float)d.bbox.height; det_arr[i].conf = d.confidence; det_arr[i].class_id = d.class_id; } std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize()); auto t_py_start = std::chrono::high_resolution_clock::now(); sem_post(sem_frame_ready); // منتظر پاسخ ترکر struct timespec timeout; clock_gettime(CLOCK_REALTIME, &timeout); timeout.tv_sec += 2; // کمی سخاوتمندانهتر int sem_result = sem_timedwait(sem_tracks_ready, &timeout); double trk_ms = 0.0; std::vector<TrackRec> tracks; if (sem_result != 0) { std::cout << "⚠️ Timeout waiting for Python tracker\n"; } else { auto t_py_end = std::chrono::high_resolution_clock::now(); trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count(); auto* th = reinterpret_cast<TracksHeader*>(shm_tracks); auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader)); int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS); tracks.assign(tr, tr + ntracks); } // رسم: اول det (صورتی)، بعد track (آبی کمرنگ + ID) cv::Mat vis = detector.drawDetections(frame, dets); for (const auto& tk : tracks) { cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y), (int)std::round(tk.w),(int)std::round(tk.h)); box &= cv::Rect(0,0,width,height); if (box.width<=0 || box.height<=0) continue; cv::rectangle(vis, box, trk_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id); int base=0; cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base); int ly = std::max(ts.height + 6, box.y); cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED); cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2); } char filename[256]; std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id); cv::imwrite(filename, vis); // چاپ per-frame: det و trk و جمع double total_ms = det_ms + trk_ms; std::cout << "CPP frame=" << 
std::setw(5) << frame_id << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms" << " trk=" << trk_ms << "ms" << " total=" << total_ms << "ms" << " dets=" << dets.size() << " tracks=" << tracks.size() << "\n"; if (use_writer) writer.write(vis); cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis); char k = (char)cv::waitKey(1); if (k=='q' || k==27) break; ++frame_id; int64 t9 = cv::getTickCount(); double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main while start ----------" << std::endl; std::cout << "time : " << time_main_while << std::endl; std::cout << "---------- main e while end ----------" << std::endl; } // پایان: سیگنال خاتمه به Python auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames); hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0; hdr_end->frame_id=-1; hdr_end->num_dets=0; sem_post(sem_frame_ready); cap.release(); if (use_writer) writer.release(); cv::destroyAllWindows(); // جمعبندی زمانها auto t1_wall = std::chrono::high_resolution_clock::now(); double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count(); double avg_det = total_det_ms / std::max(1, frame_id); std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n"; // cleanup munmap(shm_frames, shm_frame_size); munmap(shm_tracks, shm_tracks_size); close(shm_fd_frames); close(shm_fd_tracks); sem_close(sem_frame_ready); sem_close(sem_tracks_ready); // ⚠️ پیشنهاد: unlink را بگذارید بعد از اتمام Python. shm_unlink(SHM_FRAME_NAME); shm_unlink(SHM_TRACKS_NAME); sem_unlink(SEM_FRAME_READY); sem_unlink(SEM_TRACKS_READY); } catch (const std::exception& e) { std::cerr << "❌ Exception: " << e.what() << "\n"; return -1; } return 0;
}
//g++ main_ipc_optimized.cpp -o yolo_deepsort_ipc_optimized pkg-config --cflags --libs opencv4
//./yolo_deepsort_ipc_optimized /home/rayfan/Documents/cpp_projects/shared_optimize/yolov8n.onnx /media/rayfan/d/videos/9600+6400/output.avi /home/rayfan/Documents/cpp_projects/shared_optimize/coco.names
خیلی خب — هدف اینه که «فریمِ تصویر» واقعا Zero-Copy بین ++C و Python جابهجا بشه.
کاری که کردم:
در ++C بهجای memcpy، یک cv::Mat مستقیما روی بافرِ Shared Memory میسازیم و همان را با VideoCapture::read پُر میکنیم. یعنی خود دیکُدِ ویدیو مستقیم میرود داخل SHM (بدون کپی اضافی).
در Python هم numpy.ndarray را با offset روی همان بافر میسازیم (نه کپی).
برای یکیبودن ABI، همان فیلد bool frame_ready که در هدر ++C داشتید را در پایتون هم لحاظ کردم (بهصورت uint8) تا اندازهٔ هدر در هر دو طرف دقیقاً یکی باشد.
🔧 بقیهٔ ساختار برنامه را دست نزدم؛ فقط همان نقاط لازم برای Zero-Copy را تغییر دادم و یک Makefile هم اضافه کردم.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Zero-copy Python side of the C++ <-> Python tracking pipeline.

Reads frames + YOLO detections from POSIX shared memory written by the C++
producer, runs Deep SORT, and writes tracks back. The frame is exposed as a
numpy ndarray constructed directly over the shared buffer — no pixel copy.
(Reconstructed: the original text had this script collapsed onto a few lines
with fence residue, which was not runnable.)
"""
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======
try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== Names must match the C++ side exactly =====
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"

MAX_DETS = 512
MAX_TRACKS = 512

# Structures (little-endian).
# Important: the C++ FrameHeader contains a trailing `bool frame_ready` under
# #pragma pack(1); it is read here as a uint8 so the header sizes match.
HDR_FMT = "<iiiiiB"  # width, height, channels, frame_id, num_dets, frame_ready(u8)
DET_FMT = "<fffffi"  # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii"  # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id

HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)


def open_or_create_semaphores():
    """Open (or create) the two named POSIX semaphores shared with C++."""
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready


def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until a shared-memory segment named *name* exists, then attach."""
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامهی C++ را اجرا کردهاید.") from last_err


def main():
    """Receive frames zero-copy from C++, run Deep SORT, publish tracks back."""
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()
    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")
    # Deep SORT
    tracker = DeepSort(
        max_age=40,
        n_init=3,
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False
    )
    print("✅ DeepSort initialised with ReID.")
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")
    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        sem_frame_ready.acquire()
        t_start = time.time()
        # ----- header -----
        width, height, channels, frame_id, num_dets, frame_ready = header_struct.unpack_from(buf_f, 0)
        # clean shutdown
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break
        # ----- detections -----
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))
        # ----- frame: zero-copy -----
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        # No copy happens here: the array is a view directly over the SHM.
        frame = np.ndarray(
            shape=(height, width, channels),
            dtype=np.uint8,
            buffer=buf_f,
            offset=img_offset
        )
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)
        # ----- track -----
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1
        # collect confirmed, freshly-updated tracks
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")
        # write into shm_tracks
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()
        # per-frame report
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")
    # cleanup
    try:
        buf_f.release()
        buf_t.release()
        shm_frame.close()
        shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")


if __name__ == "__main__":
    main()
// main_ipc_final.cpp
//
// YOLOv8 detector (OpenCV DNN) that shares each decoded video frame with a
// Python DeepSORT tracker through POSIX shared memory, zero-copy: the frame is
// decoded directly into the mmap'ed region, Python maps the same region as a
// NumPy view. Two named semaphores form a ping-pong handshake:
//   sem_frame_ready  : C++ -> Python, "a new frame + detections are in SHM"
//   sem_tracks_ready : Python -> C++, "track results are in SHM"
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>
// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

// Names must match the Python side exactly (see FRAME_SHM_NAME etc. there).
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";
// Fixed-capacity arrays inside SHM; Python uses the same MAX_DETS/MAX_TRACKS.
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;

// Wire format shared with Python — packed, no padding. Field order and sizes
// must stay in lockstep with the Python struct format strings.
#pragma pack(push, 1)
struct FrameHeader {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;   // -1 is the shutdown sentinel (see end of main)
    int32_t num_dets;
    bool frame_ready;   // read on the Python side as 'B' (per original author note)
    // NOTE(review): packed size is 21 bytes. The Python HDR_FMT "<iiiii" is
    // only 20 bytes — verify the Python reader accounts for this extra byte
    // when computing the detection/image offsets, otherwise every field after
    // the header is shifted by one byte.
};
// One detection record: top-left x/y + width/height (tlwh), confidence, class.
struct SharedDet {
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
// Header of the tracks SHM segment, written by Python ("<ii" there).
struct TracksHeader {
    int32_t num_tracks;
    int32_t frame_id;
};
// One track record written by Python ("<ifffffi" there).
struct TrackRec {
    int32_t track_id;
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)

// Thin wrapper around cv::dnn for a YOLOv8 ONNX model: loads the net, runs
// inference, decodes the raw output tensor into NMS-filtered detections.
class YOLOv8Detector {
public:
    // Tick count taken at construction; used by drawDetections to print the
    // elapsed time since the previous draw (t2 is updated nowhere else —
    // NOTE(review): so after the first frame this measures time since
    // construction, not per-frame time).
    int64 t2 = cv::getTickCount();

    // model_path: ONNX file; class_file_path: newline-separated label file
    // (optional — falls back to a single "obj" label).
    YOLOv8Detector(const std::string& model_path, const std::string& class_file_path,
                   float conf_thresh = 0.30f, float nms_thresh = 0.4f)
        : conf_threshold(conf_thresh), nms_threshold(nms_thresh),
          input_width(640), input_height(640)
    {
        std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl;
        net = cv::dnn::readNet(model_path);
        // Prefer CUDA; fall back to CPU. NOTE(review): setPreferableBackend
        // typically does not throw when CUDA is absent — the failure may only
        // surface at forward() time; confirm on the target OpenCV build.
        try {
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
            std::cout << "✅ Using GPU (CUDA) backend\n";
        } catch (...) {
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
            std::cout << "⚠️ Using CPU backend\n";
        }
        loadClassNames(class_file_path);
        // Pre-reserve scratch buffers reused across frames to avoid
        // per-frame reallocation.
        boxes.reserve(1000);
        confidences.reserve(1000);
        class_ids.reserve(1000);
        outputs.reserve(10);
        std::cout << "✅ YOLOv8 model loaded successfully!\n";
        std::cout << "📊 Classes loaded: " << class_names.size() << "\n";
    }

    // Final, NMS-filtered detection in original-frame pixel coordinates.
    struct Detection {
        cv::Rect bbox;
        float confidence;
        int class_id;
        std::string class_name;
    };

    // Runs one inference pass on `frame`. Writes the pure inference time
    // (blob + forward, excluding postProcess) into inference_time_ms.
    std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) {
        auto t0 = std::chrono::high_resolution_clock::now();
        // Reset scratch state from the previous frame.
        boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear();
        // Letterbox-free resize to 640x640, scale to [0,1], BGR->RGB swap.
        cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height),
                               cv::Scalar(0,0,0), true, false, CV_32F);
        net.setInput(blob);
        net.forward(outputs, net.getUnconnectedOutLayersNames());
        auto t1 = std::chrono::high_resolution_clock::now();
        inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
        return postProcess(frame, outputs);
    }

    // Returns a COPY of `frame` with detections drawn (the clone keeps the
    // raw SHM pixels untouched so Python never sees the overlay).
    cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) {
        cv::Mat result = frame.clone();
        const cv::Scalar det_color(180,105,255); // pink (BGR)
        for (const auto& d : detections) {
            cv::rectangle(result, d.bbox, det_color, 2);
            char lab[128];
            std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f);
            int base=0;
            cv::Size sz = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base);
            // Filled label background above the box, then white label text.
            cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6},
                          {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED);
            cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3},
                        cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1);
        }
        // Timing trace (elapsed since t2 — see note on the t2 member).
        int64 t3 = cv::getTickCount();
        double time = (t3 - t2) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl;
        std::cout << "time : " << time << std::endl;
        std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl;
        return result;
    }

private:
    cv::dnn::Net net;
    std::vector<std::string> class_names;
    float conf_threshold, nms_threshold;
    int input_width, input_height;
    // Scratch buffers reused across detect() calls (cleared each frame).
    cv::Mat blob;
    std::vector<cv::Mat> outputs;
    std::vector<cv::Rect> boxes;
    std::vector<float> confidences;
    std::vector<int> class_ids;

    // Loads one label per line; on failure falls back to the single "obj" label.
    void loadClassNames(const std::string& class_file_path) {
        std::ifstream file(class_file_path);
        std::string line;
        if (file.is_open()) {
            while (std::getline(file, line)) class_names.push_back(line);
            file.close();
        } else {
            std::cout << "⚠️ Class file not found. Using fallback labels.\n";
            class_names = {"obj"};
        }
    }

    // Decodes the raw YOLOv8 output tensor (layout assumed
    // [1, 4+num_classes, num_dets], values transposed per-attribute — TODO
    // confirm against the exported ONNX), scales boxes back to frame
    // coordinates, and applies class-agnostic NMS.
    std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) {
        std::vector<Detection> detections;
        detections.reserve(100);
        // Scale factors from 640x640 network space back to the frame.
        float x_factor = frame.cols / static_cast<float>(input_width);
        float y_factor = frame.rows / static_cast<float>(input_height);
        for (const auto& out : outputs) {
            const float* data = (float*)out.data;
            int num_classes = out.size[1] - 4;
            int num_dets = out.size[2];
            for (int i = 0; i < num_dets; ++i) {
                // Attribute j of candidate i lives at data[i + j*num_dets].
                float cx = data[i];
                float cy = data[i + num_dets];
                float w = data[i + 2*num_dets];
                float h = data[i + 3*num_dets];
                // Best class score for this candidate.
                float best = 0.f; int best_id = 0;
                for (int j = 4; j < 4 + num_classes; ++j) {
                    float conf = data[i + j*num_dets];
                    if (conf > best) { best = conf; best_id = j - 4; }
                }
                if (best >= conf_threshold) {
                    // Center/size -> top-left/size in frame pixels.
                    int x1 = (int)((cx - w/2) * x_factor);
                    int y1 = (int)((cy - h/2) * y_factor);
                    int ww = (int)(w * x_factor);
                    int hh = (int)(h * y_factor);
                    boxes.emplace_back(x1, y1, ww, hh);
                    confidences.push_back(best);
                    class_ids.push_back(best_id);
                }
            }
        }
        // Non-maximum suppression over all candidates kept above.
        std::vector<int> idx;
        cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx);
        detections.reserve(idx.size());
        for (int k : idx) {
            Detection d;
            d.bbox = boxes[k];
            d.confidence = confidences[k];
            d.class_id = class_ids[k];
            d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size())
                               ? class_names[d.class_id] : "Unknown";
            detections.push_back(std::move(d));
        }
        return detections;
    }
};

// Usage: ./prog <model.onnx> <video.mp4> [coco.names]
// Opens the video, creates the two SHM segments + semaphores, then for every
// frame: decode into SHM -> detect -> publish header+dets -> post
// sem_frame_ready -> wait (2s timeout) on sem_tracks_ready -> draw/save/show.
int main(int argc, char* argv[]) {
    int64 t6 = cv::getTickCount();
    if (argc < 3) {
        std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n";
        return -1;
    }
    std::string model_path = argv[1];
    std::string video_path = argv[2];
    std::string class_file = (argc>3)? argv[3] : "";
    try {
        YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f);
        cv::VideoCapture cap(video_path);
        if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; }
        cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
        int fps = (int)cap.get(cv::CAP_PROP_FPS);
        int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
        int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
        int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
        int channels = 3;
        std::cout << "📹 " << width << "x" << height << " @" << fps
                  << "fps, total " << total << "\n";

        // ===== Shared memory =====
        // Frame segment layout: [FrameHeader][MAX_DETS x SharedDet][H*W*3 pixels]
        size_t frame_bytes = (size_t)width * height * channels;
        size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes;
        int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; }
        if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; }
        void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE,
                                MAP_SHARED, shm_fd_frames, 0);
        if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; }
        // Tracks segment layout: [TracksHeader][MAX_TRACKS x TrackRec]
        size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec);
        int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; }
        if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; }
        void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE,
                                MAP_SHARED, shm_fd_tracks, 0);
        if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; }
        // Both semaphores start at 0 (nothing ready yet).
        sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0);
        sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0);
        if (sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) {
            perror("sem_open"); return -1;
        }
        // Optional mp4 output; failure is non-fatal.
        cv::VideoWriter writer;
        bool use_writer = false;
        try {
            writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'),
                        fps, cv::Size(width, height));
            use_writer = writer.isOpened();
        } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; }

        // SHM pointers (header, detection array, image pixels).
        auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames);
        auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader));
        auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader)
                                                   + MAX_DETS*sizeof(SharedDet));
        // NOTE(review): det_color here is unused — drawDetections has its own.
        const cv::Scalar det_color(180,105,255);
        const cv::Scalar trk_color(230,216,173);     // light blue (BGR)
        const cv::Scalar trk_text_color(0,0,0);
        // Frame Mat laid directly over the SHM buffer (zero-copy): cap.read()
        // decodes straight into shared memory, no intermediate copy.
        cv::Mat frame_shm(height, width, CV_8UC3, img_ptr);
        size_t shm_total = shm_frame_size + shm_tracks_size;
        std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size
                  << " bytes, tracks=" << shm_tracks_size
                  << " bytes, total=" << shm_total << " bytes (~"
                  << (shm_total/1024.0/1024.0) << " MB)\n";
        int64 t7 = cv::getTickCount();
        double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- main before while start ----------" << std::endl;
        std::cout << "time : " << time_main << std::endl;
        std::cout << "---------- main before while end ----------" << std::endl;

        int frame_id = 0;
        double total_det_ms = 0.0;
        auto t0_wall = std::chrono::high_resolution_clock::now();
        while (true) {
            int64 t8 = cv::getTickCount();
            // ⚡ Decode directly into SHM (no memcpy).
            // NOTE(review): relies on cap.read() decoding into the provided
            // buffer; some backends may reallocate — confirm frame_shm.data
            // still equals img_ptr on the target OpenCV build.
            bool ok = cap.read(frame_shm);
            if (!ok || frame_shm.empty()) break;
            double det_ms = 0.0;
            auto dets = detector.detect(frame_shm, det_ms);
            total_det_ms += det_ms;
            // Publish header + detections to SHM — pixels are already at img_ptr.
            hdr->width = width;
            hdr->height = height;
            hdr->channels = 3;
            hdr->frame_id = frame_id;
            hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS);
            hdr->frame_ready = true;
            for (int i = 0; i < hdr->num_dets; ++i) {
                const auto& d = dets[i];
                det_arr[i].x = (float)d.bbox.x;
                det_arr[i].y = (float)d.bbox.y;
                det_arr[i].w = (float)d.bbox.width;
                det_arr[i].h = (float)d.bbox.height;
                det_arr[i].conf = d.confidence;
                det_arr[i].class_id = d.class_id;
            }
            // Signal Python that the frame is ready.
            auto t_py_start = std::chrono::high_resolution_clock::now();
            sem_post(sem_frame_ready);
            // Wait for the tracker's reply (2-second absolute timeout).
            struct timespec timeout;
            clock_gettime(CLOCK_REALTIME, &timeout);
            timeout.tv_sec += 2;
            int sem_result = sem_timedwait(sem_tracks_ready, &timeout);
            double trk_ms = 0.0;
            std::vector<TrackRec> tracks;
            if (sem_result != 0) {
                // Timeout (or error): proceed with no tracks for this frame.
                std::cout << "⚠️ Timeout waiting for Python tracker\n";
            } else {
                auto t_py_end = std::chrono::high_resolution_clock::now();
                trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count();
                // Copy the track records out of the tracks SHM segment.
                auto* th = reinterpret_cast<TracksHeader*>(shm_tracks);
                auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader));
                int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS);
                tracks.assign(tr, tr + ntracks);
            }
            // Draw: detections (pink) + tracks (light blue) on a clone, so the
            // raw SHM frame stays clean.
            cv::Mat vis = detector.drawDetections(frame_shm, dets);
            for (const auto& tk : tracks) {
                cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y),
                             (int)std::round(tk.w),(int)std::round(tk.h));
                box &= cv::Rect(0,0,width,height); // clip to frame bounds
                if (box.width<=0 || box.height<=0) continue;
                cv::rectangle(vis, box, trk_color, 2);
                char lab[128];
                std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id);
                int base=0;
                cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base);
                // Keep the label on-screen even for boxes near the top edge.
                int ly = std::max(ts.height + 6, box.y);
                cv::rectangle(vis, {box.x, ly - ts.height - 6},
                              {box.x + ts.width + 6, ly}, trk_color, cv::FILLED);
                cv::putText(vis, lab, {box.x+3, ly-3},
                            cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2);
            }
            // Per-frame JPEG dump (the frames/ directory must already exist).
            char filename[256];
            std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id);
            cv::imwrite(filename, vis);
            double total_ms = det_ms + trk_ms;
            std::cout << "CPP frame=" << std::setw(5) << frame_id
                      << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms"
                      << " trk=" << trk_ms << "ms"
                      << " total=" << total_ms << "ms"
                      << " dets=" << dets.size()
                      << " tracks=" << tracks.size() << "\n";
            if (use_writer) writer.write(vis);
            cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis);
            char k = (char)cv::waitKey(1);
            if (k=='q' || k==27) break; // 'q' or ESC quits
            ++frame_id;
            int64 t9 = cv::getTickCount();
            double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency();
            std::cout << "---------- main while start ----------" << std::endl;
            std::cout << "time : " << time_main_while << std::endl;
            std::cout << "---------- main e while end ----------" << std::endl;
        }
        // Shutdown: publish an all-zero header with frame_id=-1 as the
        // termination sentinel, then wake Python one last time.
        auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames);
        hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0;
        hdr_end->frame_id=-1; hdr_end->num_dets=0; hdr_end->frame_ready=false;
        sem_post(sem_frame_ready);
        cap.release();
        if (use_writer) writer.release();
        cv::destroyAllWindows();
        auto t1_wall = std::chrono::high_resolution_clock::now();
        double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count();
        double avg_det = total_det_ms / std::max(1, frame_id);
        std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det
                  << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n";
        // Cleanup: unmap, close, and unlink the named objects.
        munmap(shm_frames, shm_frame_size);
        munmap(shm_tracks, shm_tracks_size);
        close(shm_fd_frames);
        close(shm_fd_tracks);
        sem_close(sem_frame_ready);
        sem_close(sem_tracks_ready);
        // ⚠️ Suggestion (from original author): move the unlink calls to after
        // the Python side has finished, or it may lose access mid-shutdown.
        shm_unlink(SHM_FRAME_NAME);
        shm_unlink(SHM_TRACKS_NAME);
        sem_unlink(SEM_FRAME_READY);
        sem_unlink(SEM_TRACKS_READY);
    } catch (const std::exception& e) {
        std::cerr << "❌ Exception: " << e.what() << "\n";
        return -1;
    }
    return 0;
}
# Makefile — builds the C++ side of the YOLOv8 + DeepSORT IPC demo.
# Requires OpenCV 4 discoverable via pkg-config (package name "opencv4").
CXX ?= g++
PKGCONFIG ?= pkg-config
# Compiler/linker flags pulled from the installed OpenCV.
OPENCV_CFLAGS := $(shell $(PKGCONFIG) --cflags opencv4)
OPENCV_LIBS := $(shell $(PKGCONFIG) --libs opencv4)
CXXFLAGS += -O3 -std=c++17 -DNDEBUG $(OPENCV_CFLAGS)
# -pthread for semaphores, -lrt for shm_open/sem_open on older glibc.
LDFLAGS += $(OPENCV_LIBS) -pthread -lrt
TARGET = yolo_deepsort_ipc
SRC = main_ipc_final.cpp

all: $(TARGET)

$(TARGET): $(SRC)
	$(CXX) $(CXXFLAGS) -o $@ $^ $(LDFLAGS)

clean:
	rm -f $(TARGET)
```bash
sudo apt-get update
sudo apt-get install -y build-essential pkg-config libopencv-dev
python3 -m pip install --upgrade pip
python3 -m pip install posix_ipc numpy opencv-python-headless deep_sort_realtime torch torchvision torchaudio
```
```bash
make
```
```bash
mkdir -p frames
```
```bash
./yolo_deepsort_ipc /path/to/yolov8n.onnx /path/to/video.mp4 /path/to/coco.names
```
```bash
python3 tracker_py.py
```
اگر GPU CUDA ندارید مشکلی نیست؛ کُد خودکار روی CPU میافتد.
C++: با
```cpp
cv::Mat frame_shm(height, width, CV_8UC3, img_ptr);
cap.read(frame_shm);
```
دادهِ تصویر مستقیماً داخل بافرِ Shared Memory دیکُد میشود. هیچ memcpy اضافهای نداریم.
Python: با
```python
frame = np.ndarray((h, w, c), dtype=np.uint8, buffer=buf_f, offset=img_offset)
```
آرایهٔ NumPy صرفاً نمایش (view) روی همان بافرِ SHM است؛ نه کپی.
تنها کپیِ اختیاری، همان
frame.clone() در تابع رسمِ C++ برای Visualization است (برای اینکه روی تصویر خامِ SHM خط نکشیم). این به pipeline بین C++ ↔ Python مربوط نیست.
موفق باشید! اگر بخواهید، میتوانم نسخهٔ lock-free با حلقهٔ چندبافر و بدون صبر روی semaphore هم براتون آماده کنم.