کد زیر کاملا درست کار میکند اما من احتیاج دارم که...
Создано: 3 сентября 2025 г.
Отвечено с использованием GPT-5 Thinking от Chat01
Создано: 3 сентября 2025 г.
Отвечено с использованием GPT-5 Thinking от Chat01
کد زیر کاملا درست کار میکند
اما من احتیاج دارم که تصاویر frame ها را با pointer از C++ به Python بفرستم با shared memory و هیچگونه copy اتفاق نیفتد؛ نکته اصلی این است
پس هدف اصلی این است که بدون کپی کردن frame انرا برای python بفرستد
makefile را هم بنویس و بگو چگونه کد هارا اجرا کنم
به بقیه ساختار کد دست نزن
کد نهایی را کامل بنویس
#!/usr/bin/env python3
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch
try:
import posix_ipc
from multiprocessing import shared_memory
except ModuleNotFoundError:
print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
raise
# POSIX shared-memory / semaphore names -- must match the C++ producer exactly.
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"
# Fixed-capacity slots reserved in shared memory (same values on the C++ side).
MAX_DETS = 512
MAX_TRACKS = 512
# Little-endian wire formats; must match the packed C++ structs byte for byte.
HDR_FMT = "<iiiii" # width, height, channels, frame_id, num_dets
DET_FMT = "<fffffi" # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii" # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id
# Pre-computed record sizes in bytes, used as offsets into the shared buffers.
HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)
def open_or_create_semaphores():
    """Open (or create) the two named POSIX semaphores used to hand-shake with C++.

    Both start at 0 so neither side proceeds until the other posts.

    Returns:
        (sem_frame_ready, sem_tracks_ready): posix_ipc.Semaphore objects.
    """
    # NOTE: original body had its indentation flattened to column 0 (syntax
    # error); restored here without changing any statement.
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready
def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until the shared-memory segment *name* exists, then attach to it.

    The C++ producer creates the segment; this retries attachment until it
    shows up or the timeout expires.

    Args:
        name: Shared-memory name (e.g. "/shm_yolo_frames").
        timeout_s: Total seconds to keep retrying.
        retry_s: Sleep between attach attempts.

    Returns:
        An attached multiprocessing.shared_memory.SharedMemory.

    Raises:
        TimeoutError: If the segment never appears within timeout_s.
    """
    # NOTE: original body had its indentation flattened to column 0 (syntax
    # error); restored here without changing any statement.
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامهی C++ را اجرا کردهاید.") from last_err
def main():
    """Consumer loop: read frames + detections from shared memory, run Deep SORT,
    and write the resulting tracks back for the C++ side to draw.

    Protocol per frame: wait on sem_frame_ready, parse header/dets/pixels from
    shm_frame, run the tracker, pack tracks into shm_tracks, post
    sem_tracks_ready. A header with frame_id == -1 signals clean shutdown.
    """
    # NOTE: original body was collapsed onto single garbled lines ("textprint...");
    # reconstructed here statement-for-statement.
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()
    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")
    # Deep SORT -- fast and stable settings
    tracker = DeepSort(
        max_age=40,
        n_init=3,  # confirm tracks quickly
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False,
    )
    print("✅ DeepSort initialised with ReID.")
    # Pre-compiled struct codecs for the shared-memory wire format.
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)
    # stats
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")
    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        # torch.cuda.synchronize()  # if using PyTorch
        sem_frame_ready.acquire()
        t_start = time.time()
        # header
        width, height, channels, frame_id, num_dets = header_struct.unpack_from(buf_f, 0)
        # clean shutdown signalled by the C++ producer
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break
        # detections
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))
        # frame: numpy view over the shared buffer (np.frombuffer does not copy)
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        img_nbytes = width * height * channels
        img_bytes = buf_f[img_offset: img_offset + img_nbytes]
        frame = np.frombuffer(img_bytes, dtype=np.uint8).reshape((height, width, channels))
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)
        # track
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1
        # collect confirmed, up-to-date tracks only
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")
        # write results to shm_tracks
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()
        # per-frame report (tracker result + tracker time)
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")
    # cleanup
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")
# Fixed entry guard: the original `if name == "main":` raises NameError;
# the Python idiom is the dunder form below.
if __name__ == "__main__":
    main()
// main_ipc_final.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>
// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>
// Names of the POSIX shared-memory segments and semaphores; must match the
// Python consumer exactly.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";
// Fixed-capacity slots reserved in shared memory (same values on the Python side).
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;
#pragma pack(push, 1)
// Frame header; must match Python's HDR_FMT "<iiiii" = exactly 20 bytes.
// BUGFIX: the previously added `bool frame_ready` field made
// sizeof(FrameHeader) 21 bytes while Python still parsed a 20-byte header,
// shifting every detection and pixel offset by one byte. Removed — frame
// readiness is already signalled via the /sem_frame_ready semaphore.
struct FrameHeader {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;   // -1 signals clean shutdown to the Python side
    int32_t num_dets;
};
// One detection record; matches Python DET_FMT "<fffffi".
struct SharedDet {
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
// Track-results header; matches Python TRK_HDR_FMT "<ii".
struct TracksHeader {
    int32_t num_tracks;
    int32_t frame_id;
};
// One track record; matches Python TRK_FMT "<ifffffi".
struct TrackRec {
    int32_t track_id;
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)
class YOLOv8Detector {
public:
int64 t2 = cv::getTickCount();
textYOLOv8Detector(const std::string& model_path, const std::string& class_file_path, float conf_thresh = 0.30f, float nms_thresh = 0.4f) : conf_threshold(conf_thresh), nms_threshold(nms_thresh), input_width(640), input_height(640) { std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl; net = cv::dnn::readNet(model_path); try { net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); std::cout << "✅ Using GPU (CUDA) backend\n"; } catch (...) { net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); std::cout << "⚠️ Using CPU backend\n"; } loadClassNames(class_file_path); boxes.reserve(1000); confidences.reserve(1000); class_ids.reserve(1000); outputs.reserve(10); std::cout << "✅ YOLOv8 model loaded successfully!\n"; std::cout << "📊 Classes loaded: " << class_names.size() << "\n"; } struct Detection { cv::Rect bbox; float confidence; int class_id; std::string class_name; }; std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) { auto t0 = std::chrono::high_resolution_clock::now(); boxes.clear(); confidences.clear(); class_ids.clear(); outputs.clear(); cv::dnn::blobFromImage(frame, blob, 1.0/255.0, cv::Size(input_width, input_height), cv::Scalar(0,0,0), true, false, CV_32F); net.setInput(blob); net.forward(outputs, net.getUnconnectedOutLayersNames()); auto t1 = std::chrono::high_resolution_clock::now(); inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count(); return postProcess(frame, outputs); } cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) { cv::Mat result = frame.clone(); // صورتی (BGR): RGB(255,105,180) -> BGR(180,105,255) const cv::Scalar det_color(180,105,255); for (const auto& d : detections) { cv::rectangle(result, d.bbox, det_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f); int base=0; cv::Size sz 
= cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base); cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6}, {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED); cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1); } int64 t3 = cv::getTickCount(); double time = (t3 - t2) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl; std::cout << "time : " << time << std::endl; std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl; return result; }
private:
//int64 t4 = cv::getTickCount();
textcv::dnn::Net net; std::vector<std::string> class_names; float conf_threshold, nms_threshold; int input_width, input_height; cv::Mat blob; std::vector<cv::Mat> outputs; std::vector<cv::Rect> boxes; std::vector<float> confidences; std::vector<int> class_ids; void loadClassNames(const std::string& class_file_path) { std::ifstream file(class_file_path); std::string line; if (file.is_open()) { while (std::getline(file, line)) class_names.push_back(line); file.close(); } else { std::cout << "⚠️ Class file not found. Using fallback labels.\n"; class_names = {"obj"}; } } std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) { std::vector<Detection> detections; detections.reserve(100); float x_factor = frame.cols / static_cast<float>(input_width); float y_factor = frame.rows / static_cast<float>(input_height); for (const auto& out : outputs) { const float* data = (float*)out.data; int num_classes = out.size[1] - 4; int num_dets = out.size[2]; for (int i = 0; i < num_dets; ++i) { float cx = data[i]; float cy = data[i + num_dets]; float w = data[i + 2*num_dets]; float h = data[i + 3*num_dets]; float best = 0.f; int best_id = 0; for (int j = 4; j < 4 + num_classes; ++j) { float conf = data[i + j*num_dets]; if (conf > best) { best = conf; best_id = j - 4; } } if (best >= conf_threshold) { int x1 = (int)((cx - w/2) * x_factor); int y1 = (int)((cy - h/2) * y_factor); int ww = (int)(w * x_factor); int hh = (int)(h * y_factor); boxes.emplace_back(x1, y1, ww, hh); confidences.push_back(best); class_ids.push_back(best_id); } } } std::vector<int> idx; cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx); detections.reserve(idx.size()); for (int k : idx) { Detection d; d.bbox = boxes[k]; d.confidence = confidences[k]; d.class_id = class_ids[k]; d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ? 
class_names[d.class_id] : "Unknown"; detections.push_back(std::move(d)); } return detections; } //int64 t5 = cv::getTickCount(); //double time_private = (t4 - t5) * 1000.0 / cv::getTickFrequency(); //std::cout << "---------- class YOLOv8Detector private start ----------" << std::endl; //std::cout << "time : " << time_private << std::endl; //std::cout << "---------- class YOLOv8Detector private end ----------" << std::endl;
};
int main(int argc, char* argv[]) {
int64 t6 = cv::getTickCount();
textif (argc < 3) { std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n"; return -1; } std::string model_path = argv[1]; std::string video_path = argv[2]; std::string class_file = (argc>3)? argv[3] : ""; try { YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); // ← آستانه کمتر برای پایداری cv::VideoCapture cap(video_path); if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; } cap.set(cv::CAP_PROP_BUFFERSIZE, 1); int fps = (int)cap.get(cv::CAP_PROP_FPS); int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH); int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT); int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT); int channels = 3; std::cout << "📹 " << width << "x" << height << " @" << fps << "fps, total " << total << "\n"; // ===== Shared memory ===== size_t frame_bytes = (size_t)width * height * channels; size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes; int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; } if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; } void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_frames, 0); if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; } size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec); int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666); if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; } if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; } void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE, MAP_SHARED, shm_fd_tracks, 0); if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; } sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0); sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0); if 
(sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) { perror("sem_open"); return -1; } cv::VideoWriter writer; bool use_writer = false; try { writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'), fps, cv::Size(width, height)); use_writer = writer.isOpened(); } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; } cv::Mat frame; int frame_id = 0; double total_det_ms = 0.0; auto t0_wall = std::chrono::high_resolution_clock::now(); // پویینترهای SHM auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames); auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader)); auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet)); const cv::Scalar det_color(180,105,255); // صورتی const cv::Scalar trk_color(230,216,173); // آبی کمرنگ (BGR) const cv::Scalar trk_text_color(0,0,0); // سربار SHM (محاسبه و چاپ یکبار) size_t shm_total = shm_frame_size + shm_tracks_size; std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size << " bytes, tracks=" << shm_tracks_size << " bytes, total=" << shm_total << " bytes (~" << (shm_total/1024.0/1024.0) << " MB)\n"; int64 t7 = cv::getTickCount(); double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main before while start ----------" << std::endl; std::cout << "time : " << time_main << std::endl; std::cout << "---------- main before while end ----------" << std::endl; while (true) { int64 t8 = cv::getTickCount(); cap >> frame; if (frame.empty()) break; if (!frame.isContinuous()) frame = frame.clone(); double det_ms = 0.0; auto dets = detector.detect(frame, det_ms); total_det_ms += det_ms; // نوشتن به SHM hdr->width = width; hdr->height = height; hdr->channels = 3; hdr->frame_id = frame_id; hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS); for (int i = 0; i < hdr->num_dets; ++i) { const auto& d = dets[i]; det_arr[i].x = (float)d.bbox.x; det_arr[i].y = (float)d.bbox.y; 
det_arr[i].w = (float)d.bbox.width; det_arr[i].h = (float)d.bbox.height; det_arr[i].conf = d.confidence; det_arr[i].class_id = d.class_id; } std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize()); auto t_py_start = std::chrono::high_resolution_clock::now(); sem_post(sem_frame_ready); // منتظر پاسخ ترکر struct timespec timeout; clock_gettime(CLOCK_REALTIME, &timeout); timeout.tv_sec += 2; // کمی سخاوتمندانهتر int sem_result = sem_timedwait(sem_tracks_ready, &timeout); double trk_ms = 0.0; std::vector<TrackRec> tracks; if (sem_result != 0) { std::cout << "⚠️ Timeout waiting for Python tracker\n"; } else { auto t_py_end = std::chrono::high_resolution_clock::now(); trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count(); auto* th = reinterpret_cast<TracksHeader*>(shm_tracks); auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader)); int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS); tracks.assign(tr, tr + ntracks); } // رسم: اول det (صورتی)، بعد track (آبی کمرنگ + ID) cv::Mat vis = detector.drawDetections(frame, dets); for (const auto& tk : tracks) { cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y), (int)std::round(tk.w),(int)std::round(tk.h)); box &= cv::Rect(0,0,width,height); if (box.width<=0 || box.height<=0) continue; cv::rectangle(vis, box, trk_color, 2); char lab[128]; std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id); int base=0; cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base); int ly = std::max(ts.height + 6, box.y); cv::rectangle(vis, {box.x, ly - ts.height - 6}, {box.x + ts.width + 6, ly}, trk_color, cv::FILLED); cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2); } char filename[256]; std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id); cv::imwrite(filename, vis); // چاپ per-frame: det و trk و جمع double total_ms = det_ms + trk_ms; std::cout << "CPP frame=" << 
std::setw(5) << frame_id << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms" << " trk=" << trk_ms << "ms" << " total=" << total_ms << "ms" << " dets=" << dets.size() << " tracks=" << tracks.size() << "\n"; if (use_writer) writer.write(vis); cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis); char k = (char)cv::waitKey(1); if (k=='q' || k==27) break; ++frame_id; int64 t9 = cv::getTickCount(); double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency(); std::cout << "---------- main while start ----------" << std::endl; std::cout << "time : " << time_main_while << std::endl; std::cout << "---------- main e while end ----------" << std::endl; } // پایان: سیگنال خاتمه به Python auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames); hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0; hdr_end->frame_id=-1; hdr_end->num_dets=0; sem_post(sem_frame_ready); cap.release(); if (use_writer) writer.release(); cv::destroyAllWindows(); // جمعبندی زمانها auto t1_wall = std::chrono::high_resolution_clock::now(); double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count(); double avg_det = total_det_ms / std::max(1, frame_id); std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2) << avg_det << "ms over " << frame_id << " frames, wall=" << wall_ms << "ms\n"; // cleanup munmap(shm_frames, shm_frame_size); munmap(shm_tracks, shm_tracks_size); close(shm_fd_frames); close(shm_fd_tracks); sem_close(sem_frame_ready); sem_close(sem_tracks_ready); // ⚠️ پیشنهاد: unlink را بگذارید بعد از اتمام Python. shm_unlink(SHM_FRAME_NAME); shm_unlink(SHM_TRACKS_NAME); sem_unlink(SEM_FRAME_READY); sem_unlink(SEM_TRACKS_READY); } catch (const std::exception& e) { std::cerr << "❌ Exception: " << e.what() << "\n"; return -1; } return 0;
}
// Build: g++ main_ipc_optimized.cpp -o yolo_deepsort_ipc_optimized $(pkg-config --cflags --libs opencv4) -lrt -lpthread
//./yolo_deepsort_ipc_optimized /home/rayfan/Documents/cpp_projects/shared_optimize/yolov8n.onnx /media/rayfan/d/videos/9600+6400/output.avi /home/rayfan/Documents/cpp_projects/shared_optimize/coco.names
کد زیر کاملا درست کار میکند
اما من احتیاج دارم که تصاویر frame ها را با pointer از C++ به Python بفرستم با shared memory و هیچگونه copy اتفاق نیفتد؛ نکته اصلی این است
پس هدف اصلی این است که بدون کپی کردن frame انرا برای python بفرستد
makefile را هم بنویس و بگو چگونه کد هارا اجرا کنم
به بقیه ساختار کد دست نزن
کد نهایی را کامل بنویس
#!/usr/bin/env python3
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch
try:
import posix_ipc
from multiprocessing import shared_memory
except ModuleNotFoundError:
print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
raise
# POSIX shared-memory / semaphore names -- must match the C++ producer exactly.
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"
# Fixed-capacity slots reserved in shared memory (same values on the C++ side).
MAX_DETS = 512
MAX_TRACKS = 512
# Little-endian wire formats; must match the packed C++ structs byte for byte.
HDR_FMT = "<iiiii" # width, height, channels, frame_id, num_dets
DET_FMT = "<fffffi" # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii" # num_tracks, frame_id
TRK_FMT = "<ifffffi" # track_id, x, y, w, h, conf, class_id
# Pre-computed record sizes in bytes, used as offsets into the shared buffers.
HDR_SIZE = struct.calcsize(HDR_FMT)
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)
def open_or_create_semaphores():
    """Open (or create) the two named POSIX semaphores used to hand-shake with C++.

    Both start at 0 so neither side proceeds until the other posts.

    Returns:
        (sem_frame_ready, sem_tracks_ready): posix_ipc.Semaphore objects.
    """
    # NOTE: original body had its indentation flattened to column 0 (syntax
    # error); restored here without changing any statement.
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready
def wait_for_shared_memory(name, timeout_s=300, retry_s=0.2):
    """Poll until the shared-memory segment *name* exists, then attach to it.

    The C++ producer creates the segment; this retries attachment until it
    shows up or the timeout expires.

    Args:
        name: Shared-memory name (e.g. "/shm_yolo_frames").
        timeout_s: Total seconds to keep retrying.
        retry_s: Sleep between attach attempts.

    Returns:
        An attached multiprocessing.shared_memory.SharedMemory.

    Raises:
        TimeoutError: If the segment never appears within timeout_s.
    """
    # NOTE: original body had its indentation flattened to column 0 (syntax
    # error); restored here without changing any statement.
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامهی C++ را اجرا کردهاید.") from last_err
def main():
    """Consumer loop: read frames + detections from shared memory, run Deep SORT,
    and write the resulting tracks back for the C++ side to draw.

    Protocol per frame: wait on sem_frame_ready, parse header/dets/pixels from
    shm_frame, run the tracker, pack tracks into shm_tracks, post
    sem_tracks_ready. A header with frame_id == -1 signals clean shutdown.
    """
    # NOTE: original body was collapsed onto single garbled lines ("textprint...");
    # reconstructed here statement-for-statement.
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()
    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")
    # Deep SORT -- fast and stable settings
    tracker = DeepSort(
        max_age=40,
        n_init=3,  # confirm tracks quickly
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False,
    )
    print("✅ DeepSort initialised with ReID.")
    # Pre-compiled struct codecs for the shared-memory wire format.
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)
    # stats
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    print("---------- main before while ----------")
    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        # torch.cuda.synchronize()  # if using PyTorch
        sem_frame_ready.acquire()
        t_start = time.time()
        # header
        width, height, channels, frame_id, num_dets = header_struct.unpack_from(buf_f, 0)
        # clean shutdown signalled by the C++ producer
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break
        # detections
        detections = []
        dets_start = HDR_SIZE
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))
        # frame: numpy view over the shared buffer (np.frombuffer does not copy)
        buf_start = time.time()
        img_offset = HDR_SIZE + MAX_DETS * DET_SIZE
        img_nbytes = width * height * channels
        img_bytes = buf_f[img_offset: img_offset + img_nbytes]
        frame = np.frombuffer(img_bytes, dtype=np.uint8).reshape((height, width, channels))
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)
        # track
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1
        # collect confirmed, up-to-date tracks only
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r - l), float(b - tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")
        # write results to shm_tracks
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        sem_tracks_ready.release()
        # per-frame report (tracker result + tracker time)
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")
    # cleanup
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")
# Fixed entry guard: the original `if name == "main":` raises NameError;
# the Python idiom is the dunder form below.
if __name__ == "__main__":
    main()
// main_ipc_final.cpp
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>
// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>
// Names of the POSIX shared-memory segments and semaphores; must match the
// Python consumer exactly.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";
// Fixed-capacity slots reserved in shared memory (same values on the Python side).
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;
#pragma pack(push, 1)
// Frame header; must match Python's HDR_FMT "<iiiii" = exactly 20 bytes.
// BUGFIX: the previously added `bool frame_ready` field made
// sizeof(FrameHeader) 21 bytes while Python still parsed a 20-byte header,
// shifting every detection and pixel offset by one byte. Removed — frame
// readiness is already signalled via the /sem_frame_ready semaphore.
struct FrameHeader {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;   // -1 signals clean shutdown to the Python side
    int32_t num_dets;
};
// One detection record; matches Python DET_FMT "<fffffi".
struct SharedDet {
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
// Track-results header; matches Python TRK_HDR_FMT "<ii".
struct TracksHeader {
    int32_t num_tracks;
    int32_t frame_id;
};
// One track record; matches Python TRK_FMT "<ifffffi".
struct TrackRec {
    int32_t track_id;
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)
class YOLOv8Detector {
public:
int64 t2 = cv::getTickCount();
// --- YOLOv8Detector interior (class header and the `int64 t2` tick-count member
//     are declared above this chunk) ---

// Load a YOLOv8 ONNX model through OpenCV DNN, preferring the CUDA backend and
// falling back to CPU, then load class labels and pre-reserve scratch buffers.
// NOTE(review): the try/catch only catches failures thrown while configuring the
// backend — some OpenCV builds defer CUDA errors to the first forward(); confirm.
YOLOv8Detector(const std::string& model_path, const std::string& class_file_path,
               float conf_thresh = 0.30f, float nms_thresh = 0.4f)
    : conf_threshold(conf_thresh), nms_threshold(nms_thresh),
      input_width(640), input_height(640) {
    std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl;
    net = cv::dnn::readNet(model_path);
    try {
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
        std::cout << "✅ Using GPU (CUDA) backend\n";
    } catch (...) {
        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
        std::cout << "⚠️ Using CPU backend\n";
    }
    loadClassNames(class_file_path);
    // Pre-reserve scratch storage so the per-frame hot path avoids reallocations.
    boxes.reserve(1000);
    confidences.reserve(1000);
    class_ids.reserve(1000);
    outputs.reserve(10);
    std::cout << "✅ YOLOv8 model loaded successfully!\n";
    std::cout << "📊 Classes loaded: " << class_names.size() << "\n";
}

// One final detection: box in original-frame pixel coordinates, score, class.
struct Detection {
    cv::Rect bbox;
    float confidence;
    int class_id;
    std::string class_name;
};

// Run one inference on `frame`. `inference_time_ms` receives blob+forward time
// only (post-processing is excluded: t1 is taken before postProcess).
std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) {
    auto t0 = std::chrono::high_resolution_clock::now();
    // Reset the member scratch buffers reused by postProcess() every call.
    boxes.clear();
    confidences.clear();
    class_ids.clear();
    outputs.clear();
    // Letterbox-free resize to 640x640, scale to [0,1], BGR->RGB swap.
    cv::dnn::blobFromImage(frame, blob, 1.0/255.0,
                           cv::Size(input_width, input_height),
                           cv::Scalar(0,0,0), true, false, CV_32F);
    net.setInput(blob);
    net.forward(outputs, net.getUnconnectedOutLayersNames());
    auto t1 = std::chrono::high_resolution_clock::now();
    inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
    return postProcess(frame, outputs);
}

// Draw detection boxes + "<name> <conf>%" labels on a CLONE of `frame`
// (the input frame is left untouched). Also prints elapsed ticks since the
// member `t2` — presumably set at detector construction; confirm upstream.
cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) {
    cv::Mat result = frame.clone();
    // Pink (BGR): RGB(255,105,180) -> BGR(180,105,255)
    const cv::Scalar det_color(180,105,255);
    for (const auto& d : detections) {
        cv::rectangle(result, d.bbox, det_color, 2);
        char lab[128];
        std::snprintf(lab, sizeof(lab), "%s %.0f%%",
                      d.class_name.c_str(), d.confidence*100.0f);
        int base=0;
        cv::Size sz = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base);
        // Filled label background just above the box, then white text on top.
        cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6},
                      {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED);
        cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3},
                    cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1);
    }
    int64 t3 = cv::getTickCount();
    double time = (t3 - t2) * 1000.0 / cv::getTickFrequency();
    std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl;
    std::cout << "time : " << time << std::endl;
    std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl;
    return result;
}

private:
//int64 t4 = cv::getTickCount();
cv::dnn::Net net;                      // loaded ONNX network
std::vector<std::string> class_names;  // index -> label
float conf_threshold, nms_threshold;
int input_width, input_height;         // network input size (640x640)
cv::Mat blob;                          // reused input blob
std::vector<cv::Mat> outputs;          // reused raw network outputs
// Scratch buffers shared between detect() and postProcess(); cleared per frame.
std::vector<cv::Rect> boxes;
std::vector<float> confidences;
std::vector<int> class_ids;

// Read one label per line from `class_file_path`; fall back to {"obj"}.
void loadClassNames(const std::string& class_file_path) {
    std::ifstream file(class_file_path);
    std::string line;
    if (file.is_open()) {
        while (std::getline(file, line)) class_names.push_back(line);
        file.close();
    } else {
        std::cout << "⚠️ Class file not found. Using fallback labels.\n";
        class_names = {"obj"};
    }
}

// Decode YOLOv8 raw output (layout [1, 4+num_classes, num_dets], channel-major:
// element (i, j) lives at data[i + j*num_dets]), threshold by confidence,
// rescale center-boxes to original frame pixels, then apply NMS.
std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) {
    std::vector<Detection> detections;
    detections.reserve(100);
    // Scale factors from 640x640 network space back to the source frame.
    float x_factor = frame.cols / static_cast<float>(input_width);
    float y_factor = frame.rows / static_cast<float>(input_height);
    for (const auto& out : outputs) {
        const float* data = (float*)out.data;
        int num_classes = out.size[1] - 4;  // first 4 channels are cx,cy,w,h
        int num_dets = out.size[2];
        for (int i = 0; i < num_dets; ++i) {
            float cx = data[i];
            float cy = data[i + num_dets];
            float w = data[i + 2*num_dets];
            float h = data[i + 3*num_dets];
            // Best class score for this anchor.
            float best = 0.f;
            int best_id = 0;
            for (int j = 4; j < 4 + num_classes; ++j) {
                float conf = data[i + j*num_dets];
                if (conf > best) { best = conf; best_id = j - 4; }
            }
            if (best >= conf_threshold) {
                // Center box -> top-left box, rescaled to frame pixels.
                int x1 = (int)((cx - w/2) * x_factor);
                int y1 = (int)((cy - h/2) * y_factor);
                int ww = (int)(w * x_factor);
                int hh = (int)(h * y_factor);
                boxes.emplace_back(x1, y1, ww, hh);
                confidences.push_back(best);
                class_ids.push_back(best_id);
            }
        }
    }
    // Non-maximum suppression over all candidates; idx holds survivors.
    std::vector<int> idx;
    cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx);
    detections.reserve(idx.size());
    for (int k : idx) {
        Detection d;
        d.bbox = boxes[k];
        d.confidence = confidences[k];
        d.class_id = class_ids[k];
        d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ?
            class_names[d.class_id] : "Unknown";
        detections.push_back(std::move(d));
    }
    return detections;
}
//int64 t5 = cv::getTickCount();
//double time_private = (t4 - t5) * 1000.0 / cv::getTickFrequency();
//std::cout << "---------- class YOLOv8Detector private start ----------" << std::endl;
//std::cout << "time : " << time_private << std::endl;
//std::cout << "---------- class YOLOv8Detector private end ----------" << std::endl;
};
// Producer side of the C++ <-> Python IPC pipeline:
//   1. decode a frame, run YOLOv8,
//   2. publish header + detections + raw pixels into POSIX shared memory,
//   3. sem_post(frame_ready), sem_timedwait(tracks_ready) for Deep SORT results,
//   4. draw detections (pink) + tracks (light blue), write/show the frame.
// Shutdown protocol: a header with frame_id = -1 tells the Python peer to exit.
int main(int argc, char* argv[]) {
    int64 t6 = cv::getTickCount();
    if (argc < 3) {
        std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n";
        return -1;
    }
    std::string model_path = argv[1];
    std::string video_path = argv[2];
    std::string class_file = (argc>3)? argv[3] : "";
    try {
        YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f); // <- lower threshold for stability
        cv::VideoCapture cap(video_path);
        if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; }
        cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
        int fps = (int)cap.get(cv::CAP_PROP_FPS);
        int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
        int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
        int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
        int channels = 3;
        std::cout << "📹 " << width << "x" << height << " @" << fps
                  << "fps, total " << total << "\n";

        // ===== Shared memory =====
        // Frame segment layout: [FrameHeader][SharedDet x MAX_DETS][pixels].
        // The Python side must compute the very same offsets.
        size_t frame_bytes = (size_t)width * height * channels;
        size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes;
        int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; }
        if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; }
        void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE,
                                MAP_SHARED, shm_fd_frames, 0);
        if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; }

        // Tracks segment layout: [TracksHeader][TrackRec x MAX_TRACKS].
        size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec);
        int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; }
        if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; }
        void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE,
                                MAP_SHARED, shm_fd_tracks, 0);
        if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; }

        // Named semaphores implementing the two-way handshake (both start at 0).
        sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0);
        sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0);
        if (sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) {
            perror("sem_open"); return -1;
        }

        cv::VideoWriter writer;
        bool use_writer = false;
        try {
            writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'),
                        fps, cv::Size(width, height));
            use_writer = writer.isOpened();
        } catch (...) { std::cout << "⚠️ Video writer failed, continuing.\n"; }

        cv::Mat frame;
        int frame_id = 0;
        double total_det_ms = 0.0;
        auto t0_wall = std::chrono::high_resolution_clock::now();

        // SHM pointers (fixed offsets within the frame segment)
        auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames);
        auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader));
        auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader)
                                                   + MAX_DETS*sizeof(SharedDet));
        const cv::Scalar det_color(180,105,255);  // pink
        const cv::Scalar trk_color(230,216,173);  // light blue (BGR)
        const cv::Scalar trk_text_color(0,0,0);

        // SHM overhead (computed and printed once)
        size_t shm_total = shm_frame_size + shm_tracks_size;
        std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size
                  << " bytes, tracks=" << shm_tracks_size
                  << " bytes, total=" << shm_total << " bytes (~"
                  << (shm_total/1024.0/1024.0) << " MB)\n";
        int64 t7 = cv::getTickCount();
        double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- main before while start ----------" << std::endl;
        std::cout << "time : " << time_main << std::endl;
        std::cout << "---------- main before while end ----------" << std::endl;

        while (true) {
            int64 t8 = cv::getTickCount();
            cap >> frame;
            if (frame.empty()) break;
            // memcpy below assumes a contiguous pixel buffer.
            if (!frame.isContinuous()) frame = frame.clone();
            double det_ms = 0.0;
            auto dets = detector.detect(frame, det_ms);
            total_det_ms += det_ms;

            // Publish header + detections into SHM.
            hdr->width = width; hdr->height = height; hdr->channels = 3;
            hdr->frame_id = frame_id;
            hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS);
            for (int i = 0; i < hdr->num_dets; ++i) {
                const auto& d = dets[i];
                det_arr[i].x = (float)d.bbox.x;
                det_arr[i].y = (float)d.bbox.y;
                det_arr[i].w = (float)d.bbox.width;
                det_arr[i].h = (float)d.bbox.height;
                det_arr[i].conf = d.confidence;
                det_arr[i].class_id = d.class_id;
            }
            // The single producer-side copy: decoder output -> SHM pixel area.
            std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize());
            auto t_py_start = std::chrono::high_resolution_clock::now();
            sem_post(sem_frame_ready);

            // Wait for the tracker's reply (bounded, so a dead peer can't hang us).
            struct timespec timeout;
            clock_gettime(CLOCK_REALTIME, &timeout);
            timeout.tv_sec += 2; // a bit generous
            int sem_result = sem_timedwait(sem_tracks_ready, &timeout);
            double trk_ms = 0.0;
            std::vector<TrackRec> tracks;
            if (sem_result != 0) {
                std::cout << "⚠️ Timeout waiting for Python tracker\n";
            } else {
                auto t_py_end = std::chrono::high_resolution_clock::now();
                trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count();
                auto* th = reinterpret_cast<TracksHeader*>(shm_tracks);
                auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader));
                int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS);
                tracks.assign(tr, tr + ntracks);
            }

            // Draw: detections first (pink), then tracks (light blue + ID).
            cv::Mat vis = detector.drawDetections(frame, dets);
            for (const auto& tk : tracks) {
                cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y),
                             (int)std::round(tk.w),(int)std::round(tk.h));
                box &= cv::Rect(0,0,width,height); // clip to the frame
                if (box.width<=0 || box.height<=0) continue;
                cv::rectangle(vis, box, trk_color, 2);
                char lab[128];
                std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id);
                int base=0;
                cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base);
                // Keep the label on-screen even when the box touches the top edge.
                int ly = std::max(ts.height + 6, box.y);
                cv::rectangle(vis, {box.x, ly - ts.height - 6},
                              {box.x + ts.width + 6, ly}, trk_color, cv::FILLED);
                cv::putText(vis, lab, {box.x+3, ly-3}, cv::FONT_HERSHEY_SIMPLEX,
                            0.6, trk_text_color, 2);
            }
            // Per-frame JPEG dump — assumes the "frames/" directory already exists.
            char filename[256];
            std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id);
            cv::imwrite(filename, vis);

            // Per-frame timing report: det + trk + total.
            double total_ms = det_ms + trk_ms;
            std::cout << "CPP frame=" << std::setw(5) << frame_id
                      << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms"
                      << " trk=" << trk_ms << "ms"
                      << " total=" << total_ms << "ms"
                      << " dets=" << dets.size()
                      << " tracks=" << tracks.size() << "\n";
            if (use_writer) writer.write(vis);
            cv::imshow("YOLOv8 (pink) + DeepSORT (light-blue)", vis);
            char k = (char)cv::waitKey(1);
            if (k=='q' || k==27) break;
            ++frame_id;
            int64 t9 = cv::getTickCount();
            double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency();
            std::cout << "---------- main while start ----------" << std::endl;
            std::cout << "time : " << time_main_while << std::endl;
            std::cout << "---------- main e while end ----------" << std::endl;
        }

        // Shutdown: publish the frame_id = -1 sentinel so Python exits cleanly.
        auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames);
        hdr_end->width=0; hdr_end->height=0; hdr_end->channels=0;
        hdr_end->frame_id=-1; hdr_end->num_dets=0;
        sem_post(sem_frame_ready);
        cap.release();
        if (use_writer) writer.release();
        cv::destroyAllWindows();

        // Timing summary.
        auto t1_wall = std::chrono::high_resolution_clock::now();
        double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count();
        double avg_det = total_det_ms / std::max(1, frame_id);
        std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2)
                  << avg_det << "ms over " << frame_id << " frames, wall="
                  << wall_ms << "ms\n";

        // Cleanup. NOTE(review): unlinking here races the Python peer — it may
        // still hold the segments/semaphores; consider unlinking after Python exits.
        munmap(shm_frames, shm_frame_size);
        munmap(shm_tracks, shm_tracks_size);
        close(shm_fd_frames);
        close(shm_fd_tracks);
        sem_close(sem_frame_ready);
        sem_close(sem_tracks_ready);
        shm_unlink(SHM_FRAME_NAME);
        shm_unlink(SHM_TRACKS_NAME);
        sem_unlink(SEM_FRAME_READY);
        sem_unlink(SEM_TRACKS_READY);
    } catch (const std::exception& e) {
        std::cerr << "❌ Exception: " << e.what() << "\n";
        return -1;
    }
    return 0;
}
//g++ main_ipc_optimized.cpp -o yolo_deepsort_ipc_optimized $(pkg-config --cflags --libs opencv4) -lrt -pthread
//./yolo_deepsort_ipc_optimized /home/rayfan/Documents/cpp_projects/shared_optimize/yolov8n.onnx /media/rayfan/d/videos/9600+6400/output.avi /home/rayfan/Documents/cpp_projects/shared_optimize/coco.names
عالی—ایدهات کاملاً شدنیه: «صفر-کپی بین C++ و Python».
در C++، تنها کپیِ ناگزیر همون انتقال خروجیِ دیکودر (VideoCapture) به بافرِ Shared Memory است؛ ولی بین دو پردازه (C++→Python) هیچ کپیای رخ نمیده. در Python هم آرایهی NumPy را مستقیم روی حافظهٔ مشترک میسازیم (نه با برشِ موقّت). همچنین با ساختار هدر شما همراستا شدیم (آن bool frame_ready را هم دقیق خواندیم) تا آفستها ۱ بایت جابهجا نشوند.
در ادامه «کد نهایی کامل»، «Makefile»، و «راهنمای اجرا» را میبینی. طبق خواستهات به بقیهٔ ساختار دست نزدم و فقط جاهایی را که برای صفر-کپی ضروری بود، دقیق کردم.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Deep SORT consumer side of the C++ <-> Python shared-memory pipeline.

Reads frames + YOLO detections from a POSIX shared-memory segment published
by the C++ producer (zero-copy: the frame is an ndarray view over the SHM),
runs Deep SORT, and writes track records back into a second segment.
Handshake: wait on /sem_frame_ready, reply via /sem_tracks_ready.
"""
import struct
import time
import numpy as np
import cv2
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch

# ====== POSIX IPC ======
try:
    import posix_ipc
    from multiprocessing import shared_memory
except ModuleNotFoundError:
    print("❌ posix_ipc نصب نیست. نصب کنید: python3 -m pip install posix_ipc")
    raise

# ===== Names must match the C++ side exactly =====
FRAME_SHM_NAME = "/shm_yolo_frames"
TRACKS_SHM_NAME = "/shm_tracks"
SEM_FRAME_READY = "/sem_frame_ready"
SEM_TRACKS_READY = "/sem_tracks_ready"

MAX_DETS = 512
MAX_TRACKS = 512

# ========= Wire structures (little-endian) =========
# Critical detail: the C++ FrameHeader (under #pragma pack(push,1)) ends with a
# `bool frame_ready`, so the header is 5*4 + 1 = 21 bytes. Ignoring that byte
# would shift every subsequent offset by one.
HDR_FMT = "<iiiiib"   # width, height, channels, frame_id, num_dets, frame_ready(bool, 1 byte)
DET_FMT = "<fffffi"   # x, y, w, h, conf, class_id
TRK_HDR_FMT = "<ii"   # num_tracks, frame_id
TRK_FMT = "<ifffffi"  # track_id, x, y, w, h, conf, class_id

HDR_SIZE = struct.calcsize(HDR_FMT)        # = 21
DET_SIZE = struct.calcsize(DET_FMT)
TRK_HDR_SIZE = struct.calcsize(TRK_HDR_FMT)
TRK_SIZE = struct.calcsize(TRK_FMT)


def open_or_create_semaphores():
    """Open (or create, initial value 0) the two named handshake semaphores."""
    print("---------- open_or_create_semaphores ----------")
    print("start")
    t0 = time.time()
    sem_frame_ready = posix_ipc.Semaphore(SEM_FRAME_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    sem_tracks_ready = posix_ipc.Semaphore(SEM_TRACKS_READY, flags=posix_ipc.O_CREAT, initial_value=0)
    t1 = (time.time() - t0) * 1000
    print("time : ", t1)
    print("end")
    return sem_frame_ready, sem_tracks_ready


def wait_for_shared_memory(name: str, timeout_s: float = 300, retry_s: float = 0.2) -> shared_memory.SharedMemory:
    """Poll until the named SHM segment (created by the C++ producer) appears.

    Raises TimeoutError after ``timeout_s`` seconds without a successful attach.
    """
    t0 = time.time()
    last_err = None
    while time.time() - t0 < timeout_s:
        try:
            return shared_memory.SharedMemory(name=name)
        except FileNotFoundError as e:
            last_err = e
            time.sleep(retry_s)
    raise TimeoutError(f"SharedMemory {name} پیدا نشد؛ مطمئن شوید برنامهی C++ را اجرا کردهاید.") from last_err


def main():
    """Attach to SHM, then loop: wait for a frame, track, publish results."""
    print("---------- main before while ----------")
    print("start")
    t0 = time.time()
    print("🚀 Python tracker starting (Deep SORT + ReID) ...")
    sem_frame_ready, sem_tracks_ready = open_or_create_semaphores()
    print("⏳ منتظر SharedMemory از سمت C++ ...")
    shm_frame = wait_for_shared_memory(FRAME_SHM_NAME)
    shm_tracks = wait_for_shared_memory(TRACKS_SHM_NAME)
    # One stable memoryview per buffer so the loop creates no temporaries.
    buf_f = memoryview(shm_frame.buf)
    buf_t = memoryview(shm_tracks.buf)
    print("✅ SharedMemory وصل شد.")
    # Deep SORT — tuned for speed and stability.
    tracker = DeepSort(
        max_age=40,
        n_init=3,
        nn_budget=300,
        embedder="torchreid",
        half=False,
        bgr=True,
        max_cosine_distance=0.3,
        polygon=False
    )
    print("✅ DeepSort initialised with ReID.")
    # Pre-compiled struct objects: parse/pack without re-reading the format.
    header_struct = struct.Struct(HDR_FMT)
    det_struct = struct.Struct(DET_FMT)
    track_header_struct = struct.Struct(TRK_HDR_FMT)
    track_struct = struct.Struct(TRK_FMT)
    # Stats.
    frame_count = 0
    total_track_time_ms = 0.0
    t0_wall = time.time()
    t1 = (time.time() - t0) * 1000
    print("time : ",t1)
    print("end")
    print("---------- main before while ----------")
    # Fixed offsets inside the frame segment: detections right after the header,
    # pixels right after the detection array — must mirror the C++ layout.
    dets_start = HDR_SIZE
    img_offset_base = HDR_SIZE + MAX_DETS * DET_SIZE
    while True:
        print("---------- main while ----------")
        print("start")
        t0 = time.time()
        sem_frame_ready.acquire()
        t_start = time.time()
        # Header (no copy).
        width, height, channels, frame_id, num_dets, frame_ready = header_struct.unpack_from(buf_f, 0)
        # Clean-shutdown sentinel from C++.
        if frame_id == -1:
            print("👋 پایان از سمت C++ دریافت شد.")
            break
        # Detections (unpacked in place, no copy).
        detections = []
        for i in range(min(num_dets, MAX_DETS)):
            offset = dets_start + i * DET_SIZE
            x, y, w, h, conf, cls_id = det_struct.unpack_from(buf_f, offset)
            detections.append(([x, y, w, h], float(conf), int(cls_id)))
        # Frame (true zero-copy: the ndarray is only a view over the SHM buffer).
        buf_start = time.time()
        img_nbytes = width * height * channels  # NOTE(review): computed but unused
        # Deliberately no memoryview slicing here, so no intermediate object
        # that could trigger a copy.
        frame = np.ndarray(
            shape=(height, width, channels),
            dtype=np.uint8,
            buffer=buf_f,            # whole SHM buffer
            offset=img_offset_base   # start of the pixel area
        )
        buf_time = (time.time() - buf_start) * 1000
        print("sharedmemory timeeeeee ====>> ", buf_time)
        # Track.
        trk_t0 = time.time()
        tracks = tracker.update_tracks(detections, frame=frame)
        trk_ms = (time.time() - trk_t0) * 1000.0
        total_track_time_ms += trk_ms
        frame_count += 1
        # Keep only confirmed, just-updated tracks; hasattr/getattr guards make
        # this tolerant of deep-sort-realtime API variations across versions.
        packed = []
        printable = []
        for t in tracks:
            if (hasattr(t, "is_confirmed") and not t.is_confirmed()) or getattr(t, "time_since_update", 0) > 0:
                continue
            if hasattr(t, "to_tlwh"):
                x, y, w, h = map(float, t.to_tlwh(orig=True))
            else:
                # Fallback: convert left/top/right/bottom to tlwh.
                l, tt, r, b = t.to_ltrb()
                x, y, w, h = float(l), float(tt), float(r-l), float(b-tt)
            tid = int(getattr(t, "track_id", -1))
            cls = int(getattr(t, "det_class", -1)) if getattr(t, "det_class", None) is not None else -1
            conf = 1.0
            if hasattr(t, "det_confidences"):
                dcs = getattr(t, "det_confidences")
                if isinstance(dcs, (list, tuple)) and len(dcs) > 0:
                    conf = float(dcs[-1])  # most recent detection confidence
            packed.append((tid, x, y, w, h, conf, cls))
            printable.append(f"[trk id={tid} cls={cls} conf={conf:.2f} tlwh=({int(x)},{int(y)},{int(w)},{int(h)})]")
        # Write results into the tracks segment (packed in place, no extra copies).
        num_tracks = min(len(packed), MAX_TRACKS)
        track_header_struct.pack_into(buf_t, 0, num_tracks, frame_id)
        if num_tracks > 0:
            offset = TRK_HDR_SIZE
            for i in range(num_tracks):
                tid, x, y, w, h, conf, clsid = packed[i]
                track_struct.pack_into(buf_t, offset, tid, x, y, w, h, conf, clsid)
                offset += TRK_SIZE
        # Signal the C++ side only after the tracks segment is fully written.
        sem_tracks_ready.release()
        # Per-frame report.
        total_ms = (time.time() - t_start) * 1000.0
        print(f"PY frame={frame_id:05d} trk={trk_ms:6.2f}ms total_py={total_ms:6.2f}ms tracks={num_tracks} {', '.join(printable)}")
        t1 = (time.time() - t0) * 1000
        print("time : ", t1)
        print("end")
        print("---------- main while ----------")
    # Cleanup: release the views before closing the segments (a live memoryview
    # would make SharedMemory.close() raise).
    try:
        buf_f.release(); buf_t.release()
        shm_frame.close(); shm_tracks.close()
    except Exception as e:
        print(f"⚠️ Cleanup warning: {e}")
    avg_trk = total_track_time_ms / max(1, frame_count)
    wall_ms = (time.time() - t0_wall) * 1000.0
    print(f"✅ Python tracker finished. avg_trk={avg_trk:.2f}ms over {frame_count} frames, wall={wall_ms:.1f}ms")


if __name__ == "__main__":
    main()
چه عوض شد؟
- HDR_FMT = "<iiiiib" شد تا آن bool حساب شود و آفست تصویر دقیق باشد.
- ساخت فریم بدون کپی:
  frame = np.ndarray((h, w, c), dtype=np.uint8, buffer=buf_f, offset=img_offset_base)
  هیچ slice یا frombuffer واسطی که احتمال کپی بدهد استفاده نشده است.
- ساختار C++ شما خوب بود. تنها یک نکتهٔ کوچک: همگام با Python جدید، مقدار frame_ready را (برای وضوح) ست میکنیم. بقیه عین قبل است.
// main_ipc_final.cpp
//
// Producer side of a C++ -> Python pipeline: decodes video frames, runs
// YOLOv8 (OpenCV DNN) detection, publishes the raw frame + detections into
// a POSIX shared-memory segment, signals a Python tracker process through
// POSIX named semaphores, and reads the resulting tracks back from a second
// shared-memory segment. The inter-process boundary is zero-copy; the only
// copy left is decoder buffer -> SHM inside this process.
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>
#include <vector>
#include <string>
#include <chrono>
#include <iomanip>
#include <cstring>
// ==== POSIX IPC ====
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <semaphore.h>
#include <unistd.h>

// IPC object names — must match the constants in the Python consumer.
static const char* SHM_FRAME_NAME = "/shm_yolo_frames";
static const char* SHM_TRACKS_NAME = "/shm_tracks";
static const char* SEM_FRAME_READY = "/sem_frame_ready";
static const char* SEM_TRACKS_READY = "/sem_tracks_ready";

// Fixed-capacity slots reserved in shared memory for detections / tracks.
static const int MAX_DETS = 512;
static const int MAX_TRACKS = 512;

// Packed, byte-exact layouts mirrored by the Python struct format strings.
#pragma pack(push, 1)
struct FrameHeader {
    int32_t width;
    int32_t height;
    int32_t channels;
    int32_t frame_id;
    int32_t num_dets;
    bool frame_ready; // 1 byte (because of pack(1))
    // NOTE(review): with pack(1), sizeof(FrameHeader) == 21 (5*int32 + bool),
    // while the Python HDR_FMT "<iiiii" is only 20 bytes — confirm the Python
    // side adds this trailing byte to its det-array / image offsets.
};
struct SharedDet {
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
struct TracksHeader {
    int32_t num_tracks;
    int32_t frame_id;
};
struct TrackRec {
    int32_t track_id;
    float x, y, w, h; // tlwh
    float conf;
    int32_t class_id;
};
#pragma pack(pop)

// ONNX YOLOv8 detector wrapping cv::dnn: loads the network (CUDA if
// available, CPU otherwise), runs inference, and decodes the raw output
// tensor into NMS-filtered detections in original-frame coordinates.
class YOLOv8Detector {
public:
    // Tick count captured at construction; drawDetections() prints the time
    // elapsed since this value and it is never reset — presumably intended
    // as a rough stage timer; confirm that accumulating elapsed time is the
    // desired behavior.
    int64 t2 = cv::getTickCount();

    // model_path: ONNX model file; class_file_path: one class name per line;
    // conf_thresh / nms_thresh: detection confidence and NMS IoU thresholds.
    YOLOv8Detector(const std::string& model_path, const std::string& class_file_path,
                   float conf_thresh = 0.30f, float nms_thresh = 0.4f)
        : conf_threshold(conf_thresh), nms_threshold(nms_thresh),
          input_width(640), input_height(640) {
        std::cout << "🔄 Loading YOLOv8 model: " << model_path << std::endl;
        net = cv::dnn::readNet(model_path);
        try {
            // Prefer CUDA; falls through to CPU if the backend throws.
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
            std::cout << "✅ Using GPU (CUDA) backend\n";
        } catch (...) {
            net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
            net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
            std::cout << "⚠️ Using CPU backend\n";
        }
        loadClassNames(class_file_path);
        // Pre-reserve working buffers to avoid reallocation per frame.
        boxes.reserve(1000);
        confidences.reserve(1000);
        class_ids.reserve(1000);
        outputs.reserve(10);
        std::cout << "✅ YOLOv8 model loaded successfully!\n";
        std::cout << "📊 Classes loaded: " << class_names.size() << "\n";
    }

    // One detection in original-frame pixel coordinates.
    struct Detection {
        cv::Rect bbox;
        float confidence;
        int class_id;
        std::string class_name;
    };

    // Runs one forward pass on `frame`; writes the pure inference time (ms)
    // into inference_time_ms and returns the post-processed detections.
    std::vector<Detection> detect(const cv::Mat& frame, double& inference_time_ms) {
        auto t0 = std::chrono::high_resolution_clock::now();
        boxes.clear();
        confidences.clear();
        class_ids.clear();
        outputs.clear();
        // Letterbox-free resize to 640x640, BGR->RGB swap, [0,1] scaling.
        cv::dnn::blobFromImage(frame, blob, 1.0/255.0,
                               cv::Size(input_width, input_height),
                               cv::Scalar(0,0,0), true, false, CV_32F);
        net.setInput(blob);
        net.forward(outputs, net.getUnconnectedOutLayersNames());
        auto t1 = std::chrono::high_resolution_clock::now();
        inference_time_ms = std::chrono::duration<double, std::milli>(t1 - t0).count();
        return postProcess(frame, outputs);
    }

    // Returns a copy of `frame` with detection boxes and labels drawn.
    cv::Mat drawDetections(const cv::Mat& frame, const std::vector<Detection>& detections) {
        cv::Mat result = frame.clone();
        const cv::Scalar det_color(180,105,255);
        for (const auto& d : detections) {
            cv::rectangle(result, d.bbox, det_color, 2);
            char lab[128];
            std::snprintf(lab, sizeof(lab), "%s %.0f%%", d.class_name.c_str(), d.confidence*100.0f);
            int base=0;
            cv::Size sz = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 1, &base);
            // Filled label background just above the box.
            cv::rectangle(result, {d.bbox.x, d.bbox.y - sz.height - 6},
                          {d.bbox.x + sz.width + 6, d.bbox.y}, det_color, cv::FILLED);
            cv::putText(result, lab, {d.bbox.x+3, d.bbox.y-3},
                        cv::FONT_HERSHEY_SIMPLEX, 0.6, {255,255,255}, 1);
        }
        // Prints time elapsed since construction (t2 is never updated).
        int64 t3 = cv::getTickCount();
        double time = (t3 - t2) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- class YOLOv8Detector public start ----------" << std::endl;
        std::cout << "time : " << time << std::endl;
        std::cout << "---------- class YOLOv8Detector public end ----------" << std::endl;
        return result;
    }

private:
    cv::dnn::Net net;
    std::vector<std::string> class_names;
    float conf_threshold, nms_threshold;
    int input_width, input_height;        // network input size (640x640)
    cv::Mat blob;                          // reused input blob
    std::vector<cv::Mat> outputs;          // reused raw network outputs
    // Reused scratch buffers shared between detect() and postProcess().
    std::vector<cv::Rect> boxes;
    std::vector<float> confidences;
    std::vector<int> class_ids;

    // Loads one class label per line; falls back to a single "obj" label
    // when the file cannot be opened.
    void loadClassNames(const std::string& class_file_path) {
        std::ifstream file(class_file_path);
        std::string line;
        if (file.is_open()) {
            while (std::getline(file, line)) class_names.push_back(line);
            file.close();
        } else {
            std::cout << "⚠️ Class file not found. Using fallback labels.\n";
            class_names = {"obj"};
        }
    }

    // Decodes YOLOv8's transposed output layout ([1, 4+num_classes, num_dets],
    // attribute-major), scales boxes back to the original frame size, and
    // applies class-agnostic NMS.
    std::vector<Detection> postProcess(const cv::Mat& frame, const std::vector<cv::Mat>& outputs) {
        std::vector<Detection> detections;
        detections.reserve(100);
        // Scale factors from network input space back to frame pixels.
        float x_factor = frame.cols / static_cast<float>(input_width);
        float y_factor = frame.rows / static_cast<float>(input_height);
        for (const auto& out : outputs) {
            const float* data = (float*)out.data;
            int num_classes = out.size[1] - 4;  // rows after cx,cy,w,h are class scores
            int num_dets = out.size[2];
            for (int i = 0; i < num_dets; ++i) {
                // Attribute-major indexing: attribute j of candidate i is
                // data[i + j*num_dets].
                float cx = data[i];
                float cy = data[i + num_dets];
                float w = data[i + 2*num_dets];
                float h = data[i + 3*num_dets];
                // Pick the best-scoring class for this candidate.
                float best = 0.f;
                int best_id = 0;
                for (int j = 4; j < 4 + num_classes; ++j) {
                    float conf = data[i + j*num_dets];
                    if (conf > best) { best = conf; best_id = j - 4; }
                }
                if (best >= conf_threshold) {
                    // Center/size -> top-left x,y,w,h in frame pixels.
                    int x1 = (int)((cx - w/2) * x_factor);
                    int y1 = (int)((cy - h/2) * y_factor);
                    int ww = (int)(w * x_factor);
                    int hh = (int)(h * y_factor);
                    boxes.emplace_back(x1, y1, ww, hh);
                    confidences.push_back(best);
                    class_ids.push_back(best_id);
                }
            }
        }
        std::vector<int> idx;
        cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, idx);
        detections.reserve(idx.size());
        for (int k : idx) {
            Detection d;
            d.bbox = boxes[k];
            d.confidence = confidences[k];
            d.class_id = class_ids[k];
            d.class_name = (d.class_id>=0 && d.class_id<(int)class_names.size()) ?
                class_names[d.class_id] : "Unknown";
            detections.push_back(std::move(d));
        }
        return detections;
    }
};

// Usage: ./yolo_deepsort_ipc <model.onnx> <video.mp4> [coco.names]
// Creates both SHM segments and both semaphores, then per frame:
// detect -> write header/dets/pixels into SHM -> post frame_ready ->
// wait (2 s timeout) for tracks_ready -> read tracks -> visualize to
// frames/frame_NNNNN.jpg. A width==0 header posted at EOF tells the
// Python side to shut down.
int main(int argc, char* argv[]) {
    int64 t6 = cv::getTickCount();
    if (argc < 3) {
        std::cout << "Usage: " << argv[0] << " <model.onnx> <video.mp4> [coco.names]\n";
        return -1;
    }
    std::string model_path = argv[1];
    std::string video_path = argv[2];
    std::string class_file = (argc>3)? argv[3] : "";
    try {
        YOLOv8Detector detector(model_path, class_file, 0.30f, 0.4f);
        cv::VideoCapture cap(video_path);
        if (!cap.isOpened()) { std::cerr << "❌ Could not open video\n"; return -1; }
        cap.set(cv::CAP_PROP_BUFFERSIZE, 1);
        int fps = (int)cap.get(cv::CAP_PROP_FPS);
        int width = (int)cap.get(cv::CAP_PROP_FRAME_WIDTH);
        int height = (int)cap.get(cv::CAP_PROP_FRAME_HEIGHT);
        int total = (int)cap.get(cv::CAP_PROP_FRAME_COUNT);
        int channels = 3;
        std::cout << "📹 " << width << "x" << height << " @" << fps
                  << "fps, total " << total << "\n";

        // ===== Shared memory =====
        // Frame segment layout: [FrameHeader][MAX_DETS * SharedDet][pixels].
        size_t frame_bytes = (size_t)width * height * channels;
        size_t shm_frame_size = sizeof(FrameHeader) + MAX_DETS*sizeof(SharedDet) + frame_bytes;
        int shm_fd_frames = shm_open(SHM_FRAME_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_frames == -1) { perror("shm_open frame"); return -1; }
        if (ftruncate(shm_fd_frames, shm_frame_size) == -1) { perror("ftruncate frame"); return -1; }
        void* shm_frames = mmap(nullptr, shm_frame_size, PROT_READ|PROT_WRITE,
                                MAP_SHARED, shm_fd_frames, 0);
        if (shm_frames == MAP_FAILED) { perror("mmap frame"); return -1; }

        // Tracks segment layout: [TracksHeader][MAX_TRACKS * TrackRec].
        size_t shm_tracks_size = sizeof(TracksHeader) + MAX_TRACKS*sizeof(TrackRec);
        int shm_fd_tracks = shm_open(SHM_TRACKS_NAME, O_CREAT|O_RDWR, 0666);
        if (shm_fd_tracks == -1) { perror("shm_open tracks"); return -1; }
        if (ftruncate(shm_fd_tracks, shm_tracks_size) == -1) { perror("ftruncate tracks"); return -1; }
        void* shm_tracks = mmap(nullptr, shm_tracks_size, PROT_READ|PROT_WRITE,
                                MAP_SHARED, shm_fd_tracks, 0);
        if (shm_tracks == MAP_FAILED) { perror("mmap tracks"); return -1; }

        // Both semaphores start at 0: frame_ready is posted by us, tracks_ready
        // by Python. NOTE(review): if a previous run crashed, a leftover named
        // semaphore keeps its old count — verify stale IPC objects are cleaned.
        sem_t* sem_frame_ready = sem_open(SEM_FRAME_READY, O_CREAT, 0666, 0);
        sem_t* sem_tracks_ready = sem_open(SEM_TRACKS_READY, O_CREAT, 0666, 0);
        if (sem_frame_ready == SEM_FAILED || sem_tracks_ready == SEM_FAILED) {
            perror("sem_open"); return -1;
        }

        // NOTE(review): the writer is opened and released but no frame is ever
        // written to it in this loop — confirm whether output_tracked.mp4 is
        // still wanted.
        cv::VideoWriter writer;
        bool use_writer = false;
        try {
            writer.open("output_tracked.mp4", cv::VideoWriter::fourcc('a','v','c','1'),
                        fps, cv::Size(width, height));
            use_writer = writer.isOpened();
        } catch (...) {
            std::cout << "⚠️ Video writer failed, continuing.\n";
        }

        cv::Mat frame;
        int frame_id = 0;
        double total_det_ms = 0.0;
        auto t0_wall = std::chrono::high_resolution_clock::now();
        // Fixed offsets into the frame segment (same layout the Python reader
        // must use).
        auto* hdr = reinterpret_cast<FrameHeader*>(shm_frames);
        auto* det_arr = reinterpret_cast<SharedDet*>((char*)shm_frames + sizeof(FrameHeader));
        auto* img_ptr = reinterpret_cast<uint8_t*>((char*)shm_frames + sizeof(FrameHeader)
                                                   + MAX_DETS*sizeof(SharedDet));
        const cv::Scalar trk_color(230,216,173);
        const cv::Scalar trk_text_color(0,0,0);
        size_t shm_total = shm_frame_size + shm_tracks_size;
        std::cout << "ℹ️ SHM sizes: frames=" << shm_frame_size
                  << " bytes, tracks=" << shm_tracks_size
                  << " bytes, total=" << shm_total << " bytes (~"
                  << (shm_total/1024.0/1024.0) << " MB)\n";
        int64 t7 = cv::getTickCount();
        double time_main = (t7 - t6) * 1000.0 / cv::getTickFrequency();
        std::cout << "---------- main before while start ----------" << std::endl;
        std::cout << "time : " << time_main << std::endl;
        std::cout << "---------- main before while end ----------" << std::endl;

        while (true) {
            int64 t8 = cv::getTickCount();
            cap >> frame;
            if (frame.empty()) break;
            // memcpy below assumes a single contiguous pixel buffer.
            if (!frame.isContinuous()) frame = frame.clone();
            double det_ms = 0.0;
            auto dets = detector.detect(frame, det_ms);
            total_det_ms += det_ms;

            // Fill header + detections.
            hdr->width = width;
            hdr->height = height;
            hdr->channels = 3;
            hdr->frame_id = frame_id;
            hdr->num_dets = std::min<int>((int)dets.size(), MAX_DETS);
            for (int i = 0; i < hdr->num_dets; ++i) {
                const auto& d = dets[i];
                det_arr[i].x = (float)d.bbox.x;
                det_arr[i].y = (float)d.bbox.y;
                det_arr[i].w = (float)d.bbox.width;
                det_arr[i].h = (float)d.bbox.height;
                det_arr[i].conf = d.confidence;
                det_arr[i].class_id = d.class_id;
            }
            // Unavoidable copy from the decoder buffer into SHM
            // (the inter-process boundary itself is zero-copy).
            std::memcpy(img_ptr, frame.data, frame.total()*frame.elemSize());

            // Signal the consumer.
            hdr->frame_ready = true;
            auto t_py_start = std::chrono::high_resolution_clock::now();
            sem_post(sem_frame_ready);

            // Wait for the tracker's reply (absolute 2-second deadline).
            struct timespec timeout;
            clock_gettime(CLOCK_REALTIME, &timeout);
            timeout.tv_sec += 2;
            int sem_result = sem_timedwait(sem_tracks_ready, &timeout);
            double trk_ms = 0.0;
            std::vector<TrackRec> tracks;
            if (sem_result != 0) {
                std::cout << "⚠️ Timeout waiting for Python tracker\n";
            } else {
                auto t_py_end = std::chrono::high_resolution_clock::now();
                trk_ms = std::chrono::duration<double, std::milli>(t_py_end - t_py_start).count();
                auto* th = reinterpret_cast<TracksHeader*>(shm_tracks);
                auto* tr = reinterpret_cast<TrackRec*>((char*)shm_tracks + sizeof(TracksHeader));
                int ntracks = std::min<int>(th->num_tracks, MAX_TRACKS);
                tracks.assign(tr, tr + ntracks);  // copy out of SHM for local use
            }

            // Visualization (optional).
            cv::Mat vis = frame.clone();
            for (const auto& tk : tracks) {
                cv::Rect box((int)std::round(tk.x),(int)std::round(tk.y),
                             (int)std::round(tk.w),(int)std::round(tk.h));
                box &= cv::Rect(0,0,width,height);  // clip to frame
                if (box.width<=0 || box.height<=0) continue;
                cv::rectangle(vis, box, trk_color, 2);
                char lab[128];
                std::snprintf(lab, sizeof(lab), "ID %d cls:%d reid", tk.track_id, tk.class_id);
                int base=0;
                cv::Size ts = cv::getTextSize(lab, cv::FONT_HERSHEY_SIMPLEX, 0.6, 2, &base);
                // Keep the label inside the frame even for boxes near the top.
                int ly = std::max(ts.height + 6, box.y);
                cv::rectangle(vis, {box.x, ly - ts.height - 6},
                              {box.x + ts.width + 6, ly}, trk_color, cv::FILLED);
                cv::putText(vis, lab, {box.x+3, ly-3},
                            cv::FONT_HERSHEY_SIMPLEX, 0.6, trk_text_color, 2);
            }

            // Sample output (requires an existing frames/ directory).
            char filename[256];
            std::snprintf(filename, sizeof(filename), "frames/frame_%05d.jpg", frame_id);
            cv::imwrite(filename, vis);

            double total_ms = det_ms + trk_ms;
            std::cout << "CPP frame=" << std::setw(5) << frame_id
                      << " det=" << std::fixed << std::setprecision(2) << det_ms << "ms"
                      << " trk=" << trk_ms << "ms"
                      << " total=" << total_ms << "ms"
                      << " dets=" << dets.size()
                      << " tracks=" << tracks.size() << "\n";
            ++frame_id;
            int64 t9 = cv::getTickCount();
            double time_main_while = (t9 - t8) * 1000.0 / cv::getTickFrequency();
            std::cout << "---------- main while start ----------" << std::endl;
            std::cout << "time : " << time_main_while << std::endl;
            std::cout << "---------- main e while end ----------" << std::endl;
        }

        // End of stream: post a sentinel header (width==0, frame_id==-1) so the
        // Python side knows to exit.
        auto* hdr_end = reinterpret_cast<FrameHeader*>(shm_frames);
        hdr_end->width=0;
        hdr_end->height=0;
        hdr_end->channels=0;
        hdr_end->frame_id=-1;
        hdr_end->num_dets=0;
        hdr_end->frame_ready = false;
        sem_post(sem_frame_ready);

        cap.release();
        if (use_writer) writer.release();
        cv::destroyAllWindows();
        auto t1_wall = std::chrono::high_resolution_clock::now();
        double wall_ms = std::chrono::duration<double, std::milli>(t1_wall - t0_wall).count();
        double avg_det = total_det_ms / std::max(1, frame_id);
        std::cout << "✅ Done. avg_det=" << std::fixed << std::setprecision(2)
                  << avg_det << "ms over " << frame_id
                  << " frames, wall=" << wall_ms << "ms\n";

        // Cleanup.
        munmap(shm_frames, shm_frame_size);
        munmap(shm_tracks, shm_tracks_size);
        close(shm_fd_frames);
        close(shm_fd_tracks);
        sem_close(sem_frame_ready);
        sem_close(sem_tracks_ready);
        // ⚠️ unlink is normally done after Python finishes; kept here to match
        // your current pattern.
        shm_unlink(SHM_FRAME_NAME);
        shm_unlink(SHM_TRACKS_NAME);
        sem_unlink(SEM_FRAME_READY);
        sem_unlink(SEM_TRACKS_READY);
    } catch (const std::exception& e) {
        std::cerr << "❌ Exception: " << e.what() << "\n";
        return -1;
    }
    return 0;
}
نکته: اگر روزی خواستی کپی داخل C++ را هم حذف کنی، باید دیکودر (FFmpeg/کدک GPU) بتواند مستقیم در بافر img_ptr بنویسد (API با بافر کاربر). VideoCapture این قابلیت را ندارد؛ بنابراین فعلاً همین یک کپی ناگزیر باقی می‌ماند، ولی مرز بین پردازه‌ها کاملاً صفر-کپی است.
# Makefile — builds the C++ producer (yolo_deepsort_ipc) from main_ipc_final.cpp.
CXX := g++
CXXFLAGS := -O3 -std=c++17 -Wall -Wextra
# OpenCV flags resolved via pkg-config (requires libopencv-dev).
OPENCV_CFLAGS := $(shell pkg-config --cflags opencv4)
OPENCV_LIBS := $(shell pkg-config --libs opencv4)
# For POSIX shm/semaphore on Linux.
LIBS := -pthread -lrt
TARGET := yolo_deepsort_ipc
SRC := main_ipc_final.cpp

all: $(TARGET)

$(TARGET): $(SRC)
	$(CXX) $(CXXFLAGS) $(OPENCV_CFLAGS) -o $@ $^ $(OPENCV_LIBS) $(LIBS)

clean:
	rm -f $(TARGET)

.PHONY: all clean
bashsudo apt-get update sudo apt-get install -y build-essential pkg-config libopencv-dev python3-opencv python3-pip python3 -m pip install posix_ipc deep-sort-realtime torch torchvision --extra-index-url https://download.pytorch.org/whl/cu121
bashmkdir -p frames
bashmake # خروجی: ./yolo_deepsort_ipc
bashpython3 tracker_py_ipc.py
bash./yolo_deepsort_ipc /path/to/yolov8n.onnx /path/to/video.mp4 /path/to/coco.names
ترتیب اجرا مهم نیست؛ اگر Python سگمنت SharedMemory را پیدا نکند، تا ۵ دقیقه منتظر می‌ماند.
اگر خطای دسترسی به /dev/shm داشتی، با sudo اجرا نکن؛ زیرا نام‌های POSIX با کاربر متفاوت می‌شوند.
bool). آرایهٔ np.ndarray مستقیماً روی بافر SHM با آرگومان‌های buffer و offset ساخته می‌شود (بدون slice/کپی):
pythonframe = np.ndarray((h,w,c), dtype=np.uint8, buffer=buf_f, offset=img_offset_base)
هیچ واسطی مانند img_bytes = buf_f[...] یا np.frombuffer(...) که احتمال کپی ایجاد کند در کار نیست. اگر خواستی همین را به نسخهٔ GPU (cuda/cudacodec) یا ffmpeg تغییر بدهی تا همان کپی داخل C++ هم حذف شود، خبر بده تا یک پیاده‌سازی Producer که مستقیم در SHM می‌نویسد برایت آماده کنم.