#!/usr/bin/env python3
"""
Fusion + Visualization + 3D Eval (NO intrusion logic).

Decision-level fusion of YOLO (image) 2D detections and OpenPCDet (point
cloud) 3D detections. The 3D boxes are projected into the image, associated
with YOLO boxes via Hungarian matching, and fused with a quality-gated
Dempster-Shafer combination. Produces debug visualizations (camera view +
BEV + front X-Z crop) and 3D mAP/mAR evaluation against GT labels.
"""
import os
import sys
import glob
import math
import random
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
import cv2
import torch
import torch.nn as nn
# ---------------- Paths (data, repos, weights) ----------------
IMG_DIR = "/media/jd/4997BB1603CFE2C4/lw/aug2/img"
PCDET_POINTS_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/points"
PCDET_GT_LABEL_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/labels"
ULTRALYTICS_REPO = "/media/jd/4997BB1603CFE2C4/lw/ultralytic"
YOLO_WEIGHTS = "/media/jd/4997BB1603CFE2C4/lw/ultralytic/runs1/1209yolo11_m+SM+d1/weights/best.pt"
OPENPCDET_REPO = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet"
PCDET_CFG = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/tools/cfgs/custom_models/train.yaml"
PCDET_CKPT = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/output/cfgs/custom_models/train/default/ckpt/checkpoint_epoch_80.pth"
DEBUG_DIR = "debug_vis"
CLASSES = ['Drone', 'Plastic_sheet', 'Kite', 'Balloon', 'Bird']
# ---------------- Camera intrinsics / extrinsics ----------------
CALIB = {
    # 4x4 homogeneous transform; direction (lidar->cam vs cam->lidar) is
    # resolved at runtime, see EXTRINSIC_MODE / get_extrinsic_matrix().
    "extrinsic_matrix": np.array([
        [0.00871822977022152, -0.9990101808868351, -0.043619387365335945, -0.04000000000000002],
        [-0.0003806461322609286, 0.043617726472722454, -0.9990482215818578, 0.25],
        [0.9999619230641715, 0.008726535498373544, -1.3877787807814457e-17, -0.04000000000000002],
        [0.0, 0.0, 0.0, 1.0]
    ], dtype=np.float64),
    "fx": 3605.0801593073,
    "fy": 3604.9573399128,
    "cx": 951.9363889574,
    "cy": 816.9773743048,
    "width": 1920,
    "height": 1536,
    # distortion coefficients passed straight to cv2.projectPoints
    "dist": np.array([
        2.4515361243, -46.8007861419,
        -0.0002973913, -0.0008024507,
        -144.3698857610, 2.6420544359,
        -46.0443623397, -158.1742719597
    ], dtype=np.float64)
}
USE_DISTORTION = True
EXTRINSIC_MODE = "auto"  # auto / lidar2cam / cam2lidar
# ---------------- Visualization colors (BGR) ----------------
COL_YOLO = (0, 255, 0)
COL_PCDET = (255, 0, 0)
COL_FUSED = (0, 0, 255)
# ---------------- Detection / fusion thresholds ----------------
YOLO_CONF_LOW = 0.05
PCDET_CONF_LOW = 0.01
YOLO_KEEP_TOPK = 1
PCDET_CONF_HIGH_UNMATCHED = 0.10
FUSED_KEEP_THRESH = 0.10
MATCH_IOU_THR = 0.05
MATCH_CENTER_DIST_THR_PX = 160.0
PROJ_BOX_EXPAND_RATIO = 0.18
COST_ALPHA = 0.7
# ---------------- 2D overlap heuristics ----------------
OV_IOU_THR = 0.20
OV_CENTER_DIST_PX = 40.0
OV_CONTAINMENT_THR = 0.60  # inter / min(area)
OV_EDGE_GAP_PX = 6.0  # edge-to-edge distance <= this also counts as overlapping/adjacent
OV_MIN_SCORE_FOR_STYLE = 0.55  # overlap_score >= this -> dashed + translucent drawing style
Q_IOU_REF = 0.30  # IoU at/above this is considered well aligned
Q_DIST_REF_PX = 80.0  # center distance at/below this is considered well aligned
Q_EDGE_REF_PX = 8.0  # reference edge-gap distance
FUSED_Q_GATING_STRENGTH = 0.60  # 0.6 means fused_score is modulated by (0.4 + 0.6*q)
# ---------------- Evaluation settings ----------------
RANDOM_SEED = 42
NUM_DEBUG_FRAMES = 20
NUM_EVAL_FRAMES = 800  # None = all frames (slow); try 300-1000 first to validate the pipeline
EVAL_IOU_THRESHOLDS = [0.7]
EVAL_SCORE_THR_PCD = 0.10
EVAL_SCORE_THR_FUSED = 0.10
PROGRESS_EVERY = 20  # print progress every N frames
# ---------------- BEV / front-view panel settings ----------------
BEV_X_RANGE = (0.0, 80.0)
BEV_Y_RANGE = (-30.0, 30.0)
BEV_RESOLUTION = 0.10
FRONT_RESOLUTION = 0.03
FRONT_MIN_WPX = 160
FRONT_MIN_HPX = 240
FRONT_CROP_EXPAND_XY = 1.0  # meters (slight lateral/longitudinal expansion)
FRONT_CROP_EXPAND_Z = 0.8  # meters (slight vertical expansion)
# Per-class PCDet AP@0.7 (%), used to derive per-class sensor fusion weights.
PCDET_AP07 = {
    "Drone": 91.6,
    "Plastic_sheet": 55.52,
    "Kite": 40.61,
    "Balloon": 99.96,
    "Bird": 73.37
}
@dataclass
class Det2D:
    """A single 2D image-space detection (YOLO output)."""
    xyxy: List[float]  # [x1, y1, x2, y2] in pixels
    cls_name: str      # canonical class name (see canonical_class)
    score: float       # confidence score
@dataclass
class Det3D:
    """A single 3D lidar-frame detection (PCDet output or fused result)."""
    box7: np.ndarray   # [x, y, z, dx, dy, dz, yaw]
    cls_name: str      # canonical class name
    score: float       # confidence score
    proj_xyxy: Optional[List[float]] = None  # cached 3D->2D projected box (filled in fuse_frame)
def ensure_dir(p: str):
    """Create directory *p* (including parents); no-op if it already exists."""
    os.makedirs(p, exist_ok=True)
def canonical_class(name: str) -> str:
    """Normalize a class label to its canonical spelling.

    Case-insensitive, tolerates surrounding whitespace and '-' vs '_'.
    Unknown labels are returned stripped but otherwise unchanged;
    None passes through untouched.
    """
    if name is None:
        return name
    aliases = {
        "drone": "Drone",
        "kite": "Kite",
        "balloon": "Balloon",
        "bird": "Bird",
        "plastic_sheet": "Plastic_sheet",
        "plastic": "Plastic_sheet",
        "plasticsheet": "Plastic_sheet",
    }
    stripped = name.strip()
    key = stripped.lower().replace("-", "_")
    return aliases.get(key, stripped)
def find_image_for_frame(frame_id: str) -> Optional[str]:
    """Locate the image file for *frame_id* under IMG_DIR.

    Tries the common extensions first, then falls back to a glob over any
    extension; returns None when nothing matches.
    """
    for ext in (".jpg", ".png", ".jpeg", ".bmp"):
        candidate = os.path.join(IMG_DIR, frame_id + ext)
        if os.path.exists(candidate):
            return candidate
    matches = glob.glob(os.path.join(IMG_DIR, frame_id + ".*"))
    if matches:
        return matches[0]
    return None
def clip_box_xyxy(box, w, h):
    """Clamp an xyxy box to [0, w-1] x [0, h-1], fixing inverted corners."""
    xs = sorted(max(0, min(w - 1, v)) for v in (box[0], box[2]))
    ys = sorted(max(0, min(h - 1, v)) for v in (box[1], box[3]))
    return [xs[0], ys[0], xs[1], ys[1]]
def expand_box_xyxy(box, ratio=0.1):
    """Grow a box about its center by *ratio* (width/height at least 1px before growing)."""
    x1, y1, x2, y2 = box
    cx = 0.5 * (x1 + x2)
    cy = 0.5 * (y1 + y2)
    half_w = max(1.0, x2 - x1) * (1.0 + ratio) / 2.0
    half_h = max(1.0, y2 - y1) * (1.0 + ratio) / 2.0
    return [cx - half_w, cy - half_h, cx + half_w, cy + half_h]
def box_iou2d(a, b) -> float:
    """IoU of two xyxy boxes; degenerate (inverted) boxes contribute zero area."""
    iw = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    ih = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    inter = iw * ih
    area_a = max(0.0, a[2] - a[0]) * max(0.0, a[3] - a[1])
    area_b = max(0.0, b[2] - b[0]) * max(0.0, b[3] - b[1])
    return float(inter / (area_a + area_b - inter + 1e-9))
def box_intersection_area(a, b) -> float:
    """Area of the intersection of two xyxy boxes (0.0 when disjoint)."""
    iw = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
    ih = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
    return float(iw * ih)
def box_area(a) -> float:
    """Area of an xyxy box; negative width/height clamps to zero."""
    w = max(0.0, a[2] - a[0])
    h = max(0.0, a[3] - a[1])
    return float(w * h)
def center_of_box(box):
    """(cx, cy) center of an xyxy box."""
    x1, y1, x2, y2 = box
    return ((x1 + x2) * 0.5, (y1 + y2) * 0.5)
def center_dist_px(a, b) -> float:
    """Euclidean distance in pixels between the centers of two xyxy boxes."""
    ax = 0.5 * (a[0] + a[2])
    ay = 0.5 * (a[1] + a[3])
    bx = 0.5 * (b[0] + b[2])
    by = 0.5 * (b[1] + b[3])
    return float(math.hypot(ax - bx, ay - by))
def point_in_box(pt, box):
    """True when point (x, y) lies inside the xyxy box (boundary inclusive)."""
    x, y = pt
    return box[0] <= x <= box[2] and box[1] <= y <= box[3]
def rect_edge_gap(a, b) -> float:
    """Shortest distance between the edges of two axis-aligned rectangles.

    Returns 0.0 when the rectangles touch or overlap.
    """
    # signed separation per axis; negative means the projections overlap
    gap_x = max(0.0, max(a[0], b[0]) - min(a[2], b[2]))
    gap_y = max(0.0, max(a[1], b[1]) - min(a[3], b[3]))
    return float(math.hypot(gap_x, gap_y))
def overlap_and_score_2d(a, b, diag: float) -> Tuple[bool, float, Dict]:
    """Decide whether two 2D boxes overlap and score how strongly.

    Returns:
        is_overlap: bool — any of several overlap criteria fired
        overlap_score: float in [0, 1] — used for dashed/translucent drawing
            style and as the basis of the fusion match quality
        info: dict with iou, dist, gap, containment, center_in
    Note: *diag* is accepted for API symmetry but unused here.
    """
    iou = box_iou2d(a, b)
    dist = center_dist_px(a, b)
    gap = rect_edge_gap(a, b)
    inter = box_intersection_area(a, b)
    area_a = box_area(a)
    area_b = box_area(b)
    min_area = max(1e-9, min(area_a, area_b))
    containment = float(np.clip(inter / min_area, 0.0, 1.0))
    ac = center_of_box(a)
    bc = center_of_box(b)
    center_in = (point_in_box(ac, b) or point_in_box(bc, a))
    # ---- overlap decision (robust) ----
    # 1) genuine overlap / containment
    cond_iou = (iou >= OV_IOU_THR)
    cond_contain = (containment >= OV_CONTAINMENT_THR)
    cond_center_in = center_in
    # 2) edge-adjacent / slightly offset: IoU ~ 0 but edges very close
    cond_edge = (gap <= OV_EDGE_GAP_PX)
    # 3) small center distance (useful for small, distant targets)
    cond_dist = (dist <= OV_CENTER_DIST_PX)
    is_overlap = cond_iou or cond_contain or cond_center_in or (cond_edge and cond_dist)
    # ---- overlap_score (0..1): weighted blend of the same cues ----
    iou_term = min(iou / Q_IOU_REF, 1.0)
    dist_term = max(0.0, 1.0 - dist / max(1e-6, Q_DIST_REF_PX))
    gap_term = max(0.0, 1.0 - gap / max(1e-6, Q_EDGE_REF_PX))
    contain_term = containment
    center_bonus = 0.15 if center_in else 0.0
    score = 0.45 * iou_term + 0.25 * dist_term + 0.20 * contain_term + 0.10 * gap_term + center_bonus
    score = float(np.clip(score, 0.0, 1.0))
    info = {"iou": iou, "dist": dist, "gap": gap, "containment": containment, "center_in": center_in}
    return is_overlap, score, info
def match_quality_2d(yolo_box, proj_box, img_w, img_h) -> Tuple[float, Dict]:
    """Alignment quality q in [0, 1] between a YOLO box and a projected PCDet box.

    Larger q means the two boxes agree better. The raw overlap score is
    compressed into [0.15, 1.0] so weak matches still contribute a little
    while strong matches are not over-rewarded.
    """
    diag = math.hypot(img_w, img_h) + 1e-9
    _, score, info = overlap_and_score_2d(yolo_box, proj_box, diag)
    q = float(np.clip(0.15 + 0.85 * score, 0.0, 1.0))
    return q, info
def draw_text_box(img, x, y, text, color, font_scale=0.6, thickness=2):
    """Draw *text* over a filled black background rectangle at (x, y), clipped to the image."""
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
    x = int(np.clip(x, 0, img.shape[1]-1))
    y = int(np.clip(y, 0, img.shape[0]-1))
    x2 = int(np.clip(x + tw + 6, 0, img.shape[1]-1))
    y2 = int(np.clip(y + th + 6, 0, img.shape[0]-1))
    cv2.rectangle(img, (x, y), (x2, y2), (0, 0, 0), -1)
    cv2.putText(img, text, (x+3, y+th+3), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness, cv2.LINE_AA)
def draw_transparent_rect(img, box, color, alpha=0.22):
    """Blend a filled rectangle into *img* in place with opacity *alpha*."""
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    overlay = img.copy()
    cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
    cv2.addWeighted(overlay, alpha, img, 1.0 - alpha, 0, img)
def draw_dashed_line(img, p1, p2, color, thickness=2, dash_len=14, gap_len=8):
    """Draw a dashed line from p1 to p2 as alternating dash_len/gap_len segments."""
    x1, y1 = p1
    x2, y2 = p2
    dx = x2 - x1
    dy = y2 - y1
    length = math.hypot(dx, dy)
    if length < 1e-6:
        return
    # unit direction vector along the line
    vx = dx / length
    vy = dy / length
    dist = 0.0
    while dist < length:
        seg_start = dist
        seg_end = min(dist + dash_len, length)  # last dash may be truncated
        sx = int(round(x1 + vx * seg_start))
        sy = int(round(y1 + vy * seg_start))
        ex = int(round(x1 + vx * seg_end))
        ey = int(round(y1 + vy * seg_end))
        cv2.line(img, (sx, sy), (ex, ey), color, thickness, cv2.LINE_AA)
        dist += dash_len + gap_len
def draw_dashed_rect(img, box, color, thickness=2):
    """Draw an xyxy rectangle outline with dashed edges."""
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    draw_dashed_line(img, (x1, y1), (x2, y1), color, thickness)
    draw_dashed_line(img, (x2, y1), (x2, y2), color, thickness)
    draw_dashed_line(img, (x2, y2), (x1, y2), color, thickness)
    draw_dashed_line(img, (x1, y2), (x1, y1), color, thickness)
def draw_box_smart(img, box, color, overlap_score: float, solid_thickness=2):
    """
    Draw *box*: translucent fill + dashed outline when
    overlap_score >= OV_MIN_SCORE_FOR_STYLE, otherwise a solid rectangle.
    """
    if overlap_score >= OV_MIN_SCORE_FOR_STYLE:
        draw_transparent_rect(img, box, color, alpha=0.22)
        draw_dashed_rect(img, box, color, thickness=max(2, solid_thickness))
    else:
        x1, y1, x2, y2 = [int(round(v)) for v in box]
        cv2.rectangle(img, (x1, y1), (x2, y2), color, solid_thickness)
def draw_legend(img, x=12, y=12):
    """Draw the color legend (YOLO / PCDet-proj / FUSED) at (x, y)."""
    draw_text_box(img, x, y, "Legend:", (255,255,255), 0.65, 2)
    y += 30
    cv2.rectangle(img, (x, y+6), (x+18, y+24), COL_YOLO, -1)
    draw_text_box(img, x+26, y, "YOLO (2D)", (255,255,255), 0.6, 2)
    y += 28
    cv2.rectangle(img, (x, y+6), (x+18, y+24), COL_PCDET, -1)
    draw_text_box(img, x+26, y, "PCDet-proj (3D->2D)", (255,255,255), 0.6, 2)
    y += 28
    cv2.rectangle(img, (x, y+6), (x+18, y+24), COL_FUSED, -1)
    draw_text_box(img, x+26, y, "FUSED", (255,255,255), 0.6, 2)
    y += 30
    draw_text_box(img, x, y, "Overlap: IoU/dist/contain/edge/center-in", (255,255,255), 0.50, 2)
def get_sensor_weights(cls_name: str) -> Tuple[float, float]:
    """Per-class (image, lidar) fusion weights from PCDet AP@0.7.

    Classes where the lidar detector scores higher AP lean on the lidar
    branch; weights are clipped and normalized to sum to 1.
    """
    ap_fraction = PCDET_AP07.get(canonical_class(cls_name), 70.0) / 100.0
    w_lidar = float(np.clip(ap_fraction, 0.15, 0.95))
    w_img = float(np.clip(1.0 - w_lidar, 0.05, 0.85))
    total = w_img + w_lidar
    return w_img / total, w_lidar / total
def ds_fuse_singleton(cls_a: str, s_a: float, w_a: float,
                      cls_b: str, s_b: float, w_b: float) -> Tuple[str, float]:
    """Two-source Dempster-Shafer combination with singleton hypotheses.

    Each source contributes mass m = w * s to its own class, remainder to
    "unknown". When the classes agree the masses reinforce; when they
    conflict, conflict mass K is renormalized away and the stronger
    hypothesis wins. Returns (class, fused_mass).
    """
    cls_a = canonical_class(cls_a)
    cls_b = canonical_class(cls_b)
    m_a = float(np.clip(w_a * s_a, 0.0, 0.999999))
    m_b = float(np.clip(w_b * s_b, 0.0, 0.999999))
    th_a = 1.0 - m_a  # mass assigned to "unknown" by source a
    th_b = 1.0 - m_b
    # conflict mass exists only when the two sources name different classes
    K = (m_a * m_b) if (cls_a != cls_b) else 0.0
    denom = 1.0 - K + 1e-9
    if cls_a == cls_b:
        m = (m_a * m_b + m_a * th_b + th_a * m_b) / denom
        return cls_a, float(m)
    ma = (m_a * th_b) / denom
    mb = (th_a * m_b) / denom
    return (cls_a, float(ma)) if ma >= mb else (cls_b, float(mb))
def fuse_with_quality(ydet: Det2D, pdet: Det3D, q: float) -> Tuple[str, float]:
    """Fuse a matched YOLO/PCDet pair, modulated by match quality q in [0, 1].

    The better the 2D alignment (q), the more both scores are trusted:
    - effective scores: s_eff = s * (0.35 + 0.65*q), so low q dampens both
    - after DS combination, the fused score is gated once more by
      (1 - g) + g*q with g = FUSED_Q_GATING_STRENGTH.
    Returns (fused_class, fused_score).
    """
    w_img, w_lidar = get_sensor_weights(pdet.cls_name)
    gate = (0.35 + 0.65 * q)
    s_img = float(np.clip(ydet.score * gate, 0.0, 1.0))
    s_lid = float(np.clip(pdet.score * gate, 0.0, 1.0))
    fused_cls, fused_score = ds_fuse_singleton(
        ydet.cls_name, s_img, w_img,
        pdet.cls_name, s_lid, w_lidar
    )
    fused_score = float(np.clip(
        fused_score * ((1.0 - FUSED_Q_GATING_STRENGTH) + FUSED_Q_GATING_STRENGTH * q),
        0.0, 1.0))
    return fused_cls, fused_score
def boxes3d_to_corners(boxes7: np.ndarray) -> np.ndarray:
    """Convert (N, 7) boxes [x, y, z, dx, dy, dz, yaw] to (N, 8, 3) corners.

    Yaw rotates about +z. Corner order: top face (+z) first, then bottom,
    each face traversed (+x+y), (+x-y), (-x-y), (-x+y).
    """
    n = boxes7.shape[0]
    out = np.zeros((n, 8, 3), dtype=np.float32)
    for i in range(n):
        x, y, z, dx, dy, dz, yaw = boxes7[i].tolist()
        half = np.array([
            [ dx/2,  dy/2,  dz/2],
            [ dx/2, -dy/2,  dz/2],
            [-dx/2, -dy/2,  dz/2],
            [-dx/2,  dy/2,  dz/2],
            [ dx/2,  dy/2, -dz/2],
            [ dx/2, -dy/2, -dz/2],
            [-dx/2, -dy/2, -dz/2],
            [-dx/2,  dy/2, -dz/2],
        ], dtype=np.float32)
        c = math.cos(yaw)
        s = math.sin(yaw)
        rot = np.array([[c, -s, 0.0],
                        [s,  c, 0.0],
                        [0.0, 0.0, 1.0]], dtype=np.float32)
        out[i] = half @ rot.T + np.array([x, y, z], dtype=np.float32)
    return out
def project_points_lidar_to_img(pts_lidar: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray,
                                img_w: int, img_h: int, use_distortion=True):
    """Project lidar points into the image plane.

    Returns (img_pts (N,2) float32, valid (N,) bool). *valid* marks points
    in front of the camera (z > 0); all points are projected regardless, so
    callers must filter with *valid* themselves. *img_w*/*img_h* are accepted
    for API symmetry but bounds are not checked here.
    """
    fx, fy, cx, cy = calib["fx"], calib["fy"], calib["cx"], calib["cy"]
    K = np.array([[fx, 0, cx],
                  [0, fy, cy],
                  [0, 0, 1]], dtype=np.float64)
    dist = calib["dist"].astype(np.float64) if use_distortion else None
    pts_h = np.concatenate([pts_lidar.astype(np.float64),
                            np.ones((pts_lidar.shape[0], 1), dtype=np.float64)], axis=1)
    pts_cam = (T_lidar2cam @ pts_h.T).T[:, :3]
    valid = pts_cam[:, 2] > 1e-6
    rvec = np.zeros((3, 1), dtype=np.float64)
    tvec = np.zeros((3, 1), dtype=np.float64)
    img_pts, _ = cv2.projectPoints(pts_cam, rvec, tvec, K, dist)
    return img_pts.reshape(-1, 2).astype(np.float32), valid
def get_extrinsic_matrix(calib: Dict, pts_lidar_xyz: np.ndarray, img_w: int, img_h: int) -> np.ndarray:
    """Resolve the lidar->camera extrinsic according to EXTRINSIC_MODE.

    "lidar2cam": use the calibration matrix as-is.
    "cam2lidar": use its inverse.
    "auto": project a point sample with both directions and keep whichever
    lands the larger fraction of points inside the image.
    """
    T = calib["extrinsic_matrix"].copy()
    if EXTRINSIC_MODE == "lidar2cam":
        return T
    if EXTRINSIC_MODE == "cam2lidar":
        return np.linalg.inv(T)
    # auto mode needs a reasonable point sample to score against
    if pts_lidar_xyz is None or pts_lidar_xyz.shape[0] < 100:
        return T
    pts = pts_lidar_xyz
    if pts.shape[0] > 8000:
        pts = pts[np.random.choice(pts.shape[0], 8000, replace=False)]

    def score_for(Tuse):
        # fraction of valid projected points that land inside the image
        img_pts, valid = project_points_lidar_to_img(pts, calib, Tuse, img_w, img_h,
                                                     use_distortion=USE_DISTORTION)
        img_pts = img_pts[valid]
        if img_pts.shape[0] == 0:
            return 0.0
        inside = (img_pts[:, 0] >= 0) & (img_pts[:, 0] < img_w) & \
                 (img_pts[:, 1] >= 0) & (img_pts[:, 1] < img_h)
        return float(inside.mean())

    s1 = score_for(T)
    s2 = score_for(np.linalg.inv(T))
    chosen = T if s1 >= s2 else np.linalg.inv(T)
    print(f"[CALIB] auto | lidar2cam_inlier={s1:.3f} cam2lidar_inlier={s2:.3f} -> choose {'lidar2cam' if s1>=s2 else 'cam2lidar'}")
    return chosen
def project_boxes3d_to_2d(boxes7: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray,
                          img_w: int, img_h: int, use_distortion=True):
    """Project (N,7) 3D boxes into the image as axis-aligned 2D boxes.

    Returns a list of length N: either a clipped [x1, y1, x2, y2] box or
    None when fewer than 4 corners are in front of the camera, or when the
    resulting box degenerates to < 2px in either dimension.
    """
    if boxes7.shape[0] == 0:
        return []
    corners = boxes3d_to_corners(boxes7)
    out = []
    for i in range(corners.shape[0]):
        img_pts, valid = project_points_lidar_to_img(corners[i], calib, T_lidar2cam, img_w, img_h, use_distortion)
        if valid.sum() < 4:
            out.append(None)
            continue
        # bounding box of the valid projected corners
        xs = img_pts[valid, 0]; ys = img_pts[valid, 1]
        box = clip_box_xyxy([float(xs.min()), float(ys.min()), float(xs.max()), float(ys.max())], img_w, img_h)
        if (box[2]-box[0]) < 2 or (box[3]-box[1]) < 2:
            out.append(None)
        else:
            out.append(box)
    return out
def hungarian_match(cost: np.ndarray) -> List[Tuple[int, int]]:
    """Min-cost one-to-one assignment on *cost*.

    Uses scipy's Hungarian solver when available; otherwise falls back to a
    greedy pick in ascending-cost order.
    """
    try:
        from scipy.optimize import linear_sum_assignment
        rows, cols = linear_sum_assignment(cost)
        return list(zip(rows.tolist(), cols.tolist()))
    except Exception:
        pairs: List[Tuple[int, int]] = []
        taken_rows: set = set()
        taken_cols: set = set()
        for flat in np.argsort(cost.ravel()):
            i, j = np.unravel_index(int(flat), cost.shape)
            if i in taken_rows or j in taken_cols:
                continue
            taken_rows.add(int(i))
            taken_cols.add(int(j))
            pairs.append((int(i), int(j)))
        return pairs
def associate_yolo_pcdet(yolo: List[Det2D], pcdet: List[Det3D], img_w: int, img_h: int):
    """Associate YOLO boxes with (expanded) projected PCDet boxes.

    Builds a cost matrix from IoU + normalized center distance (with a bonus
    when one center lies inside the other box), solves a one-to-one
    assignment, then keeps only pairs passing loose gating. Returns
    (y2p, p2y) index maps.
    """
    N, M = len(yolo), len(pcdet)
    if N == 0 or M == 0:
        return {}, {}
    diag = math.hypot(img_w, img_h) + 1e-9
    cost = np.ones((N, M), dtype=np.float32) * 10.0  # large cost for unprojectable boxes
    for i in range(N):
        a = yolo[i].xyxy
        a_c = center_of_box(a)
        for j in range(M):
            b0 = pcdet[j].proj_xyxy
            if b0 is None:
                continue
            b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h)
            b_c = center_of_box(b)
            iou = box_iou2d(a, b)
            cd = math.hypot(a_c[0]-b_c[0], a_c[1]-b_c[1]) / diag
            inside = point_in_box(a_c, b) or point_in_box(b_c, a)
            bonus = -0.15 if inside else 0.0
            cost[i, j] = COST_ALPHA*(1.0 - iou) + (1.0 - COST_ALPHA)*cd + bonus
    pairs = hungarian_match(cost)
    y2p, p2y = {}, {}
    for i, j in pairs:
        b0 = pcdet[j].proj_xyxy
        if b0 is None:
            continue
        b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h)
        iou = box_iou2d(yolo[i].xyxy, b)
        cd = center_dist_px(yolo[i].xyxy, b)
        y_c = center_of_box(yolo[i].xyxy)
        b_c = center_of_box(b)
        inside = point_in_box(y_c, b) or point_in_box(b_c, yolo[i].xyxy)
        # accept the pair if any gating criterion passes
        if (iou >= MATCH_IOU_THR) or (cd <= MATCH_CENTER_DIST_THR_PX) or inside:
            y2p[i] = j
            p2y[j] = i
    return y2p, p2y
def points_in_obb(points_xyz: np.ndarray, box7: np.ndarray,
                  expand_xy: float, expand_z: float) -> np.ndarray:
    """Boolean mask of points inside an oriented (yaw-about-z) 3D box.

    Args:
        points_xyz: (N, 3) points
        box7: [x, y, z, dx, dy, dz, yaw]
        expand_xy: extra half-extent added in local x and y (meters)
        expand_z: extra half-extent added in local z (meters)
    Returns:
        (N,) bool mask
    """
    cx, cy, cz, dx, dy, dz, yaw = box7.tolist()
    # translate into box frame, then rotate by -yaw to undo box orientation
    px = points_xyz[:, 0] - cx
    py = points_xyz[:, 1] - cy
    pz = points_xyz[:, 2] - cz
    c = math.cos(-yaw)
    s = math.sin(-yaw)
    lx = c * px - s * py
    ly = s * px + c * py
    lz = pz
    hx = dx / 2.0 + expand_xy
    hy = dy / 2.0 + expand_xy
    hz = dz / 2.0 + expand_z
    mask = (np.abs(lx) <= hx) & (np.abs(ly) <= hy) & (np.abs(lz) <= hz)
    return mask
def make_front_xz_crop(points: Optional[np.ndarray], det_ref: Optional[Det3D],
                       pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray:
    """Right-side front view: only points near det_ref (inside box + expansion).

    X is the horizontal axis, Z the vertical axis. Returns a BGR canvas;
    a placeholder canvas when there is no reference box or no points.
    """
    canvas = np.zeros((FRONT_MIN_HPX, FRONT_MIN_WPX, 3), dtype=np.uint8)
    draw_text_box(canvas, 6, 10, "FRONT (X-Z) CROP", (220,220,220), 0.6, 2)
    if det_ref is None or points is None or points.shape[0] == 0:
        return canvas
    pts_xyz = points[:, :3]
    mask = points_in_obb(pts_xyz, det_ref.box7, FRONT_CROP_EXPAND_XY, FRONT_CROP_EXPAND_Z)
    crop = pts_xyz[mask]
    if crop.shape[0] == 0:
        draw_text_box(canvas, 6, 40, "no points in crop", (200,200,200), 0.55, 2)
        return canvas
    # view extent: det_ref's x/z extent plus the crop expansion
    x, y, z, dx, dy, dz, yaw = det_ref.box7.tolist()
    x_min = (x - dx/2 - FRONT_CROP_EXPAND_XY)
    x_max = (x + dx/2 + FRONT_CROP_EXPAND_XY)
    z_min = (z - dz/2 - FRONT_CROP_EXPAND_Z)
    z_max = (z + dz/2 + FRONT_CROP_EXPAND_Z)
    # resolution -> canvas size (never below the minimum panel size)
    res = FRONT_RESOLUTION
    W = max(FRONT_MIN_WPX, int((x_max - x_min) / res))
    H = max(FRONT_MIN_HPX, int((z_max - z_min) / res))
    front = np.zeros((H, W, 3), dtype=np.uint8)
    # rasterize the cropped points (z grows upward on screen)
    cc = ((crop[:, 0] - x_min) / res).astype(np.int32)
    rr = ((z_max - crop[:, 2]) / res).astype(np.int32)
    cc = np.clip(cc, 0, W - 1)
    rr = np.clip(rr, 0, H - 1)
    front[rr, cc] = (80, 80, 80)

    def draw_det_xz(det: Det3D, col):
        # axis-aligned x/z footprint of the rotated 3D box
        corners = boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0]
        xs = corners[:, 0]
        zs = corners[:, 2]
        x1, x2 = float(xs.min()), float(xs.max())
        z1, z2 = float(zs.min()), float(zs.max())
        x1p = int((x1 - x_min) / res)
        x2p = int((x2 - x_min) / res)
        y1p = int((z_max - z2) / res)
        y2p = int((z_max - z1) / res)
        x1p = int(np.clip(x1p, 0, W-1))
        x2p = int(np.clip(x2p, 0, W-1))
        y1p = int(np.clip(y1p, 0, H-1))
        y2p = int(np.clip(y2p, 0, H-1))
        cv2.rectangle(front, (x1p, y1p), (x2p, y2p), col, 1)
        draw_text_box(front, max(0, x1p), max(0, y1p-18), f"{det.cls_name}:{det.score:.2f}", col, 0.55, 1)

    if pcdet_top is not None:
        draw_det_xz(pcdet_top, COL_PCDET)
    if fused_top is not None:
        draw_det_xz(fused_top, COL_FUSED)
    draw_text_box(front, 6, 10, "FRONT (X-Z) CROP", (220,220,220), 0.6, 2)
    return front
def make_bev_image(points: Optional[np.ndarray], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray:
    """Bird's-eye-view panel: point cloud raster plus the top PCDet/fused boxes.

    Forward (+x) points up on screen, lateral (+y) to the right.
    """
    x_min, x_max = BEV_X_RANGE
    y_min, y_max = BEV_Y_RANGE
    res = BEV_RESOLUTION
    W = int((y_max - y_min) / res)
    H = int((x_max - x_min) / res)
    bev = np.zeros((H, W, 3), dtype=np.uint8)
    if points is not None and points.shape[0] > 0:
        mask = (points[:, 0] >= x_min) & (points[:, 0] <= x_max) & \
               (points[:, 1] >= y_min) & (points[:, 1] <= y_max)
        pts = points[mask]
        if pts.shape[0] > 0:
            rr = ((x_max - pts[:, 0]) / res).astype(np.int32)
            cc = ((pts[:, 1] - y_min) / res).astype(np.int32)
            rr = np.clip(rr, 0, H - 1)
            cc = np.clip(cc, 0, W - 1)
            bev[rr, cc] = (70, 70, 70)

    def draw_one(det: Det3D, col_bgr):
        # rotated box footprint polygon in BEV pixel coordinates
        x, y, z, dx, dy, dz, yaw = det.box7.tolist()
        cy = math.cos(yaw); sy = math.sin(yaw)
        local = np.array([[ dx/2,  dy/2],
                          [ dx/2, -dy/2],
                          [-dx/2, -dy/2],
                          [-dx/2,  dy/2]], dtype=np.float32)
        R = np.array([[cy, -sy], [sy, cy]], dtype=np.float32)
        corners_xy = (local @ R.T) + np.array([x, y], dtype=np.float32)
        rr = ((x_max - corners_xy[:, 0]) / res).astype(np.int32)
        cc = ((corners_xy[:, 1] - y_min) / res).astype(np.int32)
        poly = np.stack([cc, rr], axis=1).reshape(-1, 1, 2)
        cv2.polylines(bev, [poly], True, col_bgr, 1)
        rc = int((x_max - x) / res)
        cc0 = int((y - y_min) / res)
        draw_text_box(bev, max(0, cc0+4), max(0, rc-18), f"{det.cls_name}:{det.score:.2f}", col_bgr, 0.5, 1)

    if pcdet_top is not None:
        draw_one(pcdet_top, COL_PCDET)
    if fused_top is not None:
        draw_one(fused_top, COL_FUSED)
    draw_text_box(bev, 10, 10, "BEV", (220,220,220), 0.7, 2)
    return bev
def top1_yolo(dets: List[Det2D]) -> Optional[Det2D]:
    """Highest-confidence 2D detection, or None for an empty list."""
    if not dets:
        return None
    return max(dets, key=lambda d: d.score)
def top1_3d(dets: List[Det3D]) -> Optional[Det3D]:
    """Highest-confidence 3D detection, or None for an empty list."""
    if not dets:
        return None
    return max(dets, key=lambda d: d.score)
def make_debug_vis(frame_id: str, img_bgr: np.ndarray,
                   yolo_top: Optional[Det2D],
                   pcdet_top: Optional[Det3D],
                   fused_top: Optional[Det3D],
                   raw_points: Optional[np.ndarray],
                   out_path: str):
    """Render and save the debug canvas for one frame.

    Layout: camera view with annotated boxes (left), BEV panel (middle),
    front X-Z crop around the fused/PCDet box (right).
    """
    vis = img_bgr.copy()
    H, W = vis.shape[:2]
    draw_legend(vis, 12, 12)
    draw_text_box(vis, 12, H-40, f"frame:{frame_id}", (0, 220, 220), 0.75, 2)
    box_map: Dict[str, Tuple[List[float], str]] = {}
    if yolo_top is not None:
        b = clip_box_xyxy(yolo_top.xyxy, W, H)
        box_map["YOLO"] = (b, f"{yolo_top.cls_name}:{yolo_top.score:.2f}")
    if pcdet_top is not None and pcdet_top.proj_xyxy is not None:
        b = clip_box_xyxy(pcdet_top.proj_xyxy, W, H)
        box_map["PCDet"] = (b, f"{pcdet_top.cls_name}:{pcdet_top.score:.2f}")
    if fused_top is not None and fused_top.proj_xyxy is not None:
        b = clip_box_xyxy(fused_top.proj_xyxy, W, H)
        box_map["FUSED"] = (b, f"{fused_top.cls_name}:{fused_top.score:.2f}")
    # pairwise overlap scores decide solid vs dashed/translucent styling
    diag = math.hypot(W, H) + 1e-9
    overlap_score = {k: 0.0 for k in box_map.keys()}
    notes = []
    keys = list(box_map.keys())
    for i in range(len(keys)):
        for j in range(i+1, len(keys)):
            ki, kj = keys[i], keys[j]
            bi, ti = box_map[ki]
            bj, tj = box_map[kj]
            is_ov, sc, info = overlap_and_score_2d(bi, bj, diag)
            if is_ov:
                overlap_score[ki] = max(overlap_score[ki], sc)
                overlap_score[kj] = max(overlap_score[kj], sc)
            notes.append(f"{ki} vs {kj}: score={sc:.2f} iou={info['iou']:.2f} d={info['dist']:.1f}px gap={info['gap']:.1f}px")
    # draw boxes
    if "YOLO" in box_map:
        b, _ = box_map["YOLO"]
        draw_box_smart(vis, b, COL_YOLO, overlap_score.get("YOLO", 0.0), 2)
        draw_text_box(vis, int(b[0]), max(0, int(b[1]) - 22), f"YOLO {yolo_top.cls_name}:{yolo_top.score:.2f}", COL_YOLO, 0.6, 2)
    if "PCDet" in box_map:
        b, _ = box_map["PCDet"]
        draw_box_smart(vis, b, COL_PCDET, overlap_score.get("PCDet", 0.0), 2)
        draw_text_box(vis, int(b[0]), min(H-28, int(b[3]) + 6), f"PCDet {pcdet_top.cls_name}:{pcdet_top.score:.2f}", COL_PCDET, 0.6, 2)
    if "FUSED" in box_map:
        b, _ = box_map["FUSED"]
        draw_box_smart(vis, b, COL_FUSED, overlap_score.get("FUSED", 0.0), 3)
        txt = f"FUSED {fused_top.cls_name}:{fused_top.score:.2f}"
        (tw, _), _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        draw_text_box(vis, max(0, int(b[2]) - tw - 8), max(0, int(b[1]) - 22), txt, COL_FUSED, 0.6, 2)
    # overlap notes
    if notes:
        y0 = 150
        draw_text_box(vis, 12, y0, "Overlap notes:", (255,255,255), 0.58, 2)
        y0 += 26
        for k, line in enumerate(notes[:6]):
            draw_text_box(vis, 12, y0 + 22*k, line, (255,255,255), 0.50, 1)
    # side panels: BEV + front X-Z crop, resized to the image height
    det_ref = fused_top if fused_top is not None else pcdet_top
    bev = make_bev_image(raw_points, pcdet_top, fused_top)
    front = make_front_xz_crop(raw_points, det_ref, pcdet_top, fused_top)
    total_w = int(W / 0.50)  # 2W
    bev_w = int(total_w * 0.40)
    front_w = total_w - W - bev_w  # 0.10 total
    bev_r = cv2.resize(bev, (bev_w, H), interpolation=cv2.INTER_NEAREST)
    front_r = cv2.resize(front, (front_w, H), interpolation=cv2.INTER_NEAREST)
    canvas = cv2.hconcat([vis, bev_r, front_r])
    cv2.imwrite(out_path, canvas)
def iou3d_matrix_cpu(boxes_a: np.ndarray, boxes_b: np.ndarray) -> np.ndarray:
    """Pairwise 3D IoU between (N,7) and (M,7) boxes on CPU.

    Recovers the BEV intersection area from OpenPCDet's BEV IoU
    (inter = iou * (A + B) / (1 + iou)), multiplies by vertical overlap to
    get intersection volume, and normalizes by the union volume.
    """
    N, M = boxes_a.shape[0], boxes_b.shape[0]
    if N == 0 or M == 0:
        return np.zeros((N, M), dtype=np.float32)
    from pcdet.ops.iou3d_nms import iou3d_nms_utils
    ta = torch.from_numpy(boxes_a).float()
    tb = torch.from_numpy(boxes_b).float()
    bev_iou = iou3d_nms_utils.boxes_bev_iou_cpu(ta, tb).cpu().numpy().astype(np.float32)
    area_a = (boxes_a[:, 3] * boxes_a[:, 4]).reshape(N, 1)
    area_b = (boxes_b[:, 3] * boxes_b[:, 4]).reshape(1, M)
    inter_area = np.where(
        bev_iou > 0,
        bev_iou * (area_a + area_b) / (1.0 + bev_iou + 1e-9),
        0.0
    ).astype(np.float32)
    zmax_a = boxes_a[:, 2] + boxes_a[:, 5] / 2.0
    zmin_a = boxes_a[:, 2] - boxes_a[:, 5] / 2.0
    zmax_b = boxes_b[:, 2] + boxes_b[:, 5] / 2.0
    zmin_b = boxes_b[:, 2] - boxes_b[:, 5] / 2.0
    overlap_h = np.maximum(
        0.0,
        np.minimum(zmax_a.reshape(N, 1), zmax_b.reshape(1, M)) -
        np.maximum(zmin_a.reshape(N, 1), zmin_b.reshape(1, M))
    ).astype(np.float32)
    inter_vol = inter_area * overlap_h
    vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).reshape(N, 1)
    vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).reshape(1, M)
    union_vol = vol_a + vol_b - inter_vol + 1e-9
    return (inter_vol / union_vol).astype(np.float32)
def compute_ap_from_pr(rec: np.ndarray, prec: np.ndarray) -> float:
    """VOC-style AP: area under the right-envelope precision-recall curve."""
    if rec.size == 0:
        return 0.0
    r = np.concatenate(([0.0], rec, [1.0]))
    p = np.concatenate(([0.0], prec, [0.0]))
    # make precision monotonically non-increasing, scanning from the right
    p = np.maximum.accumulate(p[::-1])[::-1]
    steps = np.flatnonzero(r[1:] != r[:-1])
    return float(np.sum((r[steps + 1] - r[steps]) * p[steps + 1]))
def eval_3d_map_mar(
    all_dets: Dict[str, List[Tuple[str, np.ndarray, float]]],
    all_gts: Dict[str, Dict[str, List[np.ndarray]]],
    class_names: List[str],
    iou_thr: float
) -> Dict:
    """Per-class 3D AP/AR at *iou_thr* with greedy score-ordered matching.

    Args:
        all_dets: class -> list of (frame_id, box7, score)
        all_gts:  class -> frame_id -> list of GT box7
        class_names: classes to evaluate
        iou_thr: 3D IoU threshold for a true positive
    Classes with no GT get AP/AR=None and are excluded from the means.
    """
    results = {"iou_thr": iou_thr, "per_class": {}, "mAP": 0.0, "mAR": 0.0}
    aps, ars = [], []
    for cls in class_names:
        dets = all_dets.get(cls, [])
        gts_by_frame = all_gts.get(cls, {})
        npos = sum(len(v) for v in gts_by_frame.values())
        if npos == 0:
            results["per_class"][cls] = {"AP": None, "AR": None, "nGT": 0, "nDet": len(dets)}
            continue
        dets_sorted = sorted(dets, key=lambda x: x[2], reverse=True)
        tp = np.zeros((len(dets_sorted),), dtype=np.float32)
        fp = np.zeros((len(dets_sorted),), dtype=np.float32)
        # one "already matched" flag per GT box, per frame
        matched = {fid: np.zeros((len(gts_by_frame[fid]),), dtype=bool) for fid in gts_by_frame.keys()}
        for i, (fid, box, score) in enumerate(dets_sorted):
            gt_list = gts_by_frame.get(fid, [])
            if len(gt_list) == 0:
                fp[i] = 1.0
                continue
            gt_boxes = np.stack(gt_list, axis=0).astype(np.float32)
            det_box = box.reshape(1, 7).astype(np.float32)
            ious = iou3d_matrix_cpu(det_box, gt_boxes).reshape(-1)
            jmax = int(np.argmax(ious))
            if ious[jmax] >= iou_thr and (not matched[fid][jmax]):
                tp[i] = 1.0
                matched[fid][jmax] = True
            else:
                fp[i] = 1.0
        tp_cum = np.cumsum(tp)
        fp_cum = np.cumsum(fp)
        rec = tp_cum / float(npos)
        prec = tp_cum / np.maximum(tp_cum + fp_cum, 1e-9)
        ap = compute_ap_from_pr(rec, prec)
        ar = float(rec[-1]) if rec.size > 0 else 0.0
        results["per_class"][cls] = {"AP": ap, "AR": ar, "nGT": int(npos), "nDet": int(len(dets_sorted))}
        aps.append(ap)
        ars.append(ar)
    results["mAP"] = float(np.mean(aps)) if len(aps) > 0 else 0.0
    results["mAR"] = float(np.mean(ars)) if len(ars) > 0 else 0.0
    return results
def parse_pcdet_gt_label(txt_path: str) -> List[Det3D]:
    """Parse a GT label file into Det3D objects.

    Each line is '<x y z dx dy dz yaw> <class>'; shorter lines are skipped.
    GT score is fixed at 1.0. Missing file yields an empty list.
    """
    out: List[Det3D] = []
    if not os.path.exists(txt_path):
        return out
    with open(txt_path, "r", encoding="utf-8") as fh:
        for raw in fh:
            stripped = raw.strip()
            if not stripped:
                continue
            parts = stripped.split()
            if len(parts) < 8:
                continue
            box = np.array([float(v) for v in parts[:7]], dtype=np.float32)
            out.append(Det3D(box7=box, cls_name=canonical_class(parts[7]), score=1.0))
    return out
def _extract_model_state(ckpt_obj):
if isinstance(ckpt_obj, dict):
if "model_state" in ckpt_obj:
return ckpt_obj["model_state"]
if "state_dict" in ckpt_obj:
return ckpt_obj["state_dict"]
return ckpt_obj
def _override_pcdet_score_thresh(cfg, thr: float = 0.0):
    """Best-effort: lower SCORE_THRESH to *thr* in every post-processing section of the PCDet config.

    Each section is handled independently and silently skipped when absent
    or structured differently, so this works across model config variants.
    """
    try:
        if hasattr(cfg.MODEL, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.POST_PROCESSING:
            cfg.MODEL.POST_PROCESSING.SCORE_THRESH = float(thr)
    except Exception:
        pass
    try:
        if hasattr(cfg.MODEL, "DENSE_HEAD") and hasattr(cfg.MODEL.DENSE_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.DENSE_HEAD.POST_PROCESSING:
            cfg.MODEL.DENSE_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr)
    except Exception:
        pass
    try:
        if hasattr(cfg.MODEL, "ROI_HEAD") and hasattr(cfg.MODEL.ROI_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.ROI_HEAD.POST_PROCESSING:
            cfg.MODEL.ROI_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr)
    except Exception:
        pass
def _ensure_map_to_bev_proj_bn_registered(model, state_dict, device):
    """Compatibility shim: register map_to_bev proj/bn layers expected by the checkpoint.

    If the checkpoint contains map_to_bev_module.proj/bn weights but the
    built model lacks those submodules, create them with shapes inferred
    from the checkpoint so load_state_dict can succeed. No-op otherwise.
    """
    proj_w_key = "map_to_bev_module.proj.weight"
    bn_w_key = "map_to_bev_module.bn.weight"
    if proj_w_key not in state_dict or bn_w_key not in state_dict:
        return
    mtb = getattr(model, "map_to_bev_module", None)
    if mtb is None:
        return
    # already registered -> nothing to do
    if isinstance(getattr(mtb, "proj", None), nn.Module) and isinstance(getattr(mtb, "bn", None), nn.Module):
        return
    w = state_dict[proj_w_key]
    out_ch, in_ch, kH, kW = w.shape
    use_bias = ("map_to_bev_module.proj.bias" in state_dict)
    # "same" padding for k>1 kernels
    padding = (kH // 2, kW // 2) if (kH > 1 or kW > 1) else (0, 0)
    mtb.proj = nn.Conv2d(in_ch, out_ch, kernel_size=(kH, kW), stride=1,
                         padding=padding, bias=use_bias).to(device)
    mtb.bn = nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01).to(device)
    print(f"[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d({in_ch}->{out_ch}, k={kH}x{kW})")
def load_yolo_model(weights_path: str):
    """Load a YOLO model from *weights_path*, falling back to the local ultralytics checkout if the package is not installed."""
    try:
        from ultralytics import YOLO
    except Exception:
        sys.path.insert(0, ULTRALYTICS_REPO)
        from ultralytics import YOLO
    return YOLO(weights_path)
def load_pcdet_model(cfg_path: str, ckpt_path: str, device: torch.device):
    """Build the OpenPCDet network and a demo dataset, then load the checkpoint.

    Returns (cfg, dataset, model, load_data_to_gpu). The dataset iterates
    the .bin files under <dataset_root>/points; the checkpoint is loaded
    non-strict after the map_to_bev compatibility shim.
    """
    sys.path.insert(0, OPENPCDET_REPO)
    from pcdet.config import cfg, cfg_from_yaml_file
    from pcdet.datasets import DatasetTemplate
    from pcdet.models import build_network, load_data_to_gpu
    from pcdet.utils import common_utils

    class DemoDataset(DatasetTemplate):
        """Minimal inference dataset over raw .bin point files."""

        def __init__(self, dataset_cfg, class_names, root_path, ext=".bin", logger=None):
            super().__init__(dataset_cfg=dataset_cfg, class_names=class_names,
                             training=False, root_path=root_path, logger=logger)
            self.root_path = Path(root_path)
            self.ext = ext
            self.points_dir = self.root_path / "points"
            self.sample_file_list = sorted(glob.glob(str(self.points_dir / f"*{self.ext}")))

        def __len__(self):
            return len(self.sample_file_list)

        def __getitem__(self, index):
            p = Path(self.sample_file_list[index])
            points = np.fromfile(str(p), dtype=np.float32).reshape(-1, 4)
            input_dict = {"points": points, "frame_id": p.stem}
            data_dict = self.prepare_data(data_dict=input_dict)
            return data_dict

    logger = common_utils.create_logger()
    cfg_from_yaml_file(cfg_path, cfg)
    # keep every raw detection; filtering happens later in this script
    _override_pcdet_score_thresh(cfg, thr=0.0)
    dataset_root = str(Path(PCDET_POINTS_DIR).parent)
    dataset = DemoDataset(cfg.DATA_CONFIG, cfg.CLASS_NAMES, dataset_root, ext=".bin", logger=logger)
    model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset)
    model.to(device).eval()
    ckpt_obj = torch.load(ckpt_path, map_location="cpu")
    state = _extract_model_state(ckpt_obj)
    _ensure_map_to_bev_proj_bn_registered(model, state, device)
    ret = model.load_state_dict(state, strict=False)
    print(f"[PCDet] load_state_dict done. missing={len(ret.missing_keys)} unexpected={len(ret.unexpected_keys)}")
    return cfg, dataset, model, load_data_to_gpu
@torch.no_grad()
def infer_yolo(yolo_model, img_bgr: np.ndarray) -> List[Det2D]:
    """Run YOLO on a BGR image and return the top-K detections above YOLO_CONF_LOW, sorted by score."""
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    results = yolo_model(img_rgb, conf=YOLO_CONF_LOW, iou=0.7, verbose=False)
    r = results[0]
    dets: List[Det2D] = []
    if r.boxes is None or len(r.boxes) == 0:
        return dets
    xyxy = r.boxes.xyxy.detach().cpu().numpy()
    conf = r.boxes.conf.detach().cpu().numpy()
    cls = r.boxes.cls.detach().cpu().numpy().astype(int)
    for b, s, c in zip(xyxy, conf, cls):
        if float(s) < YOLO_CONF_LOW:
            continue
        # out-of-range class ids fall back to the raw numeric id string
        cls_name = CLASSES[c] if 0 <= c < len(CLASSES) else str(c)
        dets.append(Det2D(xyxy=b.tolist(), cls_name=canonical_class(cls_name), score=float(s)))
    dets = sorted(dets, key=lambda d: d.score, reverse=True)[:YOLO_KEEP_TOPK]
    return dets
@torch.no_grad()
def infer_pcdet(cfg, dataset, model, load_data_to_gpu_fn, index: int, device: torch.device):
    """Run the OpenPCDet model on one dataset frame.

    Args:
        cfg: OpenPCDet config (provides CLASS_NAMES).
        dataset: DemoDataset instance (indexable, provides collate_batch).
        model: built and loaded PCDet network.
        load_data_to_gpu_fn: pcdet helper that moves a batch onto the GPU.
        index: dataset index of the frame to run.
        device: torch device; GPU transfer only happens for cuda.

    Returns:
        (frame_id, raw_points, dets3d) where raw_points is the (N, 4) float32
        cloud re-read from disk (None if the .bin is missing) and dets3d are
        the Det3D predictions scoring at least PCDET_CONF_LOW.
    """
    data_dict = dataset[index]
    frame_id = data_dict["frame_id"]
    # Re-read the raw points for visualization; the dataset pipeline may
    # have voxelized / filtered its own copy.
    bin_path = str(Path(PCDET_POINTS_DIR) / f"{frame_id}.bin")
    raw_points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) if os.path.exists(bin_path) else None
    batch = dataset.collate_batch([data_dict])
    if device.type == "cuda":
        load_data_to_gpu_fn(batch)
    pred_dicts, _ = model.forward(batch)
    pred = pred_dicts[0]
    boxes = pred["pred_boxes"].detach().cpu().numpy().astype(np.float32)
    scores = pred["pred_scores"].detach().cpu().numpy().astype(np.float32)
    labels = pred["pred_labels"].detach().cpu().numpy().astype(np.int32)
    # OpenPCDet labels are normally 1-based; detect 0-based checkpoints defensively.
    label_base = 1
    if labels.size > 0 and labels.min() == 0:
        label_base = 0
    dets3d: List[Det3D] = []
    for b, s, lb in zip(boxes, scores, labels):
        if float(s) < PCDET_CONF_LOW:
            continue
        idx_cls = int(lb) - 1 if label_base == 1 else int(lb)
        cls_name = cfg.CLASS_NAMES[idx_cls] if 0 <= idx_cls < len(cfg.CLASS_NAMES) else str(int(lb))
        dets3d.append(Det3D(box7=b.copy(), cls_name=canonical_class(cls_name), score=float(s)))
    return frame_id, raw_points, dets3d
def fuse_frame(yolo_dets: List[Det2D], pcdet_dets: List[Det3D],
               img_w: int, img_h: int, calib: Dict, T_lidar2cam: np.ndarray) -> List[Det3D]:
    """Decision-level fusion of YOLO 2D and PCDet 3D detections.

    Projects each 3D box to the image, associates projections with YOLO
    boxes, then fuses class/score through the match quality. The 3D
    geometry (box7) is always copied unchanged from PCDet — only the
    label and confidence are fused.

    Args:
        yolo_dets: 2D detections from infer_yolo.
        pcdet_dets: 3D detections from infer_pcdet (proj_xyxy is filled in here).
        img_w, img_h: image size used for projection clipping / matching.
        calib: camera intrinsics dict.
        T_lidar2cam: 4x4 lidar-to-camera extrinsic.

    Returns:
        Fused Det3D list: matched pairs kept when the fused score reaches
        FUSED_KEEP_THRESH, unmatched PCDet boxes kept only above
        PCDET_CONF_HIGH_UNMATCHED.
    """
    # 3D -> 2D projection (side effect: stores proj_xyxy on each det).
    if len(pcdet_dets) > 0:
        boxes7 = np.stack([d.box7 for d in pcdet_dets], axis=0)
        proj = project_boxes3d_to_2d(boxes7, calib, T_lidar2cam, img_w, img_h, use_distortion=USE_DISTORTION)
        for d, p in zip(pcdet_dets, proj):
            d.proj_xyxy = p

    y2p, p2y = associate_yolo_pcdet(yolo_dets, pcdet_dets, img_w, img_h)
    fused: List[Det3D] = []
    for pj, pdet in enumerate(pcdet_dets):
        if pj in p2y:
            ydet = yolo_dets[p2y[pj]]
            if pdet.proj_xyxy is None:
                continue
            # Match quality q (depends on IoU / center / containment / edge cues).
            q, _ = match_quality_2d(ydet.xyxy, pdet.proj_xyxy, img_w, img_h)
            fused_cls, fused_score = fuse_with_quality(ydet, pdet, q)
            if fused_score >= FUSED_KEEP_THRESH:
                fused.append(Det3D(
                    box7=pdet.box7.copy(),
                    cls_name=canonical_class(fused_cls),
                    score=float(fused_score),
                    proj_xyxy=pdet.proj_xyxy
                ))
        else:
            # Unmatched by YOLO, but lidar-only confidence is high enough to keep.
            if pdet.score >= PCDET_CONF_HIGH_UNMATCHED:
                fused.append(Det3D(
                    box7=pdet.box7.copy(),
                    cls_name=pdet.cls_name,
                    score=float(pdet.score),
                    proj_xyxy=pdet.proj_xyxy
                ))
    return fused
def load_models():
    """Load the YOLO and OpenPCDet models onto the best available device.

    Returns:
        (device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn)
        — everything main() needs for both inference paths.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[INFO] device={device}")

    print("[INFO] Loading YOLO...")
    yolo_model = load_yolo_model(YOLO_WEIGHTS)

    print("[INFO] Loading PCDet...")
    cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_pcdet_model(PCDET_CFG, PCDET_CKPT, device)
    print(f"[PCDet] CLASS_NAMES = {list(cfg.CLASS_NAMES)}")

    return device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn
def main():
    """Driver: pass 1 collects 3D eval stats, pass 2 saves debug images, then prints mAP/mAR."""
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    ensure_dir(DEBUG_DIR)

    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_models()

    bin_files = sorted(glob.glob(os.path.join(PCDET_POINTS_DIR, "*.bin")))
    frame_ids_all = [Path(p).stem for p in bin_files]
    # Only frames that have both a point cloud and a matching image are usable.
    valid_ids = [fid for fid in frame_ids_all if find_image_for_frame(fid) is not None]
    if not valid_ids:
        print("[ERROR] No matching (pointcloud, image) pairs found.")
        return

    # Eval subset (progress is logged so long runs do not look stalled).
    if NUM_EVAL_FRAMES is None:
        eval_ids = valid_ids
    else:
        eval_ids = random.sample(valid_ids, k=min(NUM_EVAL_FRAMES, len(valid_ids)))
    # Debug-visualization subset.
    debug_ids = random.sample(valid_ids, k=min(NUM_DEBUG_FRAMES, len(valid_ids)))

    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}

    # Eval storage: per class -> [(frame_id, box7, score)] and per-frame GT boxes.
    all_dets_pcd: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_dets_fus: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_gts: Dict[str, Dict[str, List[np.ndarray]]] = {c: {} for c in CLASSES}

    chosen_T = None  # lidar->camera extrinsic, resolved once on the first frame

    # ---------- Pass 1: eval collect ----------
    print(f"[EVAL] collecting on {len(eval_ids)} frames ...")
    for k, fid in enumerate(eval_ids):
        if fid not in idx_map:
            continue
        if (k % PROGRESS_EVERY) == 0:
            print(f"[EVAL] {k}/{len(eval_ids)} frame={fid}")
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]

        yolo_dets = infer_yolo(yolo_model, img)
        frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)

        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)

        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T)

        # Store baseline (PCDet-only) predictions.
        for d in pcdet_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_pcd and d.score >= EVAL_SCORE_THR_PCD:
                all_dets_pcd[c].append((fid, d.box7.copy(), float(d.score)))
        # Store fused predictions.
        for d in fused_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_fus and d.score >= EVAL_SCORE_THR_FUSED:
                all_dets_fus[c].append((fid, d.box7.copy(), float(d.score)))
        # Store ground truth.
        gt_path = os.path.join(PCDET_GT_LABEL_DIR, f"{fid}.txt")
        gt_dets = parse_pcdet_gt_label(gt_path)
        for gd in gt_dets:
            c = canonical_class(gd.cls_name)
            if c in all_gts:
                all_gts[c].setdefault(fid, []).append(gd.box7.copy())

    # ---------- Pass 2: debug visualization ----------
    print(f"[DEBUG] saving {len(debug_ids)} visualizations into {DEBUG_DIR}/ ...")
    for k, fid in enumerate(debug_ids):
        if fid not in idx_map:
            continue
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]

        yolo_dets = infer_yolo(yolo_model, img)
        frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)

        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)

        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T)

        # Visualize only the top-1 detection of each source to keep images readable.
        y_top = top1_yolo(yolo_dets)
        p_top = top1_3d(pcdet_dets)
        f_top = top1_3d(fused_dets)

        out_path = os.path.join(DEBUG_DIR, f"{fid}.jpg")
        make_debug_vis(fid, img, y_top, p_top, f_top, raw_points, out_path)
        if (k % 5) == 0:
            print(f"[DEBUG] {k}/{len(debug_ids)} saved {out_path}")

    # ---------- Print metrics ----------
    print("\n================= 3D Evaluation =================")
    print(f"[EvalFrames] {len(eval_ids)} | PCD_thr={EVAL_SCORE_THR_PCD} | FUSED_thr={EVAL_SCORE_THR_FUSED}")
    for thr in EVAL_IOU_THRESHOLDS:
        res_p = eval_3d_map_mar(all_dets_pcd, all_gts, CLASSES, iou_thr=thr)
        res_f = eval_3d_map_mar(all_dets_fus, all_gts, CLASSES, iou_thr=thr)

        print(f"\n[IOU={thr:.2f}] PCDet baseline: mAP={res_p['mAP']:.4f}, mAR={res_p['mAR']:.4f}")
        for c in CLASSES:
            pc = res_p["per_class"].get(c, {})
            if pc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={pc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={pc['AP']:.4f} AR={pc['AR']:.4f} nGT={pc['nGT']} nDet={pc['nDet']}")

        print(f"[IOU={thr:.2f}] FUSED: mAP={res_f['mAP']:.4f}, mAR={res_f['mAR']:.4f}")
        for c in CLASSES:
            fc = res_f["per_class"].get(c, {})
            if fc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={fc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={fc['AP']:.4f} AR={fc['AR']:.4f} nGT={fc['nGT']} nDet={fc['nDet']}")

    print(f"\n[INFO] Done. Debug images saved in: {DEBUG_DIR}/")
def _ensure_map_to_bev_proj_bn_registered(model, state_dict, device):
proj_w_key = "map_to_bev_module.proj.weight"
bn_w_key = "map_to_bev_module.bn.weight"
if proj_w_key not in state_dict or bn_w_key not in state_dict:
return
mtb = getattr(model, "map_to_bev_module", None)
if mtb is None:
return
if isinstance(getattr(mtb, "proj", None), nn.Module) and isinstance(getattr(mtb, "bn", None), nn.Module):
return
textw = state_dict[proj_w_key] out_ch, in_ch, kH, kW = w.shape use_bias = ("map_to_bev_module.proj.bias" in state_dict) padding = (kH // 2, kW // 2) if (kH > 1 or kW > 1) else (0, 0) mtb.proj = nn.Conv2d(in_ch, out_ch, kernel_size=(kH, kW), stride=1, padding=padding, bias=use_bias).to(device) mtb.bn = nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01).to(device) print(f"[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d({in_ch}->{out_ch}, k={kH}x{kW})")
# BUGFIX: markdown rendering stripped the dunder underscores, leaving
# `if name == "main":` which raises NameError (and would never match anyway).
# Restore the standard script entry guard.
if __name__ == "__main__":
    main()
你现在这个“看不清楚 / 右侧不是主视图 / 融合框和点云框一样”的根因其实主要有两点:
1) 布局问题:右侧 FRONT 视图的宽度(front_w)只有 0.2W,再加上点云用单像素画,缩放后几乎看不见。2) 融合框问题:决策级融合只更新类别与置信度,FUSED 的 2D 显示框直接沿用 pdet.proj_xyxy,所以 FUSED 在图上看起来和 PCDet 完全重叠。下面我给你一份修改后的完整代码(全量代码),做到:
注意:默认仍然是决策级融合(3D box7 不变,所以“融合后 3D 框大小不变”是正常的)。如果你真的希望融合后 3D 框尺寸变化,需要 box-level/feature-level 的 3D 回归,这里我也给了一个可选启发式尺寸微调开关(默认关闭,慎用,会影响 3D IoU)。
python#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Fusion + Visualization + 3D Eval (NO intrusion logic) 改动要点(解决你说的“看不清楚 / 右侧不是主视图 / 融合框和点云框一样”): 1) 可视化布局:输出改成【左:原图(含框) + BEV 小窗 + (可选)ZOOM 小窗】 | 【右:FRONT(X-Z) 主视图】, 右侧 FRONT 直接等宽显示(不再是 10% 宽的小条),看点云/3D框更清楚。 2) 点云渲染:BEV / FRONT 改成“密度图”(bincount + log + gamma) 并做轻微膨胀,远处稀疏点也能看清。 3) 融合框为什么和点云框一样: - 你现在是“决策级融合”,只融合类别/置信度,不做 3D 框回归,所以 3D box7 必然沿用 PCDet 的框(大小不变)。 - 但为了让你在图上能直观看到“融合框”不是一条线重叠,我们把 fused 的 2D 显示框改成 YOLO 框与 PCDet 投影框的加权融合(随匹配质量 q 变化) —— 这样 FUSED 的 2D 框不再和 PCDet 一模一样。 - 同时在 BEV/FRONT 里默认只画 FUSED(避免和 PCDet 3D 框完全重合看不出来),可用开关切回对比模式。 4) 其余:保持你原来的匹配/quality gating/eval 打印,并保留逐帧进度日志。 注意:本脚本默认仍是“决策级融合”(3D box7 不变),如果你真的希望融合后 3D 框尺寸变化, 需要 box-level/feature-level fusion 或额外的 3D 回归网络;这里仅提供一个可选的启发式尺寸微调开关(默认关闭)。 """ import os import sys import glob import math import random from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Tuple, Optional import numpy as np import cv2 import torch import torch.nn as nn # ========================= # Paths (按你环境) # ========================= IMG_DIR = "/media/jd/4997BB1603CFE2C4/lw/aug2/img" PCDET_POINTS_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/points" PCDET_GT_LABEL_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/labels" ULTRALYTICS_REPO = "/media/jd/4997BB1603CFE2C4/lw/ultralytic" YOLO_WEIGHTS = "/media/jd/4997BB1603CFE2C4/lw/ultralytic/runs1/1209yolo11_m+SM+d1/weights/best.pt" OPENPCDET_REPO = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet" PCDET_CFG = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/tools/cfgs/custom_models/train.yaml" PCDET_CKPT = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/output/cfgs/custom_models/train/default/ckpt/checkpoint_epoch_80.pth" DEBUG_DIR = "debug_vis" # ========================= # Classes # ========================= CLASSES = ['Drone', 'Plastic_sheet', 'Kite', 'Balloon', 'Bird'] # ========================= # Calibration # ========================= CALIB = { "extrinsic_matrix": np.array([ 
[0.00871822977022152, -0.9990101808868351, -0.043619387365335945, -0.04000000000000002], [-0.0003806461322609286, 0.043617726472722454, -0.9990482215818578, 0.25], [0.9999619230641715, 0.008726535498373544, -1.3877787807814457e-17, -0.04000000000000002], [0.0, 0.0, 0.0, 1.0] ], dtype=np.float64), "fx": 3605.0801593073, "fy": 3604.9573399128, "cx": 951.9363889574, "cy": 816.9773743048, "width": 1920, "height": 1536, "dist": np.array([ 2.4515361243, -46.8007861419, -0.0002973913, -0.0008024507, -144.3698857610, 2.6420544359, -46.0443623397, -158.1742719597 ], dtype=np.float64) } USE_DISTORTION = True EXTRINSIC_MODE = "auto" # auto / lidar2cam / cam2lidar # ========================= # Colors (BGR) # ========================= COL_YOLO = (0, 255, 0) COL_PCDET = (255, 0, 0) COL_FUSED = (0, 0, 255) # ========================= # Thresholds # ========================= YOLO_CONF_LOW = 0.05 PCDET_CONF_LOW = 0.01 YOLO_KEEP_TOPK = 1 PCDET_CONF_HIGH_UNMATCHED = 0.10 FUSED_KEEP_THRESH = 0.10 # matching MATCH_IOU_THR = 0.05 MATCH_CENTER_DIST_THR_PX = 160.0 PROJ_BOX_EXPAND_RATIO = 0.18 COST_ALPHA = 0.7 # ========================= # Overlap visualization / scoring (稳定版) # ========================= OV_IOU_THR = 0.20 OV_CENTER_DIST_PX = 40.0 OV_CONTAINMENT_THR = 0.60 OV_EDGE_GAP_PX = 6.0 OV_MIN_SCORE_FOR_STYLE = 0.55 # ========================= # Fusion: match quality -> affects confidence # ========================= Q_IOU_REF = 0.30 Q_DIST_REF_PX = 80.0 Q_EDGE_REF_PX = 8.0 FUSED_Q_GATING_STRENGTH = 0.60 # ========================= # Debug / Eval # ========================= RANDOM_SEED = 42 NUM_DEBUG_FRAMES = 20 NUM_EVAL_FRAMES = 800 # None=全量(慢),建议先用 300~1000 验证流程 EVAL_IOU_THRESHOLDS = [0.7] EVAL_SCORE_THR_PCD = 0.10 EVAL_SCORE_THR_FUSED = 0.10 PROGRESS_EVERY = 20 # ========================= # BEV / FRONT rendering # ========================= # 全局范围(当没有 det_ref 或你关闭 crop 时用) BEV_X_RANGE = (0.0, 80.0) BEV_Y_RANGE = (-30.0, 30.0) # 更高分辨率 + crop(看局部更清楚) BEV_RESOLUTION = 0.06 BEV_MIN_WPX 
= 360 BEV_MIN_HPX = 360 BEV_CROP_AROUND_DET = True BEV_CROP_MARGIN_X = 22.0 # meters BEV_CROP_MARGIN_Y = 22.0 # meters BEV_DENSITY_GAMMA = 0.55 BEV_DILATE_KSIZE = 3 # FRONT(X-Z) “抠出来”视图 FRONT_RESOLUTION = 0.03 FRONT_MIN_WPX = 520 # 右侧主视图,最小宽度加大 FRONT_MIN_HPX = 520 FRONT_CROP_EXPAND_XY = 1.0 FRONT_CROP_EXPAND_Z = 0.8 FRONT_DENSITY_GAMMA = 0.55 FRONT_DILATE_KSIZE = 3 # ========================= # Visualization layout # ========================= # 右侧作为主视图:只输出两栏 [IMAGE(with insets) | FRONT(main)] VIS_LAYOUT = "front_main" # front_main / classic_3panels(可自行扩展) VIS_FRONT_WIDTH_RATIO_TO_IMG = 1.00 # FRONT 右侧宽度 = W * ratio VIS_BEV_INSET_WIDTH_RATIO = 0.34 # BEV 作为左图的小窗,宽度占左图宽度比例 VIS_ADD_ZOOM_INSET = True VIS_ZOOM_INSET_SIZE = 420 # px (square) in left image VIS_ZOOM_CONTEXT_EXPAND_RATIO = 0.65 # 对框做扩张后裁剪(越大越包含背景) VIS_OUT_SCALE = 1.0 # 最终输出整体缩放(<1 可减小文件大小) # 在 BEV/FRONT 视图里,如果有 fused_top,默认只画 FUSED(否则完全重叠看不出来) VIS_SHOW_PCDET_WHEN_FUSED = False # 融合 2D 显示框(让 FUSED 在图上不再和 PCDet 投影框完全一样) FUSE_2D_BOX_FOR_VIS = True # ========================= # Optional: heuristic 3D size refinement (默认关闭) # ========================= # 注意:这不是严格的 3D box fusion,只是让 box7 尺寸随 2D 尺寸差异做一点点变化, # 可能提高也可能降低 3D IoU;默认 False。 REFINE_3D_BOX_DIMS = False REFINE_MAX_SCALE = 1.25 REFINE_MIN_SCALE = 0.75 REFINE_STRENGTH_XY = 0.25 # dx/dy 调整强度 REFINE_STRENGTH_Z = 0.20 # dz 调整强度 # ========================= # Sensor weights table (base for DS) # ========================= PCDET_AP07 = { "Drone": 91.6, "Plastic_sheet": 55.52, "Kite": 40.61, "Balloon": 99.96, "Bird": 73.37 } # ========================= # Data structures # ========================= @dataclass class Det2D: xyxy: List[float] cls_name: str score: float @dataclass class Det3D: box7: np.ndarray cls_name: str score: float proj_xyxy: Optional[List[float]] = None # ========================= # Utils # ========================= def ensure_dir(p: str): os.makedirs(p, exist_ok=True) def canonical_class(name: str) -> str: if name is None: return name n = 
name.strip() n_low = n.lower().replace("-", "_") mapping = { "drone": "Drone", "kite": "Kite", "balloon": "Balloon", "bird": "Bird", "plastic_sheet": "Plastic_sheet", "plastic": "Plastic_sheet", "plasticsheet": "Plastic_sheet", } return mapping.get(n_low, n) def find_image_for_frame(frame_id: str) -> Optional[str]: for ext in [".jpg", ".png", ".jpeg", ".bmp"]: p = os.path.join(IMG_DIR, frame_id + ext) if os.path.exists(p): return p g = glob.glob(os.path.join(IMG_DIR, frame_id + ".*")) return g[0] if g else None def clip_box_xyxy(box, w, h): x1, y1, x2, y2 = box x1 = max(0, min(w - 1, x1)) y1 = max(0, min(h - 1, y1)) x2 = max(0, min(w - 1, x2)) y2 = max(0, min(h - 1, y2)) if x2 < x1: x1, x2 = x2, x1 if y2 < y1: y1, y2 = y2, y1 return [float(x1), float(y1), float(x2), float(y2)] def expand_box_xyxy(box, ratio=0.1): x1, y1, x2, y2 = box cx = (x1 + x2) / 2.0 cy = (y1 + y2) / 2.0 w = max(1.0, (x2 - x1)) h = max(1.0, (y2 - y1)) w2 = w * (1.0 + ratio) h2 = h * (1.0 + ratio) return [cx - w2 / 2, cy - h2 / 2, cx + w2 / 2, cy + h2 / 2] def box_iou2d(a, b) -> float: ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b ix1 = max(ax1, bx1) iy1 = max(ay1, by1) ix2 = min(ax2, bx2) iy2 = min(ay2, by2) iw = max(0.0, ix2 - ix1) ih = max(0.0, iy2 - iy1) inter = iw * ih area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1) area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1) return float(inter / (area_a + area_b - inter + 1e-9)) def box_intersection_area(a, b) -> float: ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b ix1 = max(ax1, bx1) iy1 = max(ay1, by1) ix2 = min(ax2, bx2) iy2 = min(ay2, by2) iw = max(0.0, ix2 - ix1) ih = max(0.0, iy2 - iy1) return float(iw * ih) def box_area(a) -> float: return float(max(0.0, a[2] - a[0]) * max(0.0, a[3] - a[1])) def center_of_box(box): x1, y1, x2, y2 = box return (0.5 * (x1 + x2), 0.5 * (y1 + y2)) def center_dist_px(a, b) -> float: ac = center_of_box(a) bc = center_of_box(b) return float(math.hypot(ac[0] - bc[0], ac[1] - bc[1])) def point_in_box(pt, box): x, y 
= pt x1, y1, x2, y2 = box return (x >= x1) and (x <= x2) and (y >= y1) and (y <= y2) def rect_edge_gap(a, b) -> float: """ 两个轴对齐矩形的边缘最近距离(0表示相交/重叠) """ ax1, ay1, ax2, ay2 = a bx1, by1, bx2, by2 = b if ax2 < bx1: gx = bx1 - ax2 elif bx2 < ax1: gx = ax1 - bx2 else: gx = 0.0 if ay2 < by1: gy = by1 - ay2 elif by2 < ay1: gy = ay1 - by2 else: gy = 0.0 return float(math.hypot(gx, gy)) # ========================= # Stable overlap + match quality score # ========================= def overlap_and_score_2d(a, b, diag: float) -> Tuple[bool, float, Dict]: iou = box_iou2d(a, b) dist = center_dist_px(a, b) gap = rect_edge_gap(a, b) inter = box_intersection_area(a, b) area_a = box_area(a) area_b = box_area(b) min_area = max(1e-9, min(area_a, area_b)) containment = float(np.clip(inter / min_area, 0.0, 1.0)) ac = center_of_box(a) bc = center_of_box(b) center_in = (point_in_box(ac, b) or point_in_box(bc, a)) cond_iou = (iou >= OV_IOU_THR) cond_contain = (containment >= OV_CONTAINMENT_THR) cond_center_in = center_in cond_edge = (gap <= OV_EDGE_GAP_PX) cond_dist = (dist <= OV_CENTER_DIST_PX) is_overlap = cond_iou or cond_contain or cond_center_in or (cond_edge and cond_dist) iou_term = min(iou / Q_IOU_REF, 1.0) dist_term = max(0.0, 1.0 - dist / max(1e-6, Q_DIST_REF_PX)) gap_term = max(0.0, 1.0 - gap / max(1e-6, Q_EDGE_REF_PX)) contain_term = containment center_bonus = 0.15 if center_in else 0.0 score = 0.45 * iou_term + 0.25 * dist_term + 0.20 * contain_term + 0.10 * gap_term + center_bonus score = float(np.clip(score, 0.0, 1.0)) info = {"iou": iou, "dist": dist, "gap": gap, "containment": containment, "center_in": center_in} return is_overlap, score, info def match_quality_2d(yolo_box, proj_box, img_w, img_h) -> Tuple[float, Dict]: diag = math.hypot(img_w, img_h) + 1e-9 _, score, info = overlap_and_score_2d(yolo_box, proj_box, diag) q = float(np.clip(0.15 + 0.85 * score, 0.0, 1.0)) return q, info # ========================= # Drawing helpers # ========================= def 
draw_text_box(img, x, y, text, color, font_scale=0.6, thickness=2): (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness) x = int(np.clip(x, 0, img.shape[1] - 1)) y = int(np.clip(y, 0, img.shape[0] - 1)) x2 = int(np.clip(x + tw + 6, 0, img.shape[1] - 1)) y2 = int(np.clip(y + th + 6, 0, img.shape[0] - 1)) cv2.rectangle(img, (x, y), (x2, y2), (0, 0, 0), -1) cv2.putText(img, text, (x + 3, y + th + 3), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness, cv2.LINE_AA) def draw_transparent_rect(img, box, color, alpha=0.22): x1, y1, x2, y2 = [int(round(v)) for v in box] overlay = img.copy() cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1) cv2.addWeighted(overlay, alpha, img, 1.0 - alpha, 0, img) def draw_dashed_line(img, p1, p2, color, thickness=2, dash_len=14, gap_len=8): x1, y1 = p1 x2, y2 = p2 dx = x2 - x1 dy = y2 - y1 length = math.hypot(dx, dy) if length < 1e-6: return vx = dx / length vy = dy / length dist = 0.0 while dist < length: seg_start = dist seg_end = min(dist + dash_len, length) sx = int(round(x1 + vx * seg_start)) sy = int(round(y1 + vy * seg_start)) ex = int(round(x1 + vx * seg_end)) ey = int(round(y1 + vy * seg_end)) cv2.line(img, (sx, sy), (ex, ey), color, thickness, cv2.LINE_AA) dist += dash_len + gap_len def draw_dashed_rect(img, box, color, thickness=2): x1, y1, x2, y2 = [int(round(v)) for v in box] draw_dashed_line(img, (x1, y1), (x2, y1), color, thickness) draw_dashed_line(img, (x2, y1), (x2, y2), color, thickness) draw_dashed_line(img, (x2, y2), (x1, y2), color, thickness) draw_dashed_line(img, (x1, y2), (x1, y1), color, thickness) def draw_box_smart(img, box, color, overlap_score: float, solid_thickness=2): if overlap_score >= OV_MIN_SCORE_FOR_STYLE: draw_transparent_rect(img, box, color, alpha=0.22) draw_dashed_rect(img, box, color, thickness=max(2, solid_thickness)) else: x1, y1, x2, y2 = [int(round(v)) for v in box] cv2.rectangle(img, (x1, y1), (x2, y2), color, solid_thickness) def draw_legend(img, x=12, 
y=12): draw_text_box(img, x, y, "Legend:", (255, 255, 255), 0.65, 2) y += 30 cv2.rectangle(img, (x, y + 6), (x + 18, y + 24), COL_YOLO, -1) draw_text_box(img, x + 26, y, "YOLO (2D)", (255, 255, 255), 0.6, 2) y += 28 cv2.rectangle(img, (x, y + 6), (x + 18, y + 24), COL_PCDET, -1) draw_text_box(img, x + 26, y, "PCDet-proj (3D->2D)", (255, 255, 255), 0.6, 2) y += 28 cv2.rectangle(img, (x, y + 6), (x + 18, y + 24), COL_FUSED, -1) draw_text_box(img, x + 26, y, "FUSED", (255, 255, 255), 0.6, 2) y += 30 draw_text_box(img, x, y, "Overlap: IoU/dist/contain/edge/center-in", (255, 255, 255), 0.50, 2) def resize_keep_aspect(img: np.ndarray, target_w: Optional[int] = None, target_h: Optional[int] = None, interp=cv2.INTER_AREA) -> np.ndarray: h, w = img.shape[:2] if target_w is None and target_h is None: return img if target_w is not None and target_h is not None: scale = min(target_w / max(1, w), target_h / max(1, h)) elif target_w is not None: scale = target_w / max(1, w) else: scale = target_h / max(1, h) nw = max(1, int(round(w * scale))) nh = max(1, int(round(h * scale))) if nw == w and nh == h: return img return cv2.resize(img, (nw, nh), interpolation=interp) def fit_to_rect(img: np.ndarray, target_w: int, target_h: int, pad_color=(0, 0, 0), interp=cv2.INTER_AREA) -> np.ndarray: resized = resize_keep_aspect(img, target_w=target_w, target_h=target_h, interp=interp) h, w = resized.shape[:2] out = np.zeros((target_h, target_w, 3), dtype=np.uint8) out[:] = pad_color y0 = (target_h - h) // 2 x0 = (target_w - w) // 2 out[y0:y0 + h, x0:x0 + w] = resized return out def paste_inset(base: np.ndarray, inset: np.ndarray, x: int, y: int, border: int = 2, border_color=(255, 255, 255)) -> None: """直接贴 inset(带边框)""" H, W = base.shape[:2] ih, iw = inset.shape[:2] x = int(np.clip(x, 0, W - 1)) y = int(np.clip(y, 0, H - 1)) x2 = min(W, x + iw) y2 = min(H, y + ih) iw2 = x2 - x ih2 = y2 - y if iw2 <= 0 or ih2 <= 0: return base[y:y2, x:x2] = inset[:ih2, :iw2] if border > 0: cv2.rectangle(base, 
(x, y), (x2 - 1, y2 - 1), border_color, border) def safe_int_box(box): x1, y1, x2, y2 = box x1i = int(math.floor(x1)) y1i = int(math.floor(y1)) x2i = int(math.ceil(x2)) y2i = int(math.ceil(y2)) return x1i, y1i, x2i, y2i # ========================= # DS fusion with dynamic match-quality # ========================= def get_sensor_weights(cls_name: str) -> Tuple[float, float]: c = canonical_class(cls_name) ap = PCDET_AP07.get(c, 70.0) / 100.0 w_lidar = float(np.clip(ap, 0.15, 0.95)) w_img = float(np.clip(1.0 - w_lidar, 0.05, 0.85)) s = w_img + w_lidar return w_img / s, w_lidar / s def ds_fuse_singleton(cls_a: str, s_a: float, w_a: float, cls_b: str, s_b: float, w_b: float) -> Tuple[str, float]: cls_a = canonical_class(cls_a) cls_b = canonical_class(cls_b) m_a = float(np.clip(w_a * s_a, 0.0, 0.999999)) m_b = float(np.clip(w_b * s_b, 0.0, 0.999999)) th_a = 1.0 - m_a th_b = 1.0 - m_b K = (m_a * m_b) if (cls_a != cls_b) else 0.0 denom = 1.0 - K + 1e-9 if cls_a == cls_b: m = (m_a * m_b + m_a * th_b + th_a * m_b) / denom return cls_a, float(m) else: ma = (m_a * th_b) / denom mb = (th_a * m_b) / denom return (cls_a, float(ma)) if ma >= mb else (cls_b, float(mb)) def fuse_with_quality(ydet: Det2D, pdet: Det3D, q: float) -> Tuple[str, float]: w_img, w_lidar = get_sensor_weights(pdet.cls_name) gate = (0.35 + 0.65 * q) s_img = float(np.clip(ydet.score * gate, 0.0, 1.0)) s_lid = float(np.clip(pdet.score * gate, 0.0, 1.0)) fused_cls, fused_score = ds_fuse_singleton( ydet.cls_name, s_img, w_img, pdet.cls_name, s_lid, w_lidar ) fused_score = float(np.clip( fused_score * ((1.0 - FUSED_Q_GATING_STRENGTH) + FUSED_Q_GATING_STRENGTH * q), 0.0, 1.0 )) return fused_cls, fused_score # ========================= # 3D geometry & projection # ========================= def boxes3d_to_corners(boxes7: np.ndarray) -> np.ndarray: N = boxes7.shape[0] corners = np.zeros((N, 8, 3), dtype=np.float32) for i in range(N): x, y, z, dx, dy, dz, yaw = boxes7[i].tolist() local = np.array([ [dx / 2, dy / 2, dz 
/ 2], [dx / 2, -dy / 2, dz / 2], [-dx / 2, -dy / 2, dz / 2], [-dx / 2, dy / 2, dz / 2], [dx / 2, dy / 2, -dz / 2], [dx / 2, -dy / 2, -dz / 2], [-dx / 2, -dy / 2, -dz / 2], [-dx / 2, dy / 2, -dz / 2], ], dtype=np.float32) cy = math.cos(yaw) sy = math.sin(yaw) R = np.array([[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]], dtype=np.float32) corners[i] = (local @ R.T) + np.array([x, y, z], dtype=np.float32) return corners def project_points_lidar_to_img(pts_lidar: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, use_distortion=True): fx, fy, cx, cy = calib["fx"], calib["fy"], calib["cx"], calib["cy"] K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float64) dist = calib["dist"].astype(np.float64) if use_distortion else None pts_h = np.concatenate([pts_lidar.astype(np.float64), np.ones((pts_lidar.shape[0], 1), dtype=np.float64)], axis=1) pts_cam = (T_lidar2cam @ pts_h.T).T[:, :3] valid = pts_cam[:, 2] > 1e-6 rvec = np.zeros((3, 1), dtype=np.float64) tvec = np.zeros((3, 1), dtype=np.float64) img_pts, _ = cv2.projectPoints(pts_cam, rvec, tvec, K, dist) return img_pts.reshape(-1, 2).astype(np.float32), valid def get_extrinsic_matrix(calib: Dict, pts_lidar_xyz: np.ndarray, img_w: int, img_h: int) -> np.ndarray: T = calib["extrinsic_matrix"].copy() if EXTRINSIC_MODE == "lidar2cam": return T if EXTRINSIC_MODE == "cam2lidar": return np.linalg.inv(T) if pts_lidar_xyz is None or pts_lidar_xyz.shape[0] < 100: return T pts = pts_lidar_xyz if pts.shape[0] > 8000: pts = pts[np.random.choice(pts.shape[0], 8000, replace=False)] def score_for(Tuse): img_pts, valid = project_points_lidar_to_img(pts, calib, Tuse, img_w, img_h, use_distortion=USE_DISTORTION) img_pts = img_pts[valid] if img_pts.shape[0] == 0: return 0.0 inside = (img_pts[:, 0] >= 0) & (img_pts[:, 0] < img_w) & (img_pts[:, 1] >= 0) & (img_pts[:, 1] < img_h) return float(inside.mean()) s1 = score_for(T) s2 = score_for(np.linalg.inv(T)) chosen = T if s1 >= s2 else np.linalg.inv(T) 
print(f"[CALIB] auto | lidar2cam_inlier={s1:.3f} cam2lidar_inlier={s2:.3f} -> choose {'lidar2cam' if s1 >= s2 else 'cam2lidar'}") return chosen def project_boxes3d_to_2d(boxes7: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, use_distortion=True): if boxes7.shape[0] == 0: return [] corners = boxes3d_to_corners(boxes7) out = [] for i in range(corners.shape[0]): img_pts, valid = project_points_lidar_to_img(corners[i], calib, T_lidar2cam, img_w, img_h, use_distortion) if valid.sum() < 4: out.append(None) continue xs = img_pts[valid, 0] ys = img_pts[valid, 1] box = clip_box_xyxy([float(xs.min()), float(ys.min()), float(xs.max()), float(ys.max())], img_w, img_h) if (box[2] - box[0]) < 2 or (box[3] - box[1]) < 2: out.append(None) else: out.append(box) return out # ========================= # Matching (Hungarian) # ========================= def hungarian_match(cost: np.ndarray) -> List[Tuple[int, int]]: try: from scipy.optimize import linear_sum_assignment r, c = linear_sum_assignment(cost) return list(zip(r.tolist(), c.tolist())) except Exception: matches = [] used_r, used_c = set(), set() idxs = np.dstack(np.unravel_index(np.argsort(cost.ravel()), cost.shape))[0] for i, j in idxs: if i in used_r or j in used_c: continue used_r.add(int(i)) used_c.add(int(j)) matches.append((int(i), int(j))) return matches def associate_yolo_pcdet(yolo: List[Det2D], pcdet: List[Det3D], img_w: int, img_h: int): N, M = len(yolo), len(pcdet) if N == 0 or M == 0: return {}, {} diag = math.hypot(img_w, img_h) + 1e-9 cost = np.ones((N, M), dtype=np.float32) * 10.0 for i in range(N): a = yolo[i].xyxy a_c = center_of_box(a) for j in range(M): b0 = pcdet[j].proj_xyxy if b0 is None: continue b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h) b_c = center_of_box(b) iou = box_iou2d(a, b) cd = math.hypot(a_c[0] - b_c[0], a_c[1] - b_c[1]) / diag inside = point_in_box(a_c, b) or point_in_box(b_c, a) bonus = -0.15 if inside else 0.0 cost[i, j] = 
COST_ALPHA * (1.0 - iou) + (1.0 - COST_ALPHA) * cd + bonus pairs = hungarian_match(cost) y2p, p2y = {}, {} for i, j in pairs: b0 = pcdet[j].proj_xyxy if b0 is None: continue b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h) iou = box_iou2d(yolo[i].xyxy, b) cd = center_dist_px(yolo[i].xyxy, b) y_c = center_of_box(yolo[i].xyxy) b_c = center_of_box(b) inside = point_in_box(y_c, b) or point_in_box(b_c, yolo[i].xyxy) if (iou >= MATCH_IOU_THR) or (cd <= MATCH_CENTER_DIST_THR_PX) or inside: y2p[i] = j p2y[j] = i return y2p, p2y # ========================= # FRONT “抠出来”点云裁剪:点在 OBB 内(含扩张) # ========================= def points_in_obb(points_xyz: np.ndarray, box7: np.ndarray, expand_xy: float, expand_z: float) -> np.ndarray: cx, cy, cz, dx, dy, dz, yaw = box7.tolist() px = points_xyz[:, 0] - cx py = points_xyz[:, 1] - cy pz = points_xyz[:, 2] - cz c = math.cos(-yaw) s = math.sin(-yaw) lx = c * px - s * py ly = s * px + c * py lz = pz hx = dx / 2.0 + expand_xy hy = dy / 2.0 + expand_xy hz = dz / 2.0 + expand_z mask = (np.abs(lx) <= hx) & (np.abs(ly) <= hy) & (np.abs(lz) <= hz) return mask def rasterize_density(rr: np.ndarray, cc: np.ndarray, H: int, W: int, gamma: float = 0.55, dilate_ksize: int = 3) -> np.ndarray: """ 把离散点 (rr, cc) 转成密度灰度图(log + gamma),再转 BGR。 """ rr = rr.astype(np.int64) cc = cc.astype(np.int64) idx = rr * W + cc counts = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32) if counts.max() > 0: dens = np.log1p(counts) dens = dens / (dens.max() + 1e-6) dens = np.power(dens, gamma) gray = (dens * 255.0).astype(np.uint8) else: gray = np.zeros((H, W), dtype=np.uint8) if dilate_ksize and dilate_ksize >= 2: k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_ksize, dilate_ksize)) gray = cv2.dilate(gray, k, iterations=1) bgr = cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) return bgr def make_front_xz_crop(points: Optional[np.ndarray], det_ref: Optional[Det3D], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> 
np.ndarray: """ 右侧正视图:只显示 det_ref 盒子附近(盒内+扩张)的点云 X 横轴, Z 纵轴 """ canvas = np.zeros((FRONT_MIN_HPX, FRONT_MIN_WPX, 3), dtype=np.uint8) draw_text_box(canvas, 8, 10, "FRONT (X-Z) MAIN", (220, 220, 220), 0.7, 2) if det_ref is None or points is None or points.shape[0] == 0: draw_text_box(canvas, 8, 45, "no det_ref / no points", (200, 200, 200), 0.6, 2) return canvas pts_xyz = points[:, :3] mask = points_in_obb(pts_xyz, det_ref.box7, FRONT_CROP_EXPAND_XY, FRONT_CROP_EXPAND_Z) crop = pts_xyz[mask] if crop.shape[0] == 0: draw_text_box(canvas, 8, 45, "no points in crop", (200, 200, 200), 0.6, 2) return canvas # 视图范围:用 det_ref 的 3D bbox 的 x/z extent + expand x, y, z, dx, dy, dz, yaw = det_ref.box7.tolist() x_min = (x - dx / 2 - FRONT_CROP_EXPAND_XY) x_max = (x + dx / 2 + FRONT_CROP_EXPAND_XY) z_min = (z - dz / 2 - FRONT_CROP_EXPAND_Z) z_max = (z + dz / 2 + FRONT_CROP_EXPAND_Z) # 分辨率 -> 画布大小 res = FRONT_RESOLUTION W = max(FRONT_MIN_WPX, int(math.ceil((x_max - x_min) / res))) H = max(FRONT_MIN_HPX, int(math.ceil((z_max - z_min) / res))) # 画点:密度图(更清晰) cc = ((crop[:, 0] - x_min) / res).astype(np.int32) rr = ((z_max - crop[:, 2]) / res).astype(np.int32) cc = np.clip(cc, 0, W - 1) rr = np.clip(rr, 0, H - 1) front = rasterize_density(rr, cc, H, W, gamma=FRONT_DENSITY_GAMMA, dilate_ksize=FRONT_DILATE_KSIZE) # 画一个简单网格(每 1m 一条,淡线,不影响阅读) step_m = 1.0 step_px = max(1, int(round(step_m / res))) grid_col = (30, 30, 30) for xg in range(0, W, step_px): cv2.line(front, (xg, 0), (xg, H - 1), grid_col, 1) for zg in range(0, H, step_px): cv2.line(front, (0, zg), (W - 1, zg), grid_col, 1) def draw_det_xz(det: Det3D, col, thick=2, tag=""): corners = boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0] xs = corners[:, 0] zs = corners[:, 2] x1, x2 = float(xs.min()), float(xs.max()) z1, z2 = float(zs.min()), float(zs.max()) x1p = int((x1 - x_min) / res) x2p = int((x2 - x_min) / res) y1p = int((z_max - z2) / res) y2p = int((z_max - z1) / res) x1p = int(np.clip(x1p, 0, W - 1)) x2p = 
int(np.clip(x2p, 0, W - 1)) y1p = int(np.clip(y1p, 0, H - 1)) y2p = int(np.clip(y2p, 0, H - 1)) cv2.rectangle(front, (x1p, y1p), (x2p, y2p), col, thick) # 中心点 cxp = int(np.clip((0.5 * (x1p + x2p)), 0, W - 1)) cyp = int(np.clip((0.5 * (y1p + y2p)), 0, H - 1)) cv2.circle(front, (cxp, cyp), 4, col, -1) title = f"{tag}{det.cls_name}:{det.score:.2f}" draw_text_box(front, max(0, x1p), max(0, y1p - 22), title, col, 0.65, 2) # 右侧主视图:默认只画 fused(更容易看清) if fused_top is not None: draw_det_xz(fused_top, COL_FUSED, thick=3, tag="FUSED ") if VIS_SHOW_PCDET_WHEN_FUSED and pcdet_top is not None: draw_det_xz(pcdet_top, COL_PCDET, thick=1, tag="PCDet ") else: if pcdet_top is not None: draw_det_xz(pcdet_top, COL_PCDET, thick=2, tag="PCDet ") draw_text_box(front, 8, 10, "FRONT (X-Z) MAIN", (220, 220, 220), 0.7, 2) draw_text_box(front, 8, 42, f"range X[{x_min:.1f},{x_max:.1f}]m Z[{z_min:.1f},{z_max:.1f}]m", (200, 200, 200), 0.55, 2) return front # ========================= # BEV panel # ========================= def make_bev_image(points: Optional[np.ndarray], det_ref: Optional[Det3D], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray: # range if BEV_CROP_AROUND_DET and det_ref is not None: cx, cy = float(det_ref.box7[0]), float(det_ref.box7[1]) x_min, x_max = (cx - BEV_CROP_MARGIN_X), (cx + BEV_CROP_MARGIN_X) y_min, y_max = (cy - BEV_CROP_MARGIN_Y), (cy + BEV_CROP_MARGIN_Y) else: x_min, x_max = BEV_X_RANGE y_min, y_max = BEV_Y_RANGE res = BEV_RESOLUTION W = max(BEV_MIN_WPX, int(math.ceil((y_max - y_min) / res))) H = max(BEV_MIN_HPX, int(math.ceil((x_max - x_min) / res))) bev = np.zeros((H, W, 3), dtype=np.uint8) if points is not None and points.shape[0] > 0: mask = (points[:, 0] >= x_min) & (points[:, 0] <= x_max) & (points[:, 1] >= y_min) & (points[:, 1] <= y_max) pts = points[mask] if pts.shape[0] > 0: rr = ((x_max - pts[:, 0]) / res).astype(np.int32) cc = ((pts[:, 1] - y_min) / res).astype(np.int32) rr = np.clip(rr, 0, H - 1) cc = np.clip(cc, 0, W - 1) bev = 
rasterize_density(rr, cc, H, W, gamma=BEV_DENSITY_GAMMA, dilate_ksize=BEV_DILATE_KSIZE) # 网格淡线 step_m = 5.0 step_px = max(1, int(round(step_m / res))) grid_col = (28, 28, 28) for yg in range(0, W, step_px): cv2.line(bev, (yg, 0), (yg, H - 1), grid_col, 1) for xg in range(0, H, step_px): cv2.line(bev, (0, xg), (W - 1, xg), grid_col, 1) def draw_one(det: Det3D, col_bgr, thick=2, tag=""): x, y, z, dx, dy, dz, yaw = det.box7.tolist() cy_ = math.cos(yaw) sy_ = math.sin(yaw) local = np.array([[dx / 2, dy / 2], [dx / 2, -dy / 2], [-dx / 2, -dy / 2], [-dx / 2, dy / 2]], dtype=np.float32) R = np.array([[cy_, -sy_], [sy_, cy_]], dtype=np.float32) corners_xy = (local @ R.T) + np.array([x, y], dtype=np.float32) rr = ((x_max - corners_xy[:, 0]) / res).astype(np.int32) cc = ((corners_xy[:, 1] - y_min) / res).astype(np.int32) poly = np.stack([cc, rr], axis=1).reshape(-1, 1, 2) cv2.polylines(bev, [poly], True, col_bgr, thick, cv2.LINE_AA) rc = int((x_max - x) / res) cc0 = int((y - y_min) / res) cv2.circle(bev, (cc0, rc), 3, col_bgr, -1) draw_text_box(bev, max(0, cc0 + 4), max(0, rc - 20), f"{tag}{det.cls_name}:{det.score:.2f}", col_bgr, 0.55, 2) if fused_top is not None: draw_one(fused_top, COL_FUSED, thick=2, tag="F ") if VIS_SHOW_PCDET_WHEN_FUSED and pcdet_top is not None: draw_one(pcdet_top, COL_PCDET, thick=1, tag="P ") else: if pcdet_top is not None: draw_one(pcdet_top, COL_PCDET, thick=2, tag="P ") draw_text_box(bev, 10, 10, "BEV (inset)", (220, 220, 220), 0.65, 2) draw_text_box(bev, 10, 40, f"X[{x_min:.1f},{x_max:.1f}] Y[{y_min:.1f},{y_max:.1f}] res={res:.2f}", (200, 200, 200), 0.5, 2) return bev # ========================= # Visualize helpers # ========================= def top1_yolo(dets: List[Det2D]) -> Optional[Det2D]: return max(dets, key=lambda d: d.score) if dets else None def top1_3d(dets: List[Det3D]) -> Optional[Det3D]: return max(dets, key=lambda d: d.score) if dets else None def make_zoom_inset(src_img: np.ndarray, ref_box: Optional[List[float]], inset_size: int 
= 420, context_expand_ratio: float = 0.65) -> Optional[np.ndarray]:
    # (tail of make_zoom_inset -- the signature starts above this chunk)
    if ref_box is None:
        return None
    H, W = src_img.shape[:2]
    box = expand_box_xyxy(ref_box, ratio=context_expand_ratio)
    box = clip_box_xyxy(box, W, H)
    x1, y1, x2, y2 = safe_int_box(box)
    # clamp the crop window to image bounds
    x1 = max(0, min(W - 1, x1))
    y1 = max(0, min(H - 1, y1))
    x2 = max(0, min(W, x2))
    y2 = max(0, min(H, y2))
    if x2 - x1 < 2 or y2 - y1 < 2:
        return None
    crop = src_img[y1:y2, x1:x2].copy()
    inset = cv2.resize(crop, (inset_size, inset_size), interpolation=cv2.INTER_LINEAR)
    cv2.rectangle(inset, (0, 0), (inset_size - 1, inset_size - 1), (255, 255, 255), 2)
    draw_text_box(inset, 8, 10, "ZOOM", (255, 255, 255), 0.7, 2)
    return inset


# =========================
# Fusion 2D box for visualization (make fused box not identical)
# =========================
def fuse_2d_box_for_vis(yolo_box: List[float], pcd_proj_box: List[float],
                        q: float, img_w: int, img_h: int) -> List[float]:
    """Visualization only: weighted blend of the YOLO box and the projected
    PCDet box so the FUSED box is visually distinguishable on the image.

    Higher q means the two boxes agree well -> weights closer to equal;
    lower q biases toward the YOLO box (better 2D localization).
    """
    wy = float(np.clip(0.80 - 0.30 * q, 0.50, 0.85))  # q=1 -> 0.50, q=0 -> 0.80
    fused = [
        wy * yolo_box[0] + (1.0 - wy) * pcd_proj_box[0],
        wy * yolo_box[1] + (1.0 - wy) * pcd_proj_box[1],
        wy * yolo_box[2] + (1.0 - wy) * pcd_proj_box[2],
        wy * yolo_box[3] + (1.0 - wy) * pcd_proj_box[3],
    ]
    return clip_box_xyxy(fused, img_w, img_h)


def refine_3d_dims_with_2d(box7: np.ndarray, yolo_box: List[float],
                           proj_box: List[float], q: float) -> np.ndarray:
    """Optional heuristic: nudge the 3D dx/dy/dz using the width/height ratio
    between the YOLO 2D box and the projected 3D box.

    Disabled by default: REFINE_3D_BOX_DIMS = False.
    """
    if not REFINE_3D_BOX_DIMS:
        return box7
    x, y, z, dx, dy, dz, yaw = box7.tolist()
    wy = max(1.0, (yolo_box[2] - yolo_box[0]))
    hy = max(1.0, (yolo_box[3] - yolo_box[1]))
    wp = max(1.0, (proj_box[2] - proj_box[0]))
    hp = max(1.0, (proj_box[3] - proj_box[1]))
    rw = float(np.clip(wy / wp, 0.2, 5.0))
    rh = float(np.clip(hy / hp, 0.2, 5.0))
    # scale strength grows with match quality q, then is clamped
    scale_xy = 1.0 + (rw - 1.0) * (REFINE_STRENGTH_XY * q)
    scale_z = 1.0 + (rh - 1.0) * (REFINE_STRENGTH_Z * q)
    scale_xy = float(np.clip(scale_xy, REFINE_MIN_SCALE, REFINE_MAX_SCALE))
    scale_z = float(np.clip(scale_z, REFINE_MIN_SCALE, REFINE_MAX_SCALE))
    dx2 = float(max(0.05, dx * scale_xy))
    dy2 = float(max(0.05, dy * scale_xy))
    dz2 = float(max(0.05, dz * scale_z))
    out = np.array([x, y, z, dx2, dy2, dz2, yaw], dtype=np.float32)
    return out


# =========================
# Visualize (Front main)
# =========================
def make_debug_vis(frame_id: str, img_bgr: np.ndarray,
                   yolo_top: Optional[Det2D], pcdet_top: Optional[Det3D],
                   fused_top: Optional[Det3D], raw_points: Optional[np.ndarray],
                   out_path: str):
    """Render one debug frame: camera image with YOLO/PCDet/FUSED boxes, a BEV
    inset, an optional zoom inset, and the FRONT (X-Z) panel on the right."""
    vis = img_bgr.copy()
    H, W = vis.shape[:2]
    draw_legend(vis, 12, 12)
    draw_text_box(vis, 12, H - 40, f"frame:{frame_id}", (0, 220, 220), 0.75, 2)

    # collect boxes for overlap styling
    box_map: Dict[str, Tuple[List[float], str]] = {}
    if yolo_top is not None:
        b = clip_box_xyxy(yolo_top.xyxy, W, H)
        box_map["YOLO"] = (b, f"{yolo_top.cls_name}:{yolo_top.score:.2f}")
    if pcdet_top is not None and pcdet_top.proj_xyxy is not None:
        b = clip_box_xyxy(pcdet_top.proj_xyxy, W, H)
        box_map["PCDet"] = (b, f"{pcdet_top.cls_name}:{pcdet_top.score:.2f}")
    if fused_top is not None and fused_top.proj_xyxy is not None:
        b = clip_box_xyxy(fused_top.proj_xyxy, W, H)
        box_map["FUSED"] = (b, f"{fused_top.cls_name}:{fused_top.score:.2f}")

    # compute pairwise overlap scores (drives dashed/translucent styling)
    diag = math.hypot(W, H) + 1e-9
    overlap_score = {k: 0.0 for k in box_map.keys()}
    notes = []
    keys = list(box_map.keys())
    for i in range(len(keys)):
        for j in range(i + 1, len(keys)):
            ki, kj = keys[i], keys[j]
            bi, _ = box_map[ki]
            bj, _ = box_map[kj]
            is_ov, sc, info = overlap_and_score_2d(bi, bj, diag)
            if is_ov:
                overlap_score[ki] = max(overlap_score[ki], sc)
                overlap_score[kj] = max(overlap_score[kj], sc)
                notes.append(f"{ki} vs {kj}: sc={sc:.2f} iou={info['iou']:.2f} d={info['dist']:.1f}px gap={info['gap']:.1f}px")

    # draw boxes on left image
    if "YOLO" in box_map and yolo_top is not None:
        b, _ = box_map["YOLO"]
        draw_box_smart(vis, b, COL_YOLO, overlap_score.get("YOLO", 0.0), 3)
        draw_text_box(vis, int(b[0]), max(0, int(b[1]) - 26),
                      f"YOLO {yolo_top.cls_name}:{yolo_top.score:.2f}", COL_YOLO, 0.7, 2)
    if "PCDet" in box_map and pcdet_top is not None:
        b, _ = box_map["PCDet"]
        # PCDet drawn with a thinner line so it does not cover the fused box
        draw_box_smart(vis, b, COL_PCDET, overlap_score.get("PCDet", 0.0), 2)
        draw_text_box(vis, int(b[0]), min(H - 30, int(b[3]) + 6),
                      f"PCDet {pcdet_top.cls_name}:{pcdet_top.score:.2f}", COL_PCDET, 0.65, 2)
    if "FUSED" in box_map and fused_top is not None:
        b, _ = box_map["FUSED"]
        draw_box_smart(vis, b, COL_FUSED, overlap_score.get("FUSED", 0.0), 4)
        txt = f"FUSED {fused_top.cls_name}:{fused_top.score:.2f}"
        (tw, _), _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
        # label anchored to the top-right corner so it does not collide with YOLO's label
        draw_text_box(vis, max(0, int(b[2]) - tw - 12), max(0, int(b[1]) - 26), txt, COL_FUSED, 0.7, 2)

    # overlap notes (brief)
    if notes:
        y0 = 160
        draw_text_box(vis, 12, y0, "Overlap notes:", (255, 255, 255), 0.6, 2)
        y0 += 28
        for k, line in enumerate(notes[:5]):
            draw_text_box(vis, 12, y0 + 24 * k, line, (255, 255, 255), 0.52, 2)

    # build FRONT main (right); prefer the fused det as reference
    det_ref = fused_top if fused_top is not None else pcdet_top
    front = make_front_xz_crop(raw_points, det_ref, pcdet_top, fused_top)

    # build BEV inset (inside left image)
    bev = make_bev_image(raw_points, det_ref, pcdet_top, fused_top)
    bev_inset_w = int(W * VIS_BEV_INSET_WIDTH_RATIO)
    bev_inset = resize_keep_aspect(bev, target_w=bev_inset_w, interp=cv2.INTER_NEAREST)
    # place at bottom-left
    paste_inset(vis, bev_inset, x=10, y=H - bev_inset.shape[0] - 10, border=2, border_color=(220, 220, 220))

    # optional zoom inset (top-right); reference box priority: fused > yolo > pcdet
    if VIS_ADD_ZOOM_INSET:
        ref = None
        if fused_top is not None and fused_top.proj_xyxy is not None:
            ref = fused_top.proj_xyxy
        elif yolo_top is not None:
            ref = yolo_top.xyxy
        elif pcdet_top is not None and pcdet_top.proj_xyxy is not None:
            ref = pcdet_top.proj_xyxy
        z_inset = make_zoom_inset(vis, ref, inset_size=VIS_ZOOM_INSET_SIZE,
                                  context_expand_ratio=VIS_ZOOM_CONTEXT_EXPAND_RATIO)
        if z_inset is not None:
            paste_inset(vis, z_inset, x=W - z_inset.shape[1] - 10, y=10, border=2, border_color=(220, 220, 220))

    # fit right front to target size (right width = W * ratio, height = H)
    front_w = int(W * VIS_FRONT_WIDTH_RATIO_TO_IMG)
    front_fit = fit_to_rect(front, front_w, H, pad_color=(0, 0, 0), interp=cv2.INTER_NEAREST)
    canvas = cv2.hconcat([vis, front_fit])
    if VIS_OUT_SCALE != 1.0:
        out_w = int(round(canvas.shape[1] * VIS_OUT_SCALE))
        out_h = int(round(canvas.shape[0] * VIS_OUT_SCALE))
        canvas = cv2.resize(canvas, (out_w, out_h), interpolation=cv2.INTER_AREA)
    cv2.imwrite(out_path, canvas)


# =========================
# 3D Eval
# =========================
def iou3d_matrix_cpu(boxes_a: np.ndarray, boxes_b: np.ndarray) -> np.ndarray:
    """Approximate NxM 3D IoU on CPU using pcdet's BEV IoU plus z-overlap."""
    N, M = boxes_a.shape[0], boxes_b.shape[0]
    if N == 0 or M == 0:
        return np.zeros((N, M), dtype=np.float32)
    from pcdet.ops.iou3d_nms import iou3d_nms_utils
    ta = torch.from_numpy(boxes_a).float()
    tb = torch.from_numpy(boxes_b).float()
    bev_iou = iou3d_nms_utils.boxes_bev_iou_cpu(ta, tb).cpu().numpy().astype(np.float32)
    area_a = (boxes_a[:, 3] * boxes_a[:, 4]).reshape(N, 1)
    area_b = (boxes_b[:, 3] * boxes_b[:, 4]).reshape(1, M)
    # invert IoU to recover the BEV intersection area: I = iou*(A+B)/(1+iou)
    inter_area = np.where(
        bev_iou > 0,
        bev_iou * (area_a + area_b) / (1.0 + bev_iou + 1e-9),
        0.0
    ).astype(np.float32)
    zmax_a = boxes_a[:, 2] + boxes_a[:, 5] / 2.0
    zmin_a = boxes_a[:, 2] - boxes_a[:, 5] / 2.0
    zmax_b = boxes_b[:, 2] + boxes_b[:, 5] / 2.0
    zmin_b = boxes_b[:, 2] - boxes_b[:, 5] / 2.0
    overlap_h = np.maximum(
        0.0,
        np.minimum(zmax_a.reshape(N, 1), zmax_b.reshape(1, M)) -
        np.maximum(zmin_a.reshape(N, 1), zmin_b.reshape(1, M))
    ).astype(np.float32)
    inter_vol = inter_area * overlap_h
    vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).reshape(N, 1)
    vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).reshape(1, M)
    union_vol = vol_a + vol_b - inter_vol + 1e-9
    return (inter_vol / union_vol).astype(np.float32)


def compute_ap_from_pr(rec: np.ndarray, prec: np.ndarray) -> float:
    """VOC-style AP from precision/recall arrays (body continues below)."""
    if rec.size == 0:
        return 0.0
    mrec = np.concatenate(([0.0], rec,
[1.0]))
    # monotone precision envelope, then integrate over recall steps
    mpre = np.concatenate(([0.0], prec, [0.0]))
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = max(mpre[i - 1], mpre[i])
    idx = np.where(mrec[1:] != mrec[:-1])[0]
    ap = np.sum((mrec[idx + 1] - mrec[idx]) * mpre[idx + 1])
    return float(ap)


def eval_3d_map_mar(
    all_dets: Dict[str, List[Tuple[str, np.ndarray, float]]],
    all_gts: Dict[str, Dict[str, List[np.ndarray]]],
    class_names: List[str],
    iou_thr: float
) -> Dict:
    """Per-class 3D AP/AR at one IoU threshold; also returns mAP/mAR means."""
    results = {"iou_thr": iou_thr, "per_class": {}, "mAP": 0.0, "mAR": 0.0}
    aps, ars = [], []
    for cls in class_names:
        dets = all_dets.get(cls, [])
        gts_by_frame = all_gts.get(cls, {})
        npos = sum(len(v) for v in gts_by_frame.values())
        if npos == 0:
            # no GT for this class: AP/AR undefined, excluded from the means
            results["per_class"][cls] = {"AP": None, "AR": None, "nGT": 0, "nDet": len(dets)}
            continue
        dets_sorted = sorted(dets, key=lambda x: x[2], reverse=True)
        tp = np.zeros((len(dets_sorted),), dtype=np.float32)
        fp = np.zeros((len(dets_sorted),), dtype=np.float32)
        # one-to-one matching: each GT box may be matched at most once
        matched = {fid: np.zeros((len(gts_by_frame[fid]),), dtype=bool) for fid in gts_by_frame.keys()}
        for i, (fid, box, score) in enumerate(dets_sorted):
            gt_list = gts_by_frame.get(fid, [])
            if len(gt_list) == 0:
                fp[i] = 1.0
                continue
            gt_boxes = np.stack(gt_list, axis=0).astype(np.float32)
            det_box = box.reshape(1, 7).astype(np.float32)
            ious = iou3d_matrix_cpu(det_box, gt_boxes).reshape(-1)
            jmax = int(np.argmax(ious))
            if ious[jmax] >= iou_thr and (not matched[fid][jmax]):
                tp[i] = 1.0
                matched[fid][jmax] = True
            else:
                fp[i] = 1.0
        tp_cum = np.cumsum(tp)
        fp_cum = np.cumsum(fp)
        rec = tp_cum / float(npos)
        prec = tp_cum / np.maximum(tp_cum + fp_cum, 1e-9)
        ap = compute_ap_from_pr(rec, prec)
        ar = float(rec[-1]) if rec.size > 0 else 0.0
        results["per_class"][cls] = {"AP": ap, "AR": ar, "nGT": int(npos), "nDet": int(len(dets_sorted))}
        aps.append(ap)
        ars.append(ar)
    results["mAP"] = float(np.mean(aps)) if len(aps) > 0 else 0.0
    results["mAR"] = float(np.mean(ars)) if len(ars) > 0 else 0.0
    return results


def parse_pcdet_gt_label(txt_path: str) -> List[Det3D]:
    """Parse one GT label file: 7 box floats followed by the class name per line."""
    dets = []
    if not os.path.exists(txt_path):
        return dets
    with open(txt_path, "r", encoding="utf-8") as f:
        lines = [ln.strip() for ln in f.readlines() if ln.strip()]
    for ln in lines:
        parts = ln.split()
        if len(parts) < 8:
            continue
        nums = list(map(float, parts[:7]))
        cls_name = canonical_class(parts[7])
        dets.append(Det3D(box7=np.array(nums, dtype=np.float32), cls_name=cls_name, score=1.0))
    return dets


# =========================
# PCDet load compat
# =========================
def _extract_model_state(ckpt_obj):
    """Return the model state_dict from a checkpoint of several known layouts."""
    if isinstance(ckpt_obj, dict):
        if "model_state" in ckpt_obj:
            return ckpt_obj["model_state"]
        if "state_dict" in ckpt_obj:
            return ckpt_obj["state_dict"]
    return ckpt_obj


def _override_pcdet_score_thresh(cfg, thr: float = 0.0):
    """Best-effort: force SCORE_THRESH in every post-processing config section."""
    try:
        if hasattr(cfg.MODEL, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.POST_PROCESSING:
            cfg.MODEL.POST_PROCESSING.SCORE_THRESH = float(thr)
    except Exception:
        pass
    try:
        if hasattr(cfg.MODEL, "DENSE_HEAD") and hasattr(cfg.MODEL.DENSE_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.DENSE_HEAD.POST_PROCESSING:
            cfg.MODEL.DENSE_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr)
    except Exception:
        pass
    try:
        if hasattr(cfg.MODEL, "ROI_HEAD") and hasattr(cfg.MODEL.ROI_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.ROI_HEAD.POST_PROCESSING:
            cfg.MODEL.ROI_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr)
    except Exception:
        pass


def _ensure_map_to_bev_proj_bn_registered(model, state_dict, device):
    """Compat shim: if the checkpoint carries map_to_bev proj/bn weights that the
    freshly built model lacks, create matching Conv2d/BatchNorm2d modules so
    load_state_dict can restore them."""
    proj_w_key = "map_to_bev_module.proj.weight"
    bn_w_key = "map_to_bev_module.bn.weight"
    if proj_w_key not in state_dict or bn_w_key not in state_dict:
        return
    mtb = getattr(model, "map_to_bev_module", None)
    if mtb is None:
        return
    if isinstance(getattr(mtb, "proj", None), nn.Module) and isinstance(getattr(mtb, "bn", None), nn.Module):
        return
    w = state_dict[proj_w_key]
    out_ch, in_ch, kH, kW = w.shape
    use_bias = ("map_to_bev_module.proj.bias" in state_dict)
    # "same" padding for k>1 kernels
    padding = (kH // 2, kW // 2) if (kH > 1 or kW > 1) else (0, 0)
    mtb.proj = nn.Conv2d(in_ch, out_ch, kernel_size=(kH, kW), stride=1, padding=padding, bias=use_bias).to(device)
    mtb.bn = nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01).to(device)
    print(f"[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d({in_ch}->{out_ch}, k={kH}x{kW})")


# =========================
# Load models
# =========================
def load_yolo_model(weights_path: str):
    """Import ultralytics (falling back to the local repo path) and load weights."""
    try:
        from ultralytics import YOLO
    except Exception:
        sys.path.insert(0, ULTRALYTICS_REPO)
        from ultralytics import YOLO
    return YOLO(weights_path)


def load_pcdet_model(cfg_path: str, ckpt_path: str, device: torch.device):
    """Build the OpenPCDet dataset + network and load the checkpoint (non-strict)."""
    sys.path.insert(0, OPENPCDET_REPO)
    from pcdet.config import cfg, cfg_from_yaml_file
    from pcdet.datasets import DatasetTemplate
    from pcdet.models import build_network, load_data_to_gpu
    from pcdet.utils import common_utils

    class DemoDataset(DatasetTemplate):
        # minimal dataset over the .bin files in <root>/points
        def __init__(self, dataset_cfg, class_names, root_path, ext=".bin", logger=None):
            super().__init__(dataset_cfg=dataset_cfg, class_names=class_names,
                             training=False, root_path=root_path, logger=logger)
            self.root_path = Path(root_path)
            self.ext = ext
            self.points_dir = self.root_path / "points"
            self.sample_file_list = sorted(glob.glob(str(self.points_dir / f"*{self.ext}")))

        def __len__(self):
            return len(self.sample_file_list)

        def __getitem__(self, index):
            p = Path(self.sample_file_list[index])
            points = np.fromfile(str(p), dtype=np.float32).reshape(-1, 4)
            input_dict = {"points": points, "frame_id": p.stem}
            data_dict = self.prepare_data(data_dict=input_dict)
            return data_dict

    logger = common_utils.create_logger()
    cfg_from_yaml_file(cfg_path, cfg)
    _override_pcdet_score_thresh(cfg, thr=0.0)
    dataset_root = str(Path(PCDET_POINTS_DIR).parent)
    dataset = DemoDataset(cfg.DATA_CONFIG, cfg.CLASS_NAMES, dataset_root, ext=".bin", logger=logger)
    model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset)
    model.to(device).eval()
    ckpt_obj = torch.load(ckpt_path, map_location="cpu")
    state = _extract_model_state(ckpt_obj)
    _ensure_map_to_bev_proj_bn_registered(model, state, device)
    ret = model.load_state_dict(state, strict=False)
    print(f"[PCDet] load_state_dict done. missing={len(ret.missing_keys)} unexpected={len(ret.unexpected_keys)}")
    return cfg, dataset, model, load_data_to_gpu


# =========================
# Inference
# =========================
@torch.no_grad()
def infer_yolo(yolo_model, img_bgr: np.ndarray) -> List[Det2D]:
    """Run YOLO on a BGR image; return top-K detections above YOLO_CONF_LOW."""
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    results = yolo_model(img_rgb, conf=YOLO_CONF_LOW, iou=0.7, verbose=False)
    r = results[0]
    dets: List[Det2D] = []
    if r.boxes is None or len(r.boxes) == 0:
        return dets
    xyxy = r.boxes.xyxy.detach().cpu().numpy()
    conf = r.boxes.conf.detach().cpu().numpy()
    cls = r.boxes.cls.detach().cpu().numpy().astype(int)
    for b, s, c in zip(xyxy, conf, cls):
        if float(s) < YOLO_CONF_LOW:
            continue
        cls_name = CLASSES[c] if 0 <= c < len(CLASSES) else str(c)
        dets.append(Det2D(xyxy=b.tolist(), cls_name=canonical_class(cls_name), score=float(s)))
    dets = sorted(dets, key=lambda d: d.score, reverse=True)[:YOLO_KEEP_TOPK]
    return dets


@torch.no_grad()
def infer_pcdet(cfg, dataset, model, load_data_to_gpu_fn, index: int, device: torch.device):
    """Run PCDet on one frame; return (frame_id, raw_points, dets3d)."""
    data_dict = dataset[index]
    frame_id = data_dict["frame_id"]
    bin_path = str(Path(PCDET_POINTS_DIR) / f"{frame_id}.bin")
    raw_points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) if os.path.exists(bin_path) else None
    batch = dataset.collate_batch([data_dict])
    if device.type == "cuda":
        load_data_to_gpu_fn(batch)
    pred_dicts, _ = model.forward(batch)
    pred = pred_dicts[0]
    boxes = pred["pred_boxes"].detach().cpu().numpy().astype(np.float32)
    scores = pred["pred_scores"].detach().cpu().numpy().astype(np.float32)
    labels = pred["pred_labels"].detach().cpu().numpy().astype(np.int32)
    # detect whether labels are 0-based or 1-based
    label_base = 1
    if labels.size > 0 and labels.min() == 0:
        label_base = 0
    dets3d: List[Det3D] = []
    for b, s, lb in zip(boxes, scores, labels):
        if float(s) < PCDET_CONF_LOW:
            continue
        idx_cls = int(lb) - 1 if label_base == 1 else
int(lb)
        cls_name = cfg.CLASS_NAMES[idx_cls] if 0 <= idx_cls < len(cfg.CLASS_NAMES) else str(int(lb))
        dets3d.append(Det3D(box7=b.copy(), cls_name=canonical_class(cls_name), score=float(s)))
    return frame_id, raw_points, dets3d


# =========================
# Fusion
# =========================
def fuse_frame(yolo_dets: List[Det2D], pcdet_dets: List[Det3D], img_w: int, img_h: int,
               calib: Dict, T_lidar2cam: np.ndarray) -> List[Det3D]:
    """Decision-level fusion of YOLO 2D and PCDet 3D detections for one frame."""
    # 3D->2D projection for PCDet dets
    if len(pcdet_dets) > 0:
        boxes7 = np.stack([d.box7 for d in pcdet_dets], axis=0)
        proj = project_boxes3d_to_2d(boxes7, calib, T_lidar2cam, img_w, img_h, use_distortion=USE_DISTORTION)
        for d, p in zip(pcdet_dets, proj):
            d.proj_xyxy = p
    _, p2y = associate_yolo_pcdet(yolo_dets, pcdet_dets, img_w, img_h)
    fused: List[Det3D] = []
    for pj, pdet in enumerate(pcdet_dets):
        if pj in p2y:
            ydet = yolo_dets[p2y[pj]]
            if pdet.proj_xyxy is None:
                continue
            # match quality
            q, _ = match_quality_2d(ydet.xyxy, pdet.proj_xyxy, img_w, img_h)
            fused_cls, fused_score = fuse_with_quality(ydet, pdet, q)
            if fused_score >= FUSED_KEEP_THRESH:
                # decision-level fusion: by default keep PCDet's 3D box7
                box7 = pdet.box7.copy()
                # optional: heuristic 3D-size refinement (off by default)
                if REFINE_3D_BOX_DIMS:
                    box7 = refine_3d_dims_with_2d(box7, ydet.xyxy, pdet.proj_xyxy, q)
                # fused 2D display box: weighted yolo/proj blend so it is
                # visibly distinct from the raw projected box
                fused_proj = pdet.proj_xyxy
                if FUSE_2D_BOX_FOR_VIS:
                    fused_proj = fuse_2d_box_for_vis(ydet.xyxy, pdet.proj_xyxy, q, img_w, img_h)
                fused.append(Det3D(
                    box7=box7,
                    cls_name=canonical_class(fused_cls),
                    score=float(fused_score),
                    proj_xyxy=fused_proj
                ))
        else:
            # unmatched but confident enough
            if pdet.score >= PCDET_CONF_HIGH_UNMATCHED:
                fused.append(Det3D(
                    box7=pdet.box7.copy(),
                    cls_name=pdet.cls_name,
                    score=float(pdet.score),
                    proj_xyxy=pdet.proj_xyxy
                ))
    return fused


# =========================
# Eval helpers
# =========================
def load_models():
    """Load both models; return (device, yolo, pcdet cfg/dataset/model, gpu loader)."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[INFO] device={device}")
    print("[INFO] Loading YOLO...")
    yolo_model = load_yolo_model(YOLO_WEIGHTS)
    print("[INFO] Loading PCDet...")
    cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_pcdet_model(PCDET_CFG, PCDET_CKPT, device)
    print(f"[PCDet] CLASS_NAMES = {list(cfg.CLASS_NAMES)}")
    return device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn


# =========================
# Main
# =========================
def main():
    """Pass 1: collect eval detections/GT. Pass 2: save debug visualizations.
    Finally print 3D mAP/mAR for the PCDet baseline and the fused results."""
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    ensure_dir(DEBUG_DIR)
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_models()
    bin_files = sorted(glob.glob(os.path.join(PCDET_POINTS_DIR, "*.bin")))
    frame_ids_all = [Path(p).stem for p in bin_files]
    # only keep frames that have a matching image
    valid_ids = [fid for fid in frame_ids_all if find_image_for_frame(fid) is not None]
    if not valid_ids:
        print("[ERROR] No matching (pointcloud, image) pairs found.")
        return
    # eval subset
    if NUM_EVAL_FRAMES is None:
        eval_ids = valid_ids
    else:
        eval_ids = random.sample(valid_ids, k=min(NUM_EVAL_FRAMES, len(valid_ids)))
    # debug subset
    debug_ids = random.sample(valid_ids, k=min(NUM_DEBUG_FRAMES, len(valid_ids)))
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}
    # eval storage: per class -> list of (frame_id, box7, score)
    all_dets_pcd: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_dets_fus: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_gts: Dict[str, Dict[str, List[np.ndarray]]] = {c: {} for c in CLASSES}
    # extrinsic chosen once from the first frame, then reused
    chosen_T = None

    # ---------- Pass 1: eval collect ----------
    print(f"[EVAL] collecting on {len(eval_ids)} frames ...")
    for k, fid in enumerate(eval_ids):
        if fid not in idx_map:
            continue
        if (k % PROGRESS_EVERY) == 0:
            print(f"[EVAL] {k}/{len(eval_ids)} frame={fid}")
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]
        yolo_dets = infer_yolo(yolo_model, img)
        frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)
        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T)
        # store preds baseline
        for d in pcdet_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_pcd and d.score >= EVAL_SCORE_THR_PCD:
                all_dets_pcd[c].append((fid, d.box7.copy(), float(d.score)))
        # store preds fused
        for d in fused_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_fus and d.score >= EVAL_SCORE_THR_FUSED:
                all_dets_fus[c].append((fid, d.box7.copy(), float(d.score)))
        # store GT
        gt_path = os.path.join(PCDET_GT_LABEL_DIR, f"{fid}.txt")
        gt_dets = parse_pcdet_gt_label(gt_path)
        for gd in gt_dets:
            c = canonical_class(gd.cls_name)
            if c in all_gts:
                all_gts[c].setdefault(fid, []).append(gd.box7.copy())

    # ---------- Pass 2: debug visualization ----------
    print(f"[DEBUG] saving {len(debug_ids)} visualizations into {DEBUG_DIR}/ ...")
    for k, fid in enumerate(debug_ids):
        if fid not in idx_map:
            continue
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]
        yolo_dets = infer_yolo(yolo_model, img)
        frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)
        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T)
        y_top = top1_yolo(yolo_dets)
        p_top = top1_3d(pcdet_dets)
        f_top = top1_3d(fused_dets)
        out_path = os.path.join(DEBUG_DIR, f"{fid}.jpg")
        make_debug_vis(fid, img, y_top, p_top, f_top, raw_points, out_path)
        if (k % 5) == 0:
            print(f"[DEBUG] {k}/{len(debug_ids)} saved {out_path}")

    # ---------- Print metrics ----------
    print("\n================= 3D Evaluation =================")
    print(f"[EvalFrames] {len(eval_ids)} | PCD_thr={EVAL_SCORE_THR_PCD} | FUSED_thr={EVAL_SCORE_THR_FUSED}")
    for thr in EVAL_IOU_THRESHOLDS:
        res_p = eval_3d_map_mar(all_dets_pcd, all_gts, CLASSES, iou_thr=thr)
        res_f = eval_3d_map_mar(all_dets_fus, all_gts, CLASSES, iou_thr=thr)
        print(f"\n[IOU={thr:.2f}] PCDet baseline: mAP={res_p['mAP']:.4f}, mAR={res_p['mAR']:.4f}")
        for c in CLASSES:
            pc = res_p["per_class"].get(c, {})
            if pc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={pc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={pc['AP']:.4f} AR={pc['AR']:.4f} nGT={pc['nGT']} nDet={pc['nDet']}")
        print(f"[IOU={thr:.2f}] FUSED: mAP={res_f['mAP']:.4f}, mAR={res_f['mAR']:.4f}")
        for c in CLASSES:
            fc = res_f["per_class"].get(c, {})
            if fc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={fc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={fc['AP']:.4f} AR={fc['AR']:.4f} nGT={fc['nGT']} nDet={fc['nDet']}")
    print(f"\n[INFO] Done. Debug images saved in: {DEBUG_DIR}/")


if __name__ == "__main__":
    main()
可调参数:VIS_FRONT_WIDTH_RATIO_TO_IMG = 1.2 或 1.5(默认 1.0)、VIS_SHOW_PCDET_WHEN_FUSED = True、FUSE_2D_BOX_FOR_VIS = True、REFINE_3D_BOX_DIMS = True。如果你跑完之后发现 FRONT 的点太密/太亮或太暗,主要调下面这两个参数就够了:
FRONT_DENSITY_GAMMA(小一点更亮)和 FRONT_DILATE_KSIZE(大一点点更粗)。
我的意见是根据下面的要求去更改我之前给你提供的代码:右侧 FRONT 面板更宽(可调比例),并且用密度渲染 + 膨胀让点更清楚
FRONT 使用“有限窗口裁剪”(clamp),避免框异常导致视图被拉大
融合时引入 match quality q(0~1)(由 IoU/距离/contain/edge/center-in 组合),让融合得分随匹配质量连续变化
融合框做 refine_box_by_points_robust()(满足 q/点数等条件才做),使融合框大小/中心可能变化,但是也要在一定范围内
增加 eval 进度打印,避免你以为“没反应”。以下按问题逐条说明——1) 右侧 FRONT 视图看不清 -> 解决:
运行:python fusion.py
输出:
我的意见是根据下面的要求去更改我之前给你提供的代码:右侧 FRONT 面板更宽(可调比例),并且用密度渲染 + 膨胀让点更清楚
FRONT 使用“有限窗口裁剪”(clamp),避免框异常导致视图被拉大
融合时引入 match quality q(0~1)(由 IoU/距离/contain/edge/center-in 组合),让融合得分随匹配质量连续变化
融合框做 refine_box_by_points_robust()(满足 q/点数等条件才做),使融合框大小/中心可能变化,但是也要在一定范围内
增加 eval 进度打印,避免你以为“没反应”。以下按问题逐条说明——1) 右侧 FRONT 视图看不清 -> 解决:
运行:python fusion.py
输出:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# NOTE(review): a stray "python" markdown fence tag was fused onto the shebang
# line in the pasted version; it would execute as a bare name and raise
# NameError at import time, so it has been removed.
"""
Fusion + Visualization + 3D Eval (NO intrusion logic)

Changes made on top of the original code (all implemented):

1) Right-hand FRONT view was hard to read -> fixed:
   - wider FRONT panel (tunable FRONT_PANEL_WIDTH_RATIO)
   - points rendered as "density + dilation" for clarity
     (FRONT_DENSITY_GAMMA / FRONT_DILATE_KSIZE)
   - FRONT crop range is clamped to a bounded window so an abnormal box
     cannot stretch the view (FRONT_WINDOW_*)
2) Why was the fused box identical to the point-cloud box?
   - Late fusion can by default only reuse the LiDAR 3D box (YOLO has no
     depth/size information)
   - New here: refine_box_by_points_robust() (only applied when q / point
     count conditions hold), so the fused geometry (center/size) may change,
     but the change is strictly bounded
3) Fusion confidence now varies continuously with overlap/center distance:
   - match quality q in [0,1] (IoU / center dist / contain / edge / center-in)
   - s_eff = s * (Q_GATE_A + Q_GATE_B*q)
   - fused_score = DS(s_eff_img, s_eff_lid) * ((1-g)+g*q)
4) Eval progress printing so long runs do not look stalled
   (one line every PROGRESS_EVERY frames, with detection counts)

Run:
    python fusion.py
Outputs:
    - debug_vis/*.jpg
    - eval metrics printed to the terminal
"""

import os
import sys
import glob
import math
import random
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import cv2
import torch
import torch.nn as nn

# =========================
# Paths (match your environment)
# =========================
IMG_DIR = "/media/jd/4997BB1603CFE2C4/lw/aug2/img"
PCDET_POINTS_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/points"
PCDET_GT_LABEL_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/labels"
ULTRALYTICS_REPO = "/media/jd/4997BB1603CFE2C4/lw/ultralytic"
YOLO_WEIGHTS = "/media/jd/4997BB1603CFE2C4/lw/ultralytic/runs1/1209yolo11_m+SM+d1/weights/best.pt"
OPENPCDET_REPO = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet"
PCDET_CFG = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/tools/cfgs/custom_models/train.yaml"
PCDET_CKPT = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/output/cfgs/custom_models/train/default/ckpt/checkpoint_epoch_80.pth"
DEBUG_DIR = "debug_vis"

# =========================
# Classes
# =========================
CLASSES = ['Drone', 'Plastic_sheet', 'Kite', 'Balloon', 'Bird']

# =========================
# Calibration
# =========================
CALIB = {
    "extrinsic_matrix": np.array([
        [0.00871822977022152, -0.9990101808868351, -0.043619387365335945, -0.04000000000000002],
        [-0.0003806461322609286, 0.043617726472722454, -0.9990482215818578, 0.25],
        [0.9999619230641715, 0.008726535498373544, -1.3877787807814457e-17, -0.04000000000000002],
        [0.0, 0.0, 0.0, 1.0]
    ], dtype=np.float64),
    "fx": 3605.0801593073,
    "fy": 3604.9573399128,
    "cx": 951.9363889574,
    "cy": 816.9773743048,
    "width": 1920,
    "height": 1536,
    "dist": np.array([
        2.4515361243, -46.8007861419,
        -0.0002973913, -0.0008024507,
        -144.3698857610, 2.6420544359,
        -46.0443623397, -158.1742719597
    ], dtype=np.float64)
}
USE_DISTORTION = True
EXTRINSIC_MODE = "auto"  # auto / lidar2cam / cam2lidar

# =========================
# Colors (BGR)
# =========================
COL_YOLO = (0, 255, 0)
COL_PCDET = (255, 0, 0)
COL_FUSED = (0, 0, 255)

# =========================
# Thresholds
# =========================
YOLO_CONF_LOW = 0.05
PCDET_CONF_LOW = 0.01
YOLO_KEEP_TOPK = 1
PCDET_CONF_HIGH_UNMATCHED = 0.10
FUSED_KEEP_THRESH = 0.10

# matching
MATCH_IOU_THR = 0.05
MATCH_CENTER_DIST_THR_PX = 160.0
PROJ_BOX_EXPAND_RATIO = 0.18
COST_ALPHA = 0.7

# =========================
# Overlap visualization / scoring (stable version)
# =========================
OV_IOU_THR = 0.20
OV_CENTER_DIST_PX = 40.0
OV_CONTAINMENT_THR = 0.60  # inter / min(area)
OV_EDGE_GAP_PX = 6.0       # edge gap <= this also counts as overlap/adjacent
OV_MIN_SCORE_FOR_STYLE = 0.55  # overlap_score >= this -> dashed + translucent

# =========================
# Reference scales for match quality q
# =========================
Q_IOU_REF = 0.30
Q_DIST_REF_PX = 80.0
Q_EDGE_REF_PX = 8.0

# =========================
# Fusion: q -> affects score
# Formulas used:
#   s_eff = s * (a + b*q)
#   fused_score = DS(s_eff_img, s_eff_lid) * ((1-g)+g*q)
# =========================
Q_GATE_A = 0.35
Q_GATE_B = 0.65
FUSED_Q_GATING_STRENGTH = 0.60

# =========================
# Debug / Eval
# =========================
RANDOM_SEED = 42
NUM_DEBUG_FRAMES = 20
NUM_EVAL_FRAMES = 800  # None = all frames (slow); 300~1000 recommended to validate the pipeline first
EVAL_IOU_THRESHOLDS = [0.7]
EVAL_SCORE_THR_PCD = 0.10
EVAL_SCORE_THR_FUSED = 0.10
PROGRESS_EVERY = 20

# =========================
# BEV
# =========================
BEV_X_RANGE = (0.0, 80.0)
BEV_Y_RANGE = (-30.0, 30.0)
BEV_RESOLUTION = 0.10
BEV_DENSITY_GAMMA = 0.55
BEV_DILATE_KSIZE = 3

# =========================
# FRONT (X-Z) cut-out view
# =========================
FRONT_RESOLUTION = 0.03
FRONT_MIN_WPX = 220
FRONT_MIN_HPX = 320
# expansion applied when cropping points around the 3D box
FRONT_CROP_EXPAND_XY = 1.0  # meters
FRONT_CROP_EXPAND_Z = 0.8   # meters
# density-rendering parameters
FRONT_DENSITY_GAMMA = 0.55
FRONT_DILATE_KSIZE = 3
# bounded crop window (clamp): prevents an abnormal box from blowing
# up the view (units: meters)
FRONT_WINDOW_MIN_X = 10.0
FRONT_WINDOW_MAX_X = 32.0
FRONT_WINDOW_MIN_Z = 10.0
FRONT_WINDOW_MAX_Z = 26.0
FRONT_WINDOW_MARGIN_X = 2.0
FRONT_WINDOW_MARGIN_Z = 2.0
# render guard: subsample when the crop has too many points
FRONT_MAX_RENDER_POINTS = 200000

# =========================
# Panel ratios (relative to the original image width W)
# Requirement: wider right-hand FRONT panel (tunable)
# =========================
BEV_PANEL_WIDTH_RATIO = 0.45
FRONT_PANEL_WIDTH_RATIO = 0.95  # <- tune this: 0.8 / 1.0 / 1.2 all work

# =========================
# Robust point-based refinement of the FUSED 3D box (new, as requested)
# =========================
REFINE_ENABLE = True
REFINE_MIN_Q = 0.55
REFINE_MIN_POINTS = 25
REFINE_GOOD_POINTS = 120
REFINE_EXPAND_XY = 0.30
REFINE_EXPAND_Z = 0.30
REFINE_ABS_QUANTILE_XY = 0.90
REFINE_ABS_QUANTILE_Z = 0.90
REFINE_PAD_XY = 0.10
REFINE_PAD_Z = 0.12
# bound the size change (ratio relative to the original box)
REFINE_DIM_SCALE_MIN = 0.75
REFINE_DIM_SCALE_MAX = 1.25
# bound the center shift (absolute, meters)
REFINE_CENTER_MAX_SHIFT_XY = 1.20
REFINE_CENTER_MAX_SHIFT_Z = 1.00
# refinement strength ceiling (further multiplied by q / point-count strength)
REFINE_ALPHA_MAX = 0.85

# =========================
# Sensor weights table (base for DS)
# =========================
PCDET_AP07 = {
    "Drone": 91.6,
    "Plastic_sheet": 55.52,
    "Kite": 40.61,
    "Balloon": 99.96,
    "Bird": 73.37
}


# =========================
# Data structures
# =========================
@dataclass
class Det2D:
    # 2D detection: [x1, y1, x2, y2] box, class name, confidence
    xyxy: List[float]
    cls_name: str
    score: float


@dataclass
class Det3D:
    # 3D detection: box7 = [x, y, z, dx, dy, dz, yaw]; proj_xyxy is the
    # optional 2D projection of the 3D box onto the image
    box7: np.ndarray
    cls_name: str
    score: float
    proj_xyxy: Optional[List[float]] = None


# =========================
# Utils
# =========================
def ensure_dir(p: str):
    """Create directory p if it does not exist."""
    os.makedirs(p, exist_ok=True)


def canonical_class(name: str) -> str:
    """Normalize class-name spelling/case to the canonical CLASSES entries."""
    if name is None:
        return name
    n = name.strip()
    n_low = n.lower().replace("-", "_")
    mapping = {
        "drone": "Drone",
        "kite": "Kite",
        "balloon": "Balloon",
        "bird": "Bird",
        "plastic_sheet": "Plastic_sheet",
        "plastic": "Plastic_sheet",
        "plasticsheet": "Plastic_sheet",
    }
    return mapping.get(n_low, n)


def find_image_for_frame(frame_id: str) -> Optional[str]:
    """Locate the image file for a frame id under IMG_DIR (tries common extensions)."""
    for ext in [".jpg", ".png", ".jpeg", ".bmp"]:
        p = os.path.join(IMG_DIR, frame_id + ext)
        if os.path.exists(p):
            return p
    g = glob.glob(os.path.join(IMG_DIR, frame_id + ".*"))
    return g[0] if g else None


def clip_box_xyxy(box, w, h):
    """Clamp an xyxy box to image bounds; also repairs inverted corners."""
    x1, y1, x2, y2 = box
    x1 = max(0, min(w - 1, x1))
    y1 = max(0, min(h - 1, y1))
    x2 = max(0, min(w - 1, x2))
    y2 = max(0, min(h - 1, y2))
    if x2 < x1:
        x1, x2 = x2, x1
    if y2 < y1:
        y1, y2 = y2, y1
    return [float(x1), float(y1), float(x2), float(y2)]


def expand_box_xyxy(box, ratio=0.1):
    """Grow an xyxy box about its center by `ratio` (minimum 1px side)."""
    x1, y1, x2, y2 = box
    cx = (x1 + x2) / 2.0
    cy = (y1 + y2) / 2.0
    w = max(1.0, (x2 - x1))
    h = max(1.0, (y2 - y1))
    w2 = w * (1.0 + ratio)
    h2 = h * (1.0 + ratio)
    return [cx - w2/2, cy - h2/2, cx + w2/2, cy + h2/2]


def box_iou2d(a, b) -> float:
    """Standard 2D IoU of two xyxy boxes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)
    iw = max(0.0, ix2 - ix1)
    ih = max(0.0, iy2 - iy1)
    inter = iw * ih
    area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
    area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
    return float(inter / (area_a + area_b - inter + 1e-9))


def box_intersection_area(a, b) -> float:
    """Intersection area of two xyxy boxes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)
    iw = max(0.0, ix2 - ix1)
    ih = max(0.0, iy2 - iy1)
    return float(iw * ih)


def box_area(a) -> float:
    """Area of an xyxy box (clamped at 0)."""
    return float(max(0.0, a[2]-a[0]) * max(0.0, a[3]-a[1]))


def center_of_box(box):
    """Center (cx, cy) of an xyxy box."""
    x1, y1, x2, y2 = box
    return (0.5*(x1+x2), 0.5*(y1+y2))


def center_dist_px(a, b) -> float:
    """Euclidean distance in pixels between two box centers."""
    ac = center_of_box(a)
    bc = center_of_box(b)
    return float(math.hypot(ac[0]-bc[0], ac[1]-bc[1]))


def point_in_box(pt, box):
    """True if point pt=(x, y) lies inside the xyxy box (inclusive)."""
    x, y = pt
    x1, y1, x2, y2 = box
    return (x >= x1) and (x <= x2) and (y >= y1) and (y <= y2)


def rect_edge_gap(a, b) -> float:
    """Smallest edge-to-edge gap between two boxes (0 when they overlap on an axis)."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    if ax2 < bx1:
        gx = bx1 - ax2
    elif bx2 < ax1:
        gx = ax1 - bx2
    else:
        gx = 0.0
    if ay2 < by1:
        gy = by1 - ay2
    elif by2 < ay1:
        gy = ay1 - by2
    else:
        gy = 0.0
    return float(math.hypot(gx, gy))


# =========================
# Stable overlap + match quality score
# =========================
def overlap_and_score_2d(a, b, diag: float) -> Tuple[bool, float, Dict]:
    """
    Returns:
        is_overlap (bool)
        overlap_score (0~1)
        info dict (iou, dist, containment, gap, center_in)
    """
    iou = box_iou2d(a, b)
    dist = center_dist_px(a, b)
    gap = rect_edge_gap(a, b)
    inter = box_intersection_area(a, b)
    area_a = box_area(a)
    area_b = box_area(b)
    min_area = max(1e-9, min(area_a, area_b))
    containment = float(np.clip(inter / min_area, 0.0, 1.0))
    ac = center_of_box(a)
    bc = center_of_box(b)
    center_in = (point_in_box(ac, b) or point_in_box(bc, a))
    # any of these conditions makes the pair count as overlapping
    cond_iou = (iou >= OV_IOU_THR)
    cond_contain = (containment >= OV_CONTAINMENT_THR)
    cond_center_in = center_in
    cond_edge = (gap <= OV_EDGE_GAP_PX)
    cond_dist = (dist <= OV_CENTER_DIST_PX)
    is_overlap = cond_iou or cond_contain or cond_center_in or (cond_edge and cond_dist)
    # continuous score: weighted combination of normalized terms
    iou_term = min(iou / Q_IOU_REF, 1.0)
    dist_term = max(0.0, 1.0 - dist / max(1e-6, Q_DIST_REF_PX))
    gap_term = max(0.0, 1.0 - gap / max(1e-6, Q_EDGE_REF_PX))
    contain_term = containment
    center_bonus = 0.15 if center_in else 0.0
    score = 0.45*iou_term + 0.25*dist_term + 0.20*contain_term + 0.10*gap_term + center_bonus
    score = float(np.clip(score, 0.0, 1.0))
    info = {"iou": iou, "dist": dist, "gap": gap, "containment": containment, "center_in": center_in}
    return is_overlap, score, info


def match_quality_2d(yolo_box, proj_box, img_w, img_h) -> Tuple[float, Dict]:
    """
    q: 0~1; larger means the YOLO box and the projected PCDet box align better.
    """
    diag = math.hypot(img_w, img_h) + 1e-9
    _, score, info = overlap_and_score_2d(yolo_box, proj_box, diag)
    q = float(np.clip(0.15 + 0.85 * score, 0.0, 1.0))
    return q, info


# =========================
# Drawing helpers
# =========================
def draw_text_box(img, x, y, text, color, font_scale=0.6, thickness=2):
    """Draw text over a filled black background box (clamped to the image)."""
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
    x = int(np.clip(x, 0, img.shape[1]-1))
    y = int(np.clip(y, 0, img.shape[0]-1))
    x2 = int(np.clip(x + tw + 6, 0, img.shape[1]-1))
    y2 = int(np.clip(y + th + 6, 0, img.shape[0]-1))
    cv2.rectangle(img, (x, y), (x2, y2), (0, 0, 0), -1)
    cv2.putText(img, text, (x+3, y+th+3), cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness, cv2.LINE_AA)


def draw_transparent_rect(img, box, color, alpha=0.22):
    """Alpha-blend a filled rectangle onto img in place."""
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    overlay = img.copy()
    cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
    cv2.addWeighted(overlay, alpha, img, 1.0 - alpha, 0, img)


def draw_dashed_line(img, p1, p2, color, thickness=2, dash_len=14, gap_len=8):
    """Draw a dashed line segment from p1 to p2."""
    x1, y1 = p1
    x2, y2 = p2
    dx = x2 - x1
    dy = y2 - y1
    length = math.hypot(dx, dy)
    if length < 1e-6:
        return
    vx = dx / length
    vy = dy / length
    dist = 0.0
    while dist < length:
        seg_start = dist
        seg_end = min(dist + dash_len, length)
        sx = int(round(x1 + vx * seg_start))
        sy = int(round(y1 + vy * seg_start))
        ex = int(round(x1 + vx * seg_end))
        ey = int(round(y1 + vy * seg_end))
        cv2.line(img, (sx, sy), (ex, ey), color, thickness, cv2.LINE_AA)
        dist += dash_len + gap_len


def draw_dashed_rect(img, box, color, thickness=2):
    """Draw a dashed rectangle outline."""
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    draw_dashed_line(img, (x1, y1), (x2, y1), color, thickness)
    draw_dashed_line(img, (x2, y1), (x2, y2), color, thickness)
    draw_dashed_line(img, (x2, y2), (x1, y2), color, thickness)
    draw_dashed_line(img, (x1, y2), (x1, y1), color, thickness)


def draw_box_smart(img, box, color, overlap_score: float, solid_thickness=2):
    """Solid box normally; dashed + translucent fill when heavily overlapped."""
    if overlap_score >= OV_MIN_SCORE_FOR_STYLE:
        draw_transparent_rect(img, box,
color, alpha=0.22)
        draw_dashed_rect(img, box, color, thickness=max(2, solid_thickness))
    else:
        x1, y1, x2, y2 = [int(round(v)) for v in box]
        cv2.rectangle(img, (x1, y1), (x2, y2), color, solid_thickness)


def draw_legend(img, x=12, y=12):
    """Draw the color legend (YOLO/PCDet/FUSED) plus the overlap-rule hint."""
    draw_text_box(img, x, y, "Legend:", (255,255,255), 0.65, 2)
    y += 30
    cv2.rectangle(img, (x, y+6), (x+18, y+24), COL_YOLO, -1)
    draw_text_box(img, x+26, y, "YOLO (2D)", (255,255,255), 0.6, 2)
    y += 28
    cv2.rectangle(img, (x, y+6), (x+18, y+24), COL_PCDET, -1)
    draw_text_box(img, x+26, y, "PCDet-proj (3D->2D)", (255,255,255), 0.6, 2)
    y += 28
    cv2.rectangle(img, (x, y+6), (x+18, y+24), COL_FUSED, -1)
    draw_text_box(img, x+26, y, "FUSED", (255,255,255), 0.6, 2)
    y += 30
    draw_text_box(img, x, y, "Overlap: IoU/dist/contain/edge/center-in", (255,255,255), 0.50, 2)


# =========================
# Density raster + fit_to_rect
# =========================
def rasterize_density(rr: np.ndarray, cc: np.ndarray, H: int, W: int,
                      gamma: float = 0.55, dilate_ksize: int = 3) -> np.ndarray:
    """
    rr/cc: int32 arrays in [0,H-1]/[0,W-1]
    Returns a BGR density image (log + gamma + optional dilate).
    """
    rr = rr.astype(np.int64)
    cc = cc.astype(np.int64)
    idx = rr * W + cc
    counts = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)
    if counts.max() > 0:
        # log compression, normalize, then gamma for visibility
        dens = np.log1p(counts)
        dens = dens / (dens.max() + 1e-6)
        dens = np.power(dens, gamma)
        gray = (dens * 255.0).astype(np.uint8)
    else:
        gray = np.zeros((H, W), dtype=np.uint8)
    if dilate_ksize is not None and dilate_ksize >= 2:
        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_ksize, dilate_ksize))
        gray = cv2.dilate(gray, k, iterations=1)
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)


def resize_keep_aspect(img: np.ndarray, target_w: Optional[int] = None,
                       target_h: Optional[int] = None, interp=cv2.INTER_AREA) -> np.ndarray:
    """Resize preserving aspect ratio; the scale fits within the given target(s)."""
    h, w = img.shape[:2]
    if target_w is None and target_h is None:
        return img
    if target_w is not None and target_h is not None:
        scale = min(target_w / max(1, w), target_h / max(1, h))
    elif target_w is not None:
        scale = target_w / max(1, w)
    else:
        scale = target_h / max(1, h)
    nw = max(1, int(round(w * scale)))
    nh = max(1, int(round(h * scale)))
    if nw == w and nh == h:
        return img
    return cv2.resize(img, (nw, nh), interpolation=interp)


def fit_to_rect(img: np.ndarray, target_w: int, target_h: int,
                pad_color=(0, 0, 0), interp=cv2.INTER_AREA) -> np.ndarray:
    """Letterbox img into a target_w x target_h canvas, centered, padded with pad_color."""
    resized = resize_keep_aspect(img, target_w=target_w, target_h=target_h, interp=interp)
    h, w = resized.shape[:2]
    out = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    out[:] = pad_color
    y0 = (target_h - h) // 2
    x0 = (target_w - w) // 2
    out[y0:y0 + h, x0:x0 + w] = resized
    return out


# =========================
# DS fusion with dynamic match-quality
# =========================
def get_sensor_weights(cls_name: str) -> Tuple[float, float]:
    """Per-class (w_img, w_lidar) weights derived from PCDet AP@0.7, normalized to sum 1."""
    c = canonical_class(cls_name)
    ap = PCDET_AP07.get(c, 70.0) / 100.0
    w_lidar = float(np.clip(ap, 0.15, 0.95))
    w_img = float(np.clip(1.0 - w_lidar, 0.05, 0.85))
    s = w_img + w_lidar
    return w_img / s, w_lidar / s


def ds_fuse_singleton(cls_a: str, s_a: float, w_a: float,
                      cls_b: str, s_b: float, w_b: float) -> Tuple[str, float]:
    """Two-source Dempster-Shafer style combination of singleton class masses."""
    cls_a = canonical_class(cls_a)
    cls_b = canonical_class(cls_b)
    m_a = float(np.clip(w_a * s_a, 0.0, 0.999999))
    m_b = float(np.clip(w_b * s_b, 0.0, 0.999999))
    th_a = 1.0 - m_a
    th_b = 1.0 - m_b
    # conflict mass K only when the two sources disagree on the class
    K = (m_a * m_b) if (cls_a != cls_b) else 0.0
    denom = 1.0 - K + 1e-9
    if cls_a == cls_b:
        m = (m_a*m_b + m_a*th_b + th_a*m_b) / denom
        return cls_a, float(m)
    else:
        ma = (m_a * th_b) / denom
        mb = (th_a * m_b) / denom
        return (cls_a, float(ma)) if ma >= mb else (cls_b, float(mb))


def fuse_with_quality(ydet: Det2D, pdet: Det3D, q: float) -> Tuple[str, float]:
    """
    s_eff = s * (a + b*q)
    fused_score = DS(s_eff_img, s_eff_lid) * ((1-g)+g*q)
    """
    w_img, w_lidar = get_sensor_weights(pdet.cls_name)
    gate = float(np.clip(Q_GATE_A + Q_GATE_B * q, 0.0, 1.0))
    s_img = float(np.clip(ydet.score * gate, 0.0, 1.0))
    s_lid = float(np.clip(pdet.score * gate, 0.0, 1.0))
    fused_cls, fused_score = ds_fuse_singleton(
ydet.cls_name, s_img, w_img, pdet.cls_name, s_lid, w_lidar ) fused_score = float(np.clip( fused_score * ((1.0 - FUSED_Q_GATING_STRENGTH) + FUSED_Q_GATING_STRENGTH * q), 0.0, 1.0 )) return fused_cls, fused_score # ========================= # 3D geometry & projection # ========================= def boxes3d_to_corners(boxes7: np.ndarray) -> np.ndarray: N = boxes7.shape[0] corners = np.zeros((N, 8, 3), dtype=np.float32) for i in range(N): x, y, z, dx, dy, dz, yaw = boxes7[i].tolist() local = np.array([ [ dx/2, dy/2, dz/2], [ dx/2, -dy/2, dz/2], [-dx/2, -dy/2, dz/2], [-dx/2, dy/2, dz/2], [ dx/2, dy/2, -dz/2], [ dx/2, -dy/2, -dz/2], [-dx/2, -dy/2, -dz/2], [-dx/2, dy/2, -dz/2], ], dtype=np.float32) cy = math.cos(yaw); sy = math.sin(yaw) R = np.array([[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]], dtype=np.float32) corners[i] = (local @ R.T) + np.array([x, y, z], dtype=np.float32) return corners def project_points_lidar_to_img(pts_lidar: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, use_distortion=True): fx, fy, cx, cy = calib["fx"], calib["fy"], calib["cx"], calib["cy"] K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float64) dist = calib["dist"].astype(np.float64) if use_distortion else None pts_h = np.concatenate([pts_lidar.astype(np.float64), np.ones((pts_lidar.shape[0], 1), dtype=np.float64)], axis=1) pts_cam = (T_lidar2cam @ pts_h.T).T[:, :3] valid = pts_cam[:, 2] > 1e-6 rvec = np.zeros((3, 1), dtype=np.float64) tvec = np.zeros((3, 1), dtype=np.float64) img_pts, _ = cv2.projectPoints(pts_cam, rvec, tvec, K, dist) return img_pts.reshape(-1, 2).astype(np.float32), valid def get_extrinsic_matrix(calib: Dict, pts_lidar_xyz: np.ndarray, img_w: int, img_h: int) -> np.ndarray: T = calib["extrinsic_matrix"].copy() if EXTRINSIC_MODE == "lidar2cam": return T if EXTRINSIC_MODE == "cam2lidar": return np.linalg.inv(T) if pts_lidar_xyz is None or pts_lidar_xyz.shape[0] < 100: return T pts = pts_lidar_xyz if pts.shape[0] > 8000: pts = 
pts[np.random.choice(pts.shape[0], 8000, replace=False)] def score_for(Tuse): img_pts, valid = project_points_lidar_to_img(pts, calib, Tuse, img_w, img_h, use_distortion=USE_DISTORTION) img_pts = img_pts[valid] if img_pts.shape[0] == 0: return 0.0 inside = (img_pts[:, 0] >= 0) & (img_pts[:, 0] < img_w) & (img_pts[:, 1] >= 0) & (img_pts[:, 1] < img_h) return float(inside.mean()) s1 = score_for(T) s2 = score_for(np.linalg.inv(T)) chosen = T if s1 >= s2 else np.linalg.inv(T) print(f"[CALIB] auto | lidar2cam_inlier={s1:.3f} cam2lidar_inlier={s2:.3f} -> choose {'lidar2cam' if s1>=s2 else 'cam2lidar'}") return chosen def project_boxes3d_to_2d(boxes7: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, use_distortion=True): if boxes7.shape[0] == 0: return [] corners = boxes3d_to_corners(boxes7) out = [] for i in range(corners.shape[0]): img_pts, valid = project_points_lidar_to_img(corners[i], calib, T_lidar2cam, img_w, img_h, use_distortion) if valid.sum() < 4: out.append(None) continue xs = img_pts[valid, 0]; ys = img_pts[valid, 1] box = clip_box_xyxy([float(xs.min()), float(ys.min()), float(xs.max()), float(ys.max())], img_w, img_h) if (box[2]-box[0]) < 2 or (box[3]-box[1]) < 2: out.append(None) else: out.append(box) return out # ========================= # Matching (Hungarian) # ========================= def hungarian_match(cost: np.ndarray) -> List[Tuple[int, int]]: try: from scipy.optimize import linear_sum_assignment r, c = linear_sum_assignment(cost) return list(zip(r.tolist(), c.tolist())) except Exception: matches = [] used_r, used_c = set(), set() idxs = np.dstack(np.unravel_index(np.argsort(cost.ravel()), cost.shape))[0] for i, j in idxs: if i in used_r or j in used_c: continue used_r.add(int(i)); used_c.add(int(j)) matches.append((int(i), int(j))) return matches def associate_yolo_pcdet(yolo: List[Det2D], pcdet: List[Det3D], img_w: int, img_h: int): N, M = len(yolo), len(pcdet) if N == 0 or M == 0: return {}, {} diag = math.hypot(img_w, 
img_h) + 1e-9 cost = np.ones((N, M), dtype=np.float32) * 10.0 for i in range(N): a = yolo[i].xyxy a_c = center_of_box(a) for j in range(M): b0 = pcdet[j].proj_xyxy if b0 is None: continue b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h) b_c = center_of_box(b) iou = box_iou2d(a, b) cd = math.hypot(a_c[0]-b_c[0], a_c[1]-b_c[1]) / diag inside = point_in_box(a_c, b) or point_in_box(b_c, a) bonus = -0.15 if inside else 0.0 cost[i, j] = COST_ALPHA*(1.0 - iou) + (1.0 - COST_ALPHA)*cd + bonus pairs = hungarian_match(cost) y2p, p2y = {}, {} for i, j in pairs: b0 = pcdet[j].proj_xyxy if b0 is None: continue b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h) iou = box_iou2d(yolo[i].xyxy, b) cd = center_dist_px(yolo[i].xyxy, b) y_c = center_of_box(yolo[i].xyxy) b_c = center_of_box(b) inside = point_in_box(y_c, b) or point_in_box(b_c, yolo[i].xyxy) if (iou >= MATCH_IOU_THR) or (cd <= MATCH_CENTER_DIST_THR_PX) or inside: y2p[i] = j p2y[j] = i return y2p, p2y # ========================= # OBB & refine helpers # ========================= def points_in_obb(points_xyz: np.ndarray, box7: np.ndarray, expand_xy: float, expand_z: float) -> np.ndarray: """ points_xyz: (N,3) box7: x,y,z,dx,dy,dz,yaw (yaw about z) 返回 mask """ cx, cy, cz, dx, dy, dz, yaw = box7.tolist() px = points_xyz[:, 0] - cx py = points_xyz[:, 1] - cy pz = points_xyz[:, 2] - cz c = math.cos(-yaw) s = math.sin(-yaw) lx = c * px - s * py ly = s * px + c * py lz = pz hx = dx / 2.0 + expand_xy hy = dy / 2.0 + expand_xy hz = dz / 2.0 + expand_z mask = (np.abs(lx) <= hx) & (np.abs(ly) <= hy) & (np.abs(lz) <= hz) return mask def refine_box_by_points_robust(raw_points: Optional[np.ndarray], box7: np.ndarray, q: float) -> np.ndarray: """ 满足 q/点数条件才做鲁棒精炼: - 取 box 内(含小扩张)点云 - local 坐标下用 median 做中心偏移,用 abs-quantile 做尺寸 - 然后对 center shift / dims scale 做 clamp,并用 alpha(q, n) 连续混合 """ if (not REFINE_ENABLE) or raw_points is None or raw_points.shape[0] == 0: return box7 if q < 
REFINE_MIN_Q: return box7 pts_xyz = raw_points[:, :3] mask = points_in_obb(pts_xyz, box7, REFINE_EXPAND_XY, REFINE_EXPAND_Z) pts = pts_xyz[mask] n = int(pts.shape[0]) if n < REFINE_MIN_POINTS: return box7 cx, cy, cz, dx, dy, dz, yaw = box7.tolist() # to local px = pts[:, 0] - cx py = pts[:, 1] - cy pz = pts[:, 2] - cz c = math.cos(-yaw) s = math.sin(-yaw) lx = c * px - s * py ly = s * px + c * py lz = pz # robust center offset mx = float(np.median(lx)) my = float(np.median(ly)) mz = float(np.median(lz)) # robust half sizes ax = np.abs(lx - mx) ay = np.abs(ly - my) az = np.abs(lz - mz) hx = float(np.quantile(ax, REFINE_ABS_QUANTILE_XY) + REFINE_PAD_XY) hy = float(np.quantile(ay, REFINE_ABS_QUANTILE_XY) + REFINE_PAD_XY) hz = float(np.quantile(az, REFINE_ABS_QUANTILE_Z) + REFINE_PAD_Z) new_dx_raw = max(0.05, 2.0 * hx) new_dy_raw = max(0.05, 2.0 * hy) new_dz_raw = max(0.05, 2.0 * hz) # clamp dims relative to original dx0 = max(0.05, float(dx)) dy0 = max(0.05, float(dy)) dz0 = max(0.05, float(dz)) new_dx = float(np.clip(new_dx_raw, dx0 * REFINE_DIM_SCALE_MIN, dx0 * REFINE_DIM_SCALE_MAX)) new_dy = float(np.clip(new_dy_raw, dy0 * REFINE_DIM_SCALE_MIN, dy0 * REFINE_DIM_SCALE_MAX)) new_dz = float(np.clip(new_dz_raw, dz0 * REFINE_DIM_SCALE_MIN, dz0 * REFINE_DIM_SCALE_MAX)) # clamp local center shift mx = float(np.clip(mx, -REFINE_CENTER_MAX_SHIFT_XY, REFINE_CENTER_MAX_SHIFT_XY)) my = float(np.clip(my, -REFINE_CENTER_MAX_SHIFT_XY, REFINE_CENTER_MAX_SHIFT_XY)) mz = float(np.clip(mz, -REFINE_CENTER_MAX_SHIFT_Z, REFINE_CENTER_MAX_SHIFT_Z)) # alpha based on q and point count (continuous) q_strength = (q - REFINE_MIN_Q) / max(1e-6, (1.0 - REFINE_MIN_Q)) n_strength = (n - REFINE_MIN_POINTS) / max(1.0, float(REFINE_GOOD_POINTS - REFINE_MIN_POINTS)) q_strength = float(np.clip(q_strength, 0.0, 1.0)) n_strength = float(np.clip(n_strength, 0.0, 1.0)) alpha = REFINE_ALPHA_MAX * (q_strength * n_strength) alpha = float(np.clip(alpha, 0.0, 1.0)) if alpha <= 1e-4: return box7 # apply center 
shift in world cyaw = math.cos(yaw) syaw = math.sin(yaw) # world shift for (mx,my) using yaw dxw = (cyaw * mx - syaw * my) * alpha dyw = (syaw * mx + cyaw * my) * alpha dzw = mz * alpha cx2 = cx + dxw cy2 = cy + dyw cz2 = cz + dzw # blend dims dx2 = dx0 + alpha * (new_dx - dx0) dy2 = dy0 + alpha * (new_dy - dy0) dz2 = dz0 + alpha * (new_dz - dz0) return np.array([cx2, cy2, cz2, dx2, dy2, dz2, yaw], dtype=np.float32) # ========================= # FRONT “抠出来”视图(密度渲染 + clamp) # ========================= def make_front_xz_crop(points: Optional[np.ndarray], det_ref: Optional[Det3D], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray: """ 右侧正视图:只显示 det_ref 盒子附近(盒内+扩张)的点云 X 横轴, Z 纵轴 """ canvas = np.zeros((FRONT_MIN_HPX, FRONT_MIN_WPX, 3), dtype=np.uint8) draw_text_box(canvas, 6, 10, "FRONT (X-Z) CROP", (220,220,220), 0.6, 2) if det_ref is None or points is None or points.shape[0] == 0: draw_text_box(canvas, 6, 40, "no det_ref / no points", (200,200,200), 0.55, 2) return canvas pts_xyz = points[:, :3] mask = points_in_obb(pts_xyz, det_ref.box7, FRONT_CROP_EXPAND_XY, FRONT_CROP_EXPAND_Z) crop = pts_xyz[mask] if crop.shape[0] == 0: draw_text_box(canvas, 6, 40, "no points in crop", (200,200,200), 0.55, 2) return canvas if crop.shape[0] > FRONT_MAX_RENDER_POINTS: idx = np.random.choice(crop.shape[0], FRONT_MAX_RENDER_POINTS, replace=False) crop = crop[idx] x, y, z, dx, dy, dz, yaw = det_ref.box7.tolist() # 先按 box+expand+margin 得到想要窗口,再做 clamp 限制最大窗口 want_x_span = (dx + 2.0 * (FRONT_CROP_EXPAND_XY + FRONT_WINDOW_MARGIN_X)) want_z_span = (dz + 2.0 * (FRONT_CROP_EXPAND_Z + FRONT_WINDOW_MARGIN_Z)) x_span = float(np.clip(want_x_span, FRONT_WINDOW_MIN_X, FRONT_WINDOW_MAX_X)) z_span = float(np.clip(want_z_span, FRONT_WINDOW_MIN_Z, FRONT_WINDOW_MAX_Z)) x_min = x - x_span / 2.0 x_max = x + x_span / 2.0 z_min = z - z_span / 2.0 z_max = z + z_span / 2.0 # 分辨率 -> 画布大小 res = FRONT_RESOLUTION W = max(FRONT_MIN_WPX, int(math.ceil((x_max - x_min) / res))) H = 
max(FRONT_MIN_HPX, int(math.ceil((z_max - z_min) / res))) front = np.zeros((H, W, 3), dtype=np.uint8) # 密度渲染 cc = ((crop[:, 0] - x_min) / res).astype(np.int32) rr = ((z_max - crop[:, 2]) / res).astype(np.int32) cc = np.clip(cc, 0, W - 1) rr = np.clip(rr, 0, H - 1) front = rasterize_density(rr, cc, H, W, gamma=FRONT_DENSITY_GAMMA, dilate_ksize=FRONT_DILATE_KSIZE) def draw_det_xz(det: Det3D, col, thick=2, tag=""): corners = boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0] xs = corners[:, 0] zs = corners[:, 2] x1, x2 = float(xs.min()), float(xs.max()) z1, z2 = float(zs.min()), float(zs.max()) x1p = int((x1 - x_min) / res) x2p = int((x2 - x_min) / res) y1p = int((z_max - z2) / res) y2p = int((z_max - z1) / res) x1p = int(np.clip(x1p, 0, W-1)) x2p = int(np.clip(x2p, 0, W-1)) y1p = int(np.clip(y1p, 0, H-1)) y2p = int(np.clip(y2p, 0, H-1)) cv2.rectangle(front, (x1p, y1p), (x2p, y2p), col, thick) cxp = int(np.clip(0.5 * (x1p + x2p), 0, W - 1)) cyp = int(np.clip(0.5 * (y1p + y2p), 0, H - 1)) cv2.circle(front, (cxp, cyp), 3, col, -1) draw_text_box(front, max(0, x1p), max(0, y1p-18), f"{tag}{det.cls_name}:{det.score:.2f}", col, 0.55, 2) # FRONT 里画 PCDet 和 FUSED(两者可能不同,因为 FUSED 可能 refine 了) if pcdet_top is not None: draw_det_xz(pcdet_top, COL_PCDET, thick=1, tag="PCDet ") if fused_top is not None: draw_det_xz(fused_top, COL_FUSED, thick=2, tag="FUSED ") draw_text_box(front, 6, 10, "FRONT (X-Z) CROP", (220,220,220), 0.6, 2) draw_text_box(front, 6, 40, f"clamp win: X={x_span:.1f}m Z={z_span:.1f}m res={res:.2f}", (200,200,200), 0.5, 2) return front # ========================= # BEV panel(密度渲染 + 膨胀) # ========================= def make_bev_image(points: Optional[np.ndarray], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray: x_min, x_max = BEV_X_RANGE y_min, y_max = BEV_Y_RANGE res = BEV_RESOLUTION W = int((y_max - y_min) / res) H = int((x_max - x_min) / res) bev = np.zeros((H, W, 3), dtype=np.uint8) if points is not None and points.shape[0] > 
0: mask = (points[:, 0] >= x_min) & (points[:, 0] <= x_max) & (points[:, 1] >= y_min) & (points[:, 1] <= y_max) pts = points[mask] if pts.shape[0] > 0: rr = ((x_max - pts[:, 0]) / res).astype(np.int32) cc = ((pts[:, 1] - y_min) / res).astype(np.int32) rr = np.clip(rr, 0, H - 1) cc = np.clip(cc, 0, W - 1) bev = rasterize_density(rr, cc, H, W, gamma=BEV_DENSITY_GAMMA, dilate_ksize=BEV_DILATE_KSIZE) def draw_one(det: Det3D, col_bgr, thick=2, tag=""): x, y, z, dx, dy, dz, yaw = det.box7.tolist() cy = math.cos(yaw); sy = math.sin(yaw) local = np.array([[ dx/2, dy/2], [ dx/2, -dy/2], [-dx/2, -dy/2], [-dx/2, dy/2]], dtype=np.float32) R = np.array([[cy, -sy], [sy, cy]], dtype=np.float32) corners_xy = (local @ R.T) + np.array([x, y], dtype=np.float32) rr = ((x_max - corners_xy[:, 0]) / res).astype(np.int32) cc = ((corners_xy[:, 1] - y_min) / res).astype(np.int32) poly = np.stack([cc, rr], axis=1).reshape(-1, 1, 2) cv2.polylines(bev, [poly], True, col_bgr, thick) rc = int((x_max - x) / res) cc0 = int((y - y_min) / res) draw_text_box(bev, max(0, cc0+4), max(0, rc-18), f"{tag}{det.cls_name}:{det.score:.2f}", col_bgr, 0.5, 2) if pcdet_top is not None: draw_one(pcdet_top, COL_PCDET, thick=1, tag="P ") if fused_top is not None: draw_one(fused_top, COL_FUSED, thick=2, tag="F ") draw_text_box(bev, 10, 10, "BEV", (220,220,220), 0.7, 2) return bev # ========================= # Visualize (Image | BEV | FRONT) # ========================= def top1_yolo(dets: List[Det2D]) -> Optional[Det2D]: return max(dets, key=lambda d: d.score) if dets else None def top1_3d(dets: List[Det3D]) -> Optional[Det3D]: return max(dets, key=lambda d: d.score) if dets else None def make_debug_vis(frame_id: str, img_bgr: np.ndarray, yolo_top: Optional[Det2D], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D], raw_points: Optional[np.ndarray], out_path: str): vis = img_bgr.copy() H, W = vis.shape[:2] draw_legend(vis, 12, 12) draw_text_box(vis, 12, H-40, f"frame:{frame_id}", (0, 220, 220), 0.75, 2) box_map: 
Dict[str, Tuple[List[float], str]] = {} if yolo_top is not None: b = clip_box_xyxy(yolo_top.xyxy, W, H) box_map["YOLO"] = (b, f"{yolo_top.cls_name}:{yolo_top.score:.2f}") if pcdet_top is not None and pcdet_top.proj_xyxy is not None: b = clip_box_xyxy(pcdet_top.proj_xyxy, W, H) box_map["PCDet"] = (b, f"{pcdet_top.cls_name}:{pcdet_top.score:.2f}") if fused_top is not None and fused_top.proj_xyxy is not None: b = clip_box_xyxy(fused_top.proj_xyxy, W, H) box_map["FUSED"] = (b, f"{fused_top.cls_name}:{fused_top.score:.2f}") diag = math.hypot(W, H) + 1e-9 overlap_score = {k: 0.0 for k in box_map.keys()} notes = [] keys = list(box_map.keys()) for i in range(len(keys)): for j in range(i+1, len(keys)): ki, kj = keys[i], keys[j] bi, _ = box_map[ki] bj, _ = box_map[kj] is_ov, sc, info = overlap_and_score_2d(bi, bj, diag) if is_ov: overlap_score[ki] = max(overlap_score[ki], sc) overlap_score[kj] = max(overlap_score[kj], sc) notes.append(f"{ki} vs {kj}: sc={sc:.2f} iou={info['iou']:.2f} d={info['dist']:.1f}px gap={info['gap']:.1f}px") # draw boxes if "YOLO" in box_map and yolo_top is not None: b, _ = box_map["YOLO"] draw_box_smart(vis, b, COL_YOLO, overlap_score.get("YOLO", 0.0), 2) draw_text_box(vis, int(b[0]), max(0, int(b[1]) - 22), f"YOLO {yolo_top.cls_name}:{yolo_top.score:.2f}", COL_YOLO, 0.6, 2) if "PCDet" in box_map and pcdet_top is not None: b, _ = box_map["PCDet"] draw_box_smart(vis, b, COL_PCDET, overlap_score.get("PCDet", 0.0), 2) draw_text_box(vis, int(b[0]), min(H-28, int(b[3]) + 6), f"PCDet {pcdet_top.cls_name}:{pcdet_top.score:.2f}", COL_PCDET, 0.6, 2) if "FUSED" in box_map and fused_top is not None: b, _ = box_map["FUSED"] draw_box_smart(vis, b, COL_FUSED, overlap_score.get("FUSED", 0.0), 3) txt = f"FUSED {fused_top.cls_name}:{fused_top.score:.2f}" (tw, _), _ = cv2.getTextSize(txt, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2) draw_text_box(vis, max(0, int(b[2]) - tw - 8), max(0, int(b[1]) - 22), txt, COL_FUSED, 0.6, 2) # notes if notes: y0 = 150 draw_text_box(vis, 12, y0, 
"Overlap notes:", (255,255,255), 0.58, 2) y0 += 26 for k, line in enumerate(notes[:6]): draw_text_box(vis, 12, y0 + 22*k, line, (255,255,255), 0.50, 2) # panels det_ref = fused_top if fused_top is not None else pcdet_top bev = make_bev_image(raw_points, pcdet_top, fused_top) front = make_front_xz_crop(raw_points, det_ref, pcdet_top, fused_top) bev_w = max(120, int(W * BEV_PANEL_WIDTH_RATIO)) front_w = max(180, int(W * FRONT_PANEL_WIDTH_RATIO)) bev_r = fit_to_rect(bev, bev_w, H, pad_color=(0, 0, 0), interp=cv2.INTER_NEAREST) front_r = fit_to_rect(front, front_w, H, pad_color=(0, 0, 0), interp=cv2.INTER_NEAREST) canvas = cv2.hconcat([vis, bev_r, front_r]) cv2.imwrite(out_path, canvas) # ========================= # 3D Eval # ========================= def iou3d_matrix_cpu(boxes_a: np.ndarray, boxes_b: np.ndarray) -> np.ndarray: N, M = boxes_a.shape[0], boxes_b.shape[0] if N == 0 or M == 0: return np.zeros((N, M), dtype=np.float32) from pcdet.ops.iou3d_nms import iou3d_nms_utils ta = torch.from_numpy(boxes_a).float() tb = torch.from_numpy(boxes_b).float() bev_iou = iou3d_nms_utils.boxes_bev_iou_cpu(ta, tb).cpu().numpy().astype(np.float32) area_a = (boxes_a[:, 3] * boxes_a[:, 4]).reshape(N, 1) area_b = (boxes_b[:, 3] * boxes_b[:, 4]).reshape(1, M) inter_area = np.where( bev_iou > 0, bev_iou * (area_a + area_b) / (1.0 + bev_iou + 1e-9), 0.0 ).astype(np.float32) zmax_a = boxes_a[:, 2] + boxes_a[:, 5] / 2.0 zmin_a = boxes_a[:, 2] - boxes_a[:, 5] / 2.0 zmax_b = boxes_b[:, 2] + boxes_b[:, 5] / 2.0 zmin_b = boxes_b[:, 2] - boxes_b[:, 5] / 2.0 overlap_h = np.maximum( 0.0, np.minimum(zmax_a.reshape(N, 1), zmax_b.reshape(1, M)) - np.maximum(zmin_a.reshape(N, 1), zmin_b.reshape(1, M)) ).astype(np.float32) inter_vol = inter_area * overlap_h vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).reshape(N, 1) vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).reshape(1, M) union_vol = vol_a + vol_b - inter_vol + 1e-9 return (inter_vol / union_vol).astype(np.float32) def 
compute_ap_from_pr(rec: np.ndarray, prec: np.ndarray) -> float: if rec.size == 0: return 0.0 mrec = np.concatenate(([0.0], rec, [1.0])) mpre = np.concatenate(([0.0], prec, [0.0])) for i in range(mpre.size - 1, 0, -1): mpre[i-1] = max(mpre[i-1], mpre[i]) idx = np.where(mrec[1:] != mrec[:-1])[0] ap = np.sum((mrec[idx+1] - mrec[idx]) * mpre[idx+1]) return float(ap) def eval_3d_map_mar( all_dets: Dict[str, List[Tuple[str, np.ndarray, float]]], all_gts: Dict[str, Dict[str, List[np.ndarray]]], class_names: List[str], iou_thr: float ) -> Dict: results = {"iou_thr": iou_thr, "per_class": {}, "mAP": 0.0, "mAR": 0.0} aps, ars = [], [] for cls in class_names: dets = all_dets.get(cls, []) gts_by_frame = all_gts.get(cls, {}) npos = sum(len(v) for v in gts_by_frame.values()) if npos == 0: results["per_class"][cls] = {"AP": None, "AR": None, "nGT": 0, "nDet": len(dets)} continue dets_sorted = sorted(dets, key=lambda x: x[2], reverse=True) tp = np.zeros((len(dets_sorted),), dtype=np.float32) fp = np.zeros((len(dets_sorted),), dtype=np.float32) matched = {fid: np.zeros((len(gts_by_frame[fid]),), dtype=bool) for fid in gts_by_frame.keys()} for i, (fid, box, score) in enumerate(dets_sorted): gt_list = gts_by_frame.get(fid, []) if len(gt_list) == 0: fp[i] = 1.0 continue gt_boxes = np.stack(gt_list, axis=0).astype(np.float32) det_box = box.reshape(1, 7).astype(np.float32) ious = iou3d_matrix_cpu(det_box, gt_boxes).reshape(-1) jmax = int(np.argmax(ious)) if ious[jmax] >= iou_thr and (not matched[fid][jmax]): tp[i] = 1.0 matched[fid][jmax] = True else: fp[i] = 1.0 tp_cum = np.cumsum(tp) fp_cum = np.cumsum(fp) rec = tp_cum / float(npos) prec = tp_cum / np.maximum(tp_cum + fp_cum, 1e-9) ap = compute_ap_from_pr(rec, prec) ar = float(rec[-1]) if rec.size > 0 else 0.0 results["per_class"][cls] = {"AP": ap, "AR": ar, "nGT": int(npos), "nDet": int(len(dets_sorted))} aps.append(ap); ars.append(ar) results["mAP"] = float(np.mean(aps)) if len(aps) > 0 else 0.0 results["mAR"] = float(np.mean(ars)) 
if len(ars) > 0 else 0.0 return results def parse_pcdet_gt_label(txt_path: str) -> List[Det3D]: dets = [] if not os.path.exists(txt_path): return dets with open(txt_path, "r", encoding="utf-8") as f: lines = [ln.strip() for ln in f.readlines() if ln.strip()] for ln in lines: parts = ln.split() if len(parts) < 8: continue nums = list(map(float, parts[:7])) cls_name = canonical_class(parts[7]) dets.append(Det3D(box7=np.array(nums, dtype=np.float32), cls_name=cls_name, score=1.0)) return dets # ========================= # PCDet load compat # ========================= def _extract_model_state(ckpt_obj): if isinstance(ckpt_obj, dict): if "model_state" in ckpt_obj: return ckpt_obj["model_state"] if "state_dict" in ckpt_obj: return ckpt_obj["state_dict"] return ckpt_obj def _override_pcdet_score_thresh(cfg, thr: float = 0.0): try: if hasattr(cfg.MODEL, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.POST_PROCESSING: cfg.MODEL.POST_PROCESSING.SCORE_THRESH = float(thr) except Exception: pass try: if hasattr(cfg.MODEL, "DENSE_HEAD") and hasattr(cfg.MODEL.DENSE_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.DENSE_HEAD.POST_PROCESSING: cfg.MODEL.DENSE_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr) except Exception: pass try: if hasattr(cfg.MODEL, "ROI_HEAD") and hasattr(cfg.MODEL.ROI_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.ROI_HEAD.POST_PROCESSING: cfg.MODEL.ROI_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr) except Exception: pass def _ensure_map_to_bev_proj_bn_registered(model, state_dict, device): proj_w_key = "map_to_bev_module.proj.weight" bn_w_key = "map_to_bev_module.bn.weight" if proj_w_key not in state_dict or bn_w_key not in state_dict: return mtb = getattr(model, "map_to_bev_module", None) if mtb is None: return if isinstance(getattr(mtb, "proj", None), nn.Module) and isinstance(getattr(mtb, "bn", None), nn.Module): return w = state_dict[proj_w_key] out_ch, in_ch, kH, kW = w.shape use_bias = ("map_to_bev_module.proj.bias" in state_dict) 
padding = (kH // 2, kW // 2) if (kH > 1 or kW > 1) else (0, 0) mtb.proj = nn.Conv2d(in_ch, out_ch, kernel_size=(kH, kW), stride=1, padding=padding, bias=use_bias).to(device) mtb.bn = nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01).to(device) print(f"[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d({in_ch}->{out_ch}, k={kH}x{kW})") # ========================= # Load models # ========================= def load_yolo_model(weights_path: str): try: from ultralytics import YOLO except Exception: sys.path.insert(0, ULTRALYTICS_REPO) from ultralytics import YOLO return YOLO(weights_path) def load_pcdet_model(cfg_path: str, ckpt_path: str, device: torch.device): sys.path.insert(0, OPENPCDET_REPO) from pcdet.config import cfg, cfg_from_yaml_file from pcdet.datasets import DatasetTemplate from pcdet.models import build_network, load_data_to_gpu from pcdet.utils import common_utils class DemoDataset(DatasetTemplate): def __init__(self, dataset_cfg, class_names, root_path, ext=".bin", logger=None): super().__init__(dataset_cfg=dataset_cfg, class_names=class_names, training=False, root_path=root_path, logger=logger) self.root_path = Path(root_path) self.ext = ext self.points_dir = self.root_path / "points" self.sample_file_list = sorted(glob.glob(str(self.points_dir / f"*{self.ext}"))) def __len__(self): return len(self.sample_file_list) def __getitem__(self, index): p = Path(self.sample_file_list[index]) points = np.fromfile(str(p), dtype=np.float32).reshape(-1, 4) input_dict = {"points": points, "frame_id": p.stem} data_dict = self.prepare_data(data_dict=input_dict) return data_dict logger = common_utils.create_logger() cfg_from_yaml_file(cfg_path, cfg) _override_pcdet_score_thresh(cfg, thr=0.0) dataset_root = str(Path(PCDET_POINTS_DIR).parent) dataset = DemoDataset(cfg.DATA_CONFIG, cfg.CLASS_NAMES, dataset_root, ext=".bin", logger=logger) model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset) model.to(device).eval() ckpt_obj = 
torch.load(ckpt_path, map_location="cpu") state = _extract_model_state(ckpt_obj) _ensure_map_to_bev_proj_bn_registered(model, state, device) ret = model.load_state_dict(state, strict=False) print(f"[PCDet] load_state_dict done. missing={len(ret.missing_keys)} unexpected={len(ret.unexpected_keys)}") return cfg, dataset, model, load_data_to_gpu # ========================= # Inference # ========================= @torch.no_grad() def infer_yolo(yolo_model, img_bgr: np.ndarray) -> List[Det2D]: img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) results = yolo_model(img_rgb, conf=YOLO_CONF_LOW, iou=0.7, verbose=False) r = results[0] dets: List[Det2D] = [] if r.boxes is None or len(r.boxes) == 0: return dets xyxy = r.boxes.xyxy.detach().cpu().numpy() conf = r.boxes.conf.detach().cpu().numpy() cls = r.boxes.cls.detach().cpu().numpy().astype(int) for b, s, c in zip(xyxy, conf, cls): if float(s) < YOLO_CONF_LOW: continue cls_name = CLASSES[c] if 0 <= c < len(CLASSES) else str(c) dets.append(Det2D(xyxy=b.tolist(), cls_name=canonical_class(cls_name), score=float(s))) dets = sorted(dets, key=lambda d: d.score, reverse=True)[:YOLO_KEEP_TOPK] return dets @torch.no_grad() def infer_pcdet(cfg, dataset, model, load_data_to_gpu_fn, index: int, device: torch.device): data_dict = dataset[index] frame_id = data_dict["frame_id"] bin_path = str(Path(PCDET_POINTS_DIR) / f"{frame_id}.bin") raw_points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) if os.path.exists(bin_path) else None batch = dataset.collate_batch([data_dict]) if device.type == "cuda": load_data_to_gpu_fn(batch) pred_dicts, _ = model.forward(batch) pred = pred_dicts[0] boxes = pred["pred_boxes"].detach().cpu().numpy().astype(np.float32) scores = pred["pred_scores"].detach().cpu().numpy().astype(np.float32) labels = pred["pred_labels"].detach().cpu().numpy().astype(np.int32) label_base = 1 if labels.size > 0 and labels.min() == 0: label_base = 0 dets3d: List[Det3D] = [] for b, s, lb in zip(boxes, scores, labels): if 
float(s) < PCDET_CONF_LOW: continue idx_cls = int(lb) - 1 if label_base == 1 else int(lb) cls_name = cfg.CLASS_NAMES[idx_cls] if 0 <= idx_cls < len(cfg.CLASS_NAMES) else str(int(lb)) dets3d.append(Det3D(box7=b.copy(), cls_name=canonical_class(cls_name), score=float(s))) return frame_id, raw_points, dets3d # ========================= # Fusion (带 refine_box_by_points_robust) # ========================= def fuse_frame(yolo_dets: List[Det2D], pcdet_dets: List[Det3D], img_w: int, img_h: int, calib: Dict, T_lidar2cam: np.ndarray, raw_points: Optional[np.ndarray]) -> List[Det3D]: # 3D->2D projection if len(pcdet_dets) > 0: boxes7 = np.stack([d.box7 for d in pcdet_dets], axis=0) proj = project_boxes3d_to_2d(boxes7, calib, T_lidar2cam, img_w, img_h, use_distortion=USE_DISTORTION) for d, p in zip(pcdet_dets, proj): d.proj_xyxy = p y2p, p2y = associate_yolo_pcdet(yolo_dets, pcdet_dets, img_w, img_h) fused: List[Det3D] = [] for pj, pdet in enumerate(pcdet_dets): if pj in p2y: ydet = yolo_dets[p2y[pj]] if pdet.proj_xyxy is None: continue # match quality q (IoU/center/contain/edge/center-in) q, _ = match_quality_2d(ydet.xyxy, pdet.proj_xyxy, img_w, img_h) fused_cls, fused_score = fuse_with_quality(ydet, pdet, q) if fused_score >= FUSED_KEEP_THRESH: # 先沿用 LiDAR 3D box box7 = pdet.box7.copy() # 再按要求做鲁棒精炼(满足 q/点数才做;变化幅度被 clamp) box7_ref = refine_box_by_points_robust(raw_points, box7, q) # 重新投影 refined box 到 2D(否则 fused 框仍会等于 pcdet 投影框) proj_ref = project_boxes3d_to_2d(box7_ref.reshape(1, 7), calib, T_lidar2cam, img_w, img_h, use_distortion=USE_DISTORTION) proj_xyxy = proj_ref[0] if (len(proj_ref) > 0 and proj_ref[0] is not None) else pdet.proj_xyxy fused.append(Det3D( box7=box7_ref.copy(), cls_name=canonical_class(fused_cls), score=float(fused_score), proj_xyxy=proj_xyxy )) else: # unmatched but confident enough if pdet.score >= PCDET_CONF_HIGH_UNMATCHED: fused.append(Det3D( box7=pdet.box7.copy(), cls_name=pdet.cls_name, score=float(pdet.score), proj_xyxy=pdet.proj_xyxy )) return 
fused  # NOTE(review): residue of a statement truncated by the paste (likely "return fused" at the end of fuse_frame) — confirm against the original script

# =========================
# Eval helpers
# =========================

def load_models():
    """Load both detectors and return everything main() needs.

    Returns:
        (device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn)
        where `device` is cuda when available, else cpu.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[INFO] device={device}")
    print("[INFO] Loading YOLO...")
    yolo_model = load_yolo_model(YOLO_WEIGHTS)
    print("[INFO] Loading PCDet...")
    cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_pcdet_model(PCDET_CFG, PCDET_CKPT, device)
    print(f"[PCDet] CLASS_NAMES = {list(cfg.CLASS_NAMES)}")
    return device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn

# =========================
# Main
# =========================

def main():
    """Two-pass driver: pass 1 collects detections/GT for 3D eval, pass 2 saves debug visualizations, then prints mAP/mAR."""
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    ensure_dir(DEBUG_DIR)
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_models()
    bin_files = sorted(glob.glob(os.path.join(PCDET_POINTS_DIR, "*.bin")))
    frame_ids_all = [Path(p).stem for p in bin_files]
    # keep only frames that have both a point cloud and a matching image
    valid_ids = [fid for fid in frame_ids_all if find_image_for_frame(fid) is not None]
    if not valid_ids:
        print("[ERROR] No matching (pointcloud, image) pairs found.")
        return
    # eval subset
    if NUM_EVAL_FRAMES is None:
        eval_ids = valid_ids
    else:
        eval_ids = random.sample(valid_ids, k=min(NUM_EVAL_FRAMES, len(valid_ids)))
    # debug subset
    debug_ids = random.sample(valid_ids, k=min(NUM_DEBUG_FRAMES, len(valid_ids)))
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}
    # eval storage: per-class lists of (frame_id, box7, score) and per-class GT boxes per frame
    all_dets_pcd: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_dets_fus: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_gts: Dict[str, Dict[str, List[np.ndarray]]] = {c: {} for c in CLASSES}
    chosen_T = None  # extrinsic direction resolved lazily from the first frame's points
    # ---------- Pass 1: eval collect ----------
    print(f"[EVAL] collecting on {len(eval_ids)} frames ...")
    for k, fid in enumerate(eval_ids):
        if fid not in idx_map:
            continue
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]
        yolo_dets = infer_yolo(yolo_model, img)
        frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)
        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T, raw_points)
        # progress print (more explicit, so long runs do not look stalled)
        if (k % PROGRESS_EVERY) == 0:
            print(f"[EVAL] {k}/{len(eval_ids)} frame={fid} | yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused_dets)}")
        # store preds baseline
        for d in pcdet_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_pcd and d.score >= EVAL_SCORE_THR_PCD:
                all_dets_pcd[c].append((fid, d.box7.copy(), float(d.score)))
        # store preds fused
        for d in fused_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_fus and d.score >= EVAL_SCORE_THR_FUSED:
                all_dets_fus[c].append((fid, d.box7.copy(), float(d.score)))
        # store GT
        gt_path = os.path.join(PCDET_GT_LABEL_DIR, f"{fid}.txt")
        gt_dets = parse_pcdet_gt_label(gt_path)
        for gd in gt_dets:
            c = canonical_class(gd.cls_name)
            if c in all_gts:
                all_gts[c].setdefault(fid, []).append(gd.box7.copy())
    # ---------- Pass 2: debug visualization ----------
    print(f"[DEBUG] saving {len(debug_ids)} visualizations into {DEBUG_DIR}/ ...")
    for k, fid in enumerate(debug_ids):
        if fid not in idx_map:
            continue
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        h, w = img.shape[:2]
        yolo_dets = infer_yolo(yolo_model, img)
        frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)
        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T, raw_points)
        # visualize only the highest-scoring detection of each source
        y_top = top1_yolo(yolo_dets)
        p_top = top1_3d(pcdet_dets)
        f_top = top1_3d(fused_dets)
        out_path = os.path.join(DEBUG_DIR, f"{fid}.jpg")
        make_debug_vis(fid, img, y_top, p_top, f_top, raw_points, out_path)
        if (k % 5) == 0:
            print(f"[DEBUG] {k}/{len(debug_ids)} saved {out_path}")
    # ---------- Print metrics ----------
    print("\n================= 3D Evaluation =================")
    print(f"[EvalFrames] {len(eval_ids)} | PCD_thr={EVAL_SCORE_THR_PCD} | FUSED_thr={EVAL_SCORE_THR_FUSED}")
    for thr in EVAL_IOU_THRESHOLDS:
        res_p = eval_3d_map_mar(all_dets_pcd, all_gts, CLASSES, iou_thr=thr)
        res_f = eval_3d_map_mar(all_dets_fus, all_gts, CLASSES, iou_thr=thr)
        print(f"\n[IOU={thr:.2f}] PCDet baseline: mAP={res_p['mAP']:.4f}, mAR={res_p['mAR']:.4f}")
        for c in CLASSES:
            pc = res_p["per_class"].get(c, {})
            if pc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={pc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={pc['AP']:.4f} AR={pc['AR']:.4f} nGT={pc['nGT']} nDet={pc['nDet']}")
        print(f"[IOU={thr:.2f}] FUSED: mAP={res_f['mAP']:.4f}, mAR={res_f['mAR']:.4f}")
        for c in CLASSES:
            fc = res_f["per_class"].get(c, {})
            if fc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={fc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={fc['AP']:.4f} AR={fc['AR']:.4f} nGT={fc['nGT']} nDet={fc['nDet']}")
    print(f"\n[INFO] Done. Debug images saved in: {DEBUG_DIR}/")

if __name__ == "__main__":
    main()
可以调整的参数建议:
- FRONT_PANEL_WIDTH_RATIO:可试 0.8 / 1.0 / 1.2;
- FRONT_DENSITY_GAMMA 调小(例如 0.45);
- FRONT_DILATE_KSIZE 可设 3 或 5;
- 调整 FRONT_WINDOW_MAX_X / FRONT_WINDOW_MAX_Z;
- REFINE_MIN_Q 调低一点(比如 0.50),或 REFINE_MIN_POINTS 调低(比如 18);
- REFINE_DIM_SCALE_MAX 降到 1.15,REFINE_CENTER_MAX_SHIFT_XY 降到 0.8。
如果你希望我把 refine 过程的统计(点数 n、alpha、尺寸变化比例)也打印出来用于排查,我可以再给你加一个 REFINE_DEBUG_PRINT=True 的开关。
现在这个代码需要修改的内容包括:
1. 点云的原始三维检测框必须显示出来——现在部分情况下不显示,只能看到一个红色检测框;
2. 检测框旁的文字太大,有时显示完类别后,置信度数字就超出了这张图的范围;
3. 图例应单独放在所有图片的最下面,内容放大、字体清晰;类别与置信度可以同时标注在图例和检测结果上,但图例的内容一定要清晰,字体可以放大一些;
4. X-Z 视角中检测框的红色中心标记要小一点——现在比点云还大,挡住了原始点云;
5. X-Z 视角中原始检测结果和融合检测结果的类别与置信度文字一定不能遮盖点云。
请给出修改后的代码。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Fusion + Visualization + 3D Eval (NO intrusion logic)

Changes in this version (only the reported issues were touched; the overall
flow is unchanged):

A) The raw PCDet 3D boxes must always be visible
   - image panel: PCDet is drawn as a BLUE DASHED box on the top layer, so the
     red FUSED box can no longer hide it ("only a red box" problem)
   - FRONT (X-Z) / BEV: PCDet dashed, FUSED solid, so both stay visible
   - more robust 3D->2D projection: a box is still produced when at least one
     corner is in front of the camera (avoids None -> nothing drawn)

B) Fonts too large / confidence text truncated
   - new draw_text_box_fit(): moves the text box fully inside the image and
     shrinks the font when needed, so "cls + conf" is never cut off
   - smaller text near detection boxes (DET_FONT_SCALE)

C) Legend is a dedicated bar at the very bottom of the output image
   - no legend in the top-left corner anymore; larger, clearer font, including
     an explanation of the "cls conf" label format

D) FRONT (X-Z) view:
   - red center marker shrunk (small cross instead of a large filled circle),
     so it does not cover the point cloud
   - PCDet/FUSED "class + confidence" moved into the FRONT top header bar,
     never over the point-cloud area

E) Everything else kept from the previous version:
   - adjustable FRONT panel width (FRONT_PANEL_WIDTH_RATIO)
   - point-density rendering + dilation (density + dilate)
   - FRONT clamp window cropping
   - match quality q in [0,1] continuously affecting the score
   - refine_box_by_points_robust() only runs when q/point-count allow, with
     clamped center/size changes
   - eval progress printing (every PROGRESS_EVERY frames)

Run:
    python fusion.py
Output:
    - debug_vis/*.jpg
    - eval metrics printed to stdout
"""
# NOTE(review): this whole module was reflowed from a line-collapsed chat
# paste; indentation was reconstructed — verify against the original script.

import os
import sys
import glob
import math
import random
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import numpy as np
import cv2
import torch
import torch.nn as nn

# =========================
# Paths (machine-specific)
# =========================
IMG_DIR = "/media/jd/4997BB1603CFE2C4/lw/aug2/img"
PCDET_POINTS_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/points"
PCDET_GT_LABEL_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/labels"
ULTRALYTICS_REPO = "/media/jd/4997BB1603CFE2C4/lw/ultralytic"
YOLO_WEIGHTS = "/media/jd/4997BB1603CFE2C4/lw/ultralytic/runs1/1209yolo11_m+SM+d1/weights/best.pt"
OPENPCDET_REPO = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet"
PCDET_CFG = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/tools/cfgs/custom_models/train.yaml"
PCDET_CKPT = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/output/cfgs/custom_models/train/default/ckpt/checkpoint_epoch_80.pth"
DEBUG_DIR = "debug_vis"

# =========================
# Classes
# =========================
CLASSES = ['Drone', 'Plastic_sheet', 'Kite', 'Balloon', 'Bird']

# =========================
# Calibration
# =========================
CALIB = {
    "extrinsic_matrix": np.array([
        [0.00871822977022152, -0.9990101808868351, -0.043619387365335945, -0.04000000000000002],
        [-0.0003806461322609286, 0.043617726472722454, -0.9990482215818578, 0.25],
        [0.9999619230641715, 0.008726535498373544, -1.3877787807814457e-17, -0.04000000000000002],
        [0.0, 0.0, 0.0, 1.0]
    ], dtype=np.float64),
    "fx": 3605.0801593073,
    "fy": 3604.9573399128,
    "cx": 951.9363889574,
    "cy": 816.9773743048,
    "width": 1920,
    "height": 1536,
    "dist": np.array([
        2.4515361243, -46.8007861419,
        -0.0002973913, -0.0008024507,
        -144.3698857610, 2.6420544359,
        -46.0443623397, -158.1742719597
    ], dtype=np.float64)
}
USE_DISTORTION = True
EXTRINSIC_MODE = "auto"  # auto / lidar2cam / cam2lidar

# =========================
# Colors (BGR)
# =========================
COL_YOLO = (0, 255, 0)   # green
COL_PCDET = (255, 0, 0)  # blue
COL_FUSED = (0, 0, 255)  # red

# =========================
# Thresholds
# =========================
YOLO_CONF_LOW = 0.05
PCDET_CONF_LOW = 0.01
YOLO_KEEP_TOPK = 1
PCDET_CONF_HIGH_UNMATCHED = 0.10
FUSED_KEEP_THRESH = 0.10

# matching
MATCH_IOU_THR = 0.05
MATCH_CENTER_DIST_THR_PX = 160.0
PROJ_BOX_EXPAND_RATIO = 0.18
COST_ALPHA = 0.7

# =========================
# Overlap / quality scoring
# =========================
OV_IOU_THR = 0.20
OV_CENTER_DIST_PX = 40.0
OV_CONTAINMENT_THR = 0.60
OV_EDGE_GAP_PX = 6.0
Q_IOU_REF = 0.30
Q_DIST_REF_PX = 80.0
Q_EDGE_REF_PX = 8.0

# =========================
# Fusion: q -> affects score
#   s_eff = s * (a + b*q)
#   fused_score = DS(s_eff_img, s_eff_lid) * ((1-g)+g*q)
# =========================
Q_GATE_A = 0.35
Q_GATE_B = 0.65
FUSED_Q_GATING_STRENGTH = 0.60

# =========================
# Debug / Eval
# =========================
RANDOM_SEED = 42
NUM_DEBUG_FRAMES = 20
NUM_EVAL_FRAMES = 800
EVAL_IOU_THRESHOLDS = [0.7]
EVAL_SCORE_THR_PCD = 0.10
EVAL_SCORE_THR_FUSED = 0.10
PROGRESS_EVERY = 20

# =========================
# BEV
# =========================
BEV_X_RANGE = (0.0, 80.0)
BEV_Y_RANGE = (-30.0, 30.0)
BEV_RESOLUTION = 0.10
BEV_DENSITY_GAMMA = 0.55
BEV_DILATE_KSIZE = 3
BEV_HEADER_H = 84

# =========================
# FRONT (X-Z)
# =========================
FRONT_RESOLUTION = 0.03
FRONT_MIN_WPX = 220
FRONT_MIN_HPX = 320
FRONT_CROP_EXPAND_XY = 1.0
FRONT_CROP_EXPAND_Z = 0.8
FRONT_DENSITY_GAMMA = 0.55
FRONT_DILATE_KSIZE = 3
FRONT_WINDOW_MIN_X = 10.0
FRONT_WINDOW_MAX_X = 32.0
FRONT_WINDOW_MIN_Z = 10.0
FRONT_WINDOW_MAX_Z = 26.0
FRONT_WINDOW_MARGIN_X = 2.0
FRONT_WINDOW_MARGIN_Z = 2.0
FRONT_MAX_RENDER_POINTS = 200000
FRONT_HEADER_H = 96  # all key text goes here, so it never covers the point cloud

# =========================
# Panel width ratios
# =========================
BEV_PANEL_WIDTH_RATIO = 0.45
FRONT_PANEL_WIDTH_RATIO = 0.95  # <- tune this to widen the FRONT panel

# =========================
# Text styles
# =========================
DET_FONT_SCALE = 0.55  # smaller text next to detection boxes
DET_TEXT_THICKNESS = 2
HEADER_FONT_SCALE = 0.62
HEADER_TEXT_THICKNESS = 2
LEGEND_BAR_H = 150     # legend lives in its own bottom bar
LEGEND_FONT_MAIN = 0.90  # larger legend font
LEGEND_FONT_SUB = 0.75
LEGEND_THICKNESS = 2

# =========================
# FUSED 3D box refine by points
# =========================
REFINE_ENABLE = True
REFINE_MIN_Q = 0.55
REFINE_MIN_POINTS = 25
REFINE_GOOD_POINTS = 120
REFINE_EXPAND_XY = 0.30
REFINE_EXPAND_Z = 0.30
REFINE_ABS_QUANTILE_XY = 0.90
REFINE_ABS_QUANTILE_Z = 0.90
REFINE_PAD_XY = 0.10
REFINE_PAD_Z = 0.12
REFINE_DIM_SCALE_MIN = 0.75
REFINE_DIM_SCALE_MAX = 1.25
REFINE_CENTER_MAX_SHIFT_XY = 1.20
REFINE_CENTER_MAX_SHIFT_Z = 1.00
REFINE_ALPHA_MAX = 0.85

# =========================
# Sensor weights table (base for DS)
# =========================
PCDET_AP07 = {
    "Drone": 91.6,
    "Plastic_sheet": 55.52,
    "Kite": 40.61,
    "Balloon": 99.96,
    "Bird": 73.37
}

# =========================
# Data structures
# =========================
@dataclass
class Det2D:
    # 2D detection: pixel-space box [x1, y1, x2, y2], class name, confidence
    xyxy: List[float]
    cls_name: str
    score: float

@dataclass
class Det3D:
    # 3D detection: box7 = [x, y, z, dx, dy, dz, yaw] in lidar frame,
    # plus an optional cached 2D projection of the box onto the image
    box7: np.ndarray
    cls_name: str
    score: float
    proj_xyxy: Optional[List[float]] = None

# =========================
# Utils
# =========================
def ensure_dir(p: str):
    """Create directory p (and parents) if it does not exist."""
    os.makedirs(p, exist_ok=True)

def canonical_class(name: str) -> str:
    """Normalize a class label to the canonical spelling used in CLASSES."""
    if name is None:
        return name
    n = name.strip()
    n_low = n.lower().replace("-", "_")
    mapping = {
        "drone": "Drone",
        "kite": "Kite",
        "balloon": "Balloon",
        "bird": "Bird",
        "plastic_sheet": "Plastic_sheet",
        "plastic": "Plastic_sheet",
        "plasticsheet": "Plastic_sheet",
    }
    return mapping.get(n_low, n)

def find_image_for_frame(frame_id: str) -> Optional[str]:
    """Return the image path in IMG_DIR matching frame_id, or None."""
    for ext in [".jpg", ".png", ".jpeg", ".bmp"]:
        p = os.path.join(IMG_DIR, frame_id + ext)
        if os.path.exists(p):
            return p
    # fallback: any extension
    g = glob.glob(os.path.join(IMG_DIR, frame_id + ".*"))
    return g[0] if g else None

def clip_box_xyxy(box, w, h):
    """Clip a [x1,y1,x2,y2] box to image bounds and fix inverted corners."""
    x1, y1, x2, y2 = box
    x1 = max(0, min(w - 1, x1))
    y1 = max(0, min(h - 1, y1))
    x2 = max(0, min(w - 1, x2))
    y2 = max(0, min(h - 1, y2))
    if x2 < x1:
        x1, x2 = x2, x1
    if y2 < y1:
        y1, y2 = y2, y1
    return [float(x1), float(y1), float(x2), float(y2)]

def expand_box_xyxy(box, ratio=0.1):
    """Expand a box around its center by (1 + ratio) in each dimension."""
    x1, y1, x2, y2 = box
    cx = (x1 + x2) / 2.0
    cy = (y1 + y2) / 2.0
    w = max(1.0, (x2 - x1))
    h = max(1.0, (y2 - y1))
    w2 = w * (1.0 + ratio)
    h2 = h * (1.0 + ratio)
    return [cx - w2/2, cy - h2/2, cx + w2/2, cy + h2/2]

def box_iou2d(a, b) -> float:
    """Standard 2D IoU of two [x1,y1,x2,y2] boxes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)
    iw = max(0.0, ix2 - ix1)
    ih = max(0.0, iy2 - iy1)
    inter = iw * ih
    area_a = max(0.0, ax2 - ax1) * max(0.0, ay2 - ay1)
    area_b = max(0.0, bx2 - bx1) * max(0.0, by2 - by1)
    return float(inter / (area_a + area_b - inter + 1e-9))

def box_intersection_area(a, b) -> float:
    """Intersection area of two [x1,y1,x2,y2] boxes."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    ix1 = max(ax1, bx1)
    iy1 = max(ay1, by1)
    ix2 = min(ax2, bx2)
    iy2 = min(ay2, by2)
    iw = max(0.0, ix2 - ix1)
    ih = max(0.0, iy2 - iy1)
    return float(iw * ih)

def box_area(a) -> float:
    """Area of a [x1,y1,x2,y2] box (negative extents clamp to 0)."""
    return float(max(0.0, a[2]-a[0]) * max(0.0, a[3]-a[1]))

def center_of_box(box):
    """Return (cx, cy) of a [x1,y1,x2,y2] box."""
    x1, y1, x2, y2 = box
    return (0.5*(x1+x2), 0.5*(y1+y2))

def center_dist_px(a, b) -> float:
    """Euclidean distance (pixels) between two box centers."""
    ac = center_of_box(a)
    bc = center_of_box(b)
    return float(math.hypot(ac[0]-bc[0], ac[1]-bc[1]))

def point_in_box(pt, box):
    """True if point (x, y) lies inside the closed box."""
    x, y = pt
    x1, y1, x2, y2 = box
    return (x >= x1) and (x <= x2) and (y >= y1) and (y <= y2)

def rect_edge_gap(a, b) -> float:
    """Smallest axis-aligned gap between two boxes (0 when they overlap)."""
    ax1, ay1, ax2, ay2 = a
    bx1, by1, bx2, by2 = b
    if ax2 < bx1:
        gx = bx1 - ax2
    elif bx2 < ax1:
        gx = ax1 - bx2
    else:
        gx = 0.0
    if ay2 < by1:
        gy = by1 - ay2
    elif by2 < ay1:
        gy = ay1 - by2
    else:
        gy = 0.0
    return float(math.hypot(gx, gy))

# =========================
# Match quality q (0..1)
# =========================
def overlap_score_2d(a, b, img_w, img_h) -> Tuple[float, Dict]:
    """Blend IoU, center distance, containment, edge gap and a center-inside
    bonus into one overlap score in [0, 1]; also returns the raw terms."""
    iou = box_iou2d(a, b)
    dist = center_dist_px(a, b)
    gap = rect_edge_gap(a, b)
    inter = box_intersection_area(a, b)
    min_area = max(1e-9, min(box_area(a), box_area(b)))
    containment = float(np.clip(inter / min_area, 0.0, 1.0))
    ac = center_of_box(a)
    bc = center_of_box(b)
    center_in = (point_in_box(ac, b) or point_in_box(bc, a))
    # each term is normalized by its reference constant then clipped
    iou_term = min(iou / Q_IOU_REF, 1.0)
    dist_term = max(0.0, 1.0 - dist / max(1e-6, Q_DIST_REF_PX))
    gap_term = max(0.0, 1.0 - gap / max(1e-6, Q_EDGE_REF_PX))
    contain_term = containment
    center_bonus = 0.15 if center_in else 0.0
    score = 0.45*iou_term + 0.25*dist_term + 0.20*contain_term + 0.10*gap_term + center_bonus
    score = float(np.clip(score, 0.0, 1.0))
    info = {"iou": iou, "dist": dist, "gap": gap, "containment": containment, "center_in": center_in}
    return score, info

def match_quality_2d(yolo_box, proj_box, img_w, img_h) -> Tuple[float, Dict]:
    """Map overlap score to match quality q in [0.15, 1.0] (never fully zero)."""
    score, info = overlap_score_2d(yolo_box, proj_box, img_w, img_h)
    q = float(np.clip(0.15 + 0.85 * score, 0.0, 1.0))
    return q, info

# =========================
# Drawing helpers
# =========================
def draw_text_box_fit(img, x, y, text, color, font_scale=0.6, thickness=2, pad=4, bg=(0, 0, 0)):
    """
    Key helper: guarantees the text box lies fully inside the image;
    if the text is too wide it shrinks font_scale automatically (down to 0.35).
    """
    H, W = img.shape[:2]
    fs = float(font_scale)
    # shrink until the text fits horizontally (or the lower bound is hit)
    while True:
        (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fs, thickness)
        if (tw + 2*pad <= W) or (fs <= 0.35):
            break
        fs -= 0.05
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fs, thickness)
    x = int(round(x))
    y = int(round(y))
    # move the anchor so the whole text box stays inside the image
    x = max(0, min(W - (tw + 2*pad) - 1, x))
    y = max(0, min(H - (th + 2*pad) - 1, y))
    cv2.rectangle(img, (x, y), (x + tw + 2*pad, y + th + 2*pad), bg, -1)
    cv2.putText(img, text, (x + pad, y + pad + th), cv2.FONT_HERSHEY_SIMPLEX, fs, color, thickness, cv2.LINE_AA)

def draw_dashed_line(img, p1, p2, color, thickness=2, dash_len=14, gap_len=8):
    """Draw a dashed line segment from p1 to p2."""
    x1, y1 = p1
    x2, y2 = p2
    dx = x2 - x1
    dy = y2 - y1
    length = math.hypot(dx, dy)
    if length < 1e-6:
        return
    vx = dx / length
    vy = dy / length
    dist = 0.0
    while dist < length:
        seg_start = dist
        seg_end = min(dist + dash_len, length)
        sx = int(round(x1 + vx * seg_start))
        sy = int(round(y1 + vy * seg_start))
        ex = int(round(x1 + vx * seg_end))
        ey = int(round(y1 + vy * seg_end))
        cv2.line(img, (sx, sy), (ex, ey), color, thickness, cv2.LINE_AA)
        dist += dash_len + gap_len

def draw_dashed_rect(img, box, color, thickness=2, dash_len=14, gap_len=8):
    """Draw a dashed axis-aligned rectangle given [x1,y1,x2,y2]."""
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    draw_dashed_line(img, (x1, y1), (x2, y1), color, thickness, dash_len, gap_len)
    draw_dashed_line(img, (x2, y1), (x2, y2), color, thickness, dash_len, gap_len)
    draw_dashed_line(img, (x2, y2), (x1, y2), color, thickness, dash_len, gap_len)
    draw_dashed_line(img, (x1, y2), (x1, y1), color, thickness, dash_len, gap_len)

def draw_dashed_poly(img, pts_xy: np.ndarray, color, thickness=2, dash_len=14, gap_len=8):
    """
    Draw a closed dashed polygon. pts_xy: (N, 2) int pixel coordinates.
    """
    n = pts_xy.shape[0]
    for i in range(n):
        p1 = tuple(map(int, pts_xy[i]))
        p2 = tuple(map(int, pts_xy[(i+1) % n]))
        draw_dashed_line(img, p1, p2, color, thickness, dash_len, gap_len)

def draw_center_marker_cross(img, x, y, color, size=2, thickness=1):
    """
    Small cross marker (occludes the point cloud far less than a filled disc).
    """
    x = int(round(x))
    y = int(round(y))
    cv2.line(img, (x - size, y), (x + size, y), color, thickness, cv2.LINE_AA)
    cv2.line(img, (x, y - size), (x, y + size), color, thickness, cv2.LINE_AA)

def resize_keep_aspect(img: np.ndarray, target_w: Optional[int] = None, target_h: Optional[int] = None, interp=cv2.INTER_AREA) -> np.ndarray:
    """Resize preserving aspect ratio; either target may be None."""
    h, w = img.shape[:2]
    if target_w is None and target_h is None:
        return img
    if target_w is not None and target_h is not None:
        scale = min(target_w / max(1, w), target_h / max(1, h))
    elif target_w is not None:
        scale = target_w / max(1, w)
    else:
        scale = target_h / max(1, h)
    nw = max(1, int(round(w * scale)))
    nh = max(1, int(round(h * scale)))
    if nw == w and nh == h:
        return img
    return cv2.resize(img, (nw, nh), interpolation=interp)

def fit_to_rect(img: np.ndarray, target_w: int, target_h: int, pad_color=(0, 0, 0), interp=cv2.INTER_AREA) -> np.ndarray:
    """Letterbox img into a target_w x target_h canvas, centered, padded."""
    resized = resize_keep_aspect(img, target_w=target_w, target_h=target_h, interp=interp)
    h, w = resized.shape[:2]
    out = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    out[:] = pad_color
    y0 = (target_h - h) // 2
    x0 = (target_w - w) // 2
    out[y0:y0 + h, x0:x0 + w] = resized
    return out

def rasterize_density(rr: np.ndarray, cc: np.ndarray, H: int, W: int, gamma: float = 0.55, dilate_ksize: int = 3) -> np.ndarray:
    """Render point hit-counts as a log/gamma-scaled grayscale density image
    (BGR), optionally dilated so sparse points remain visible."""
    rr = rr.astype(np.int64)
    cc = cc.astype(np.int64)
    idx = rr * W + cc
    counts = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)
    if counts.max() > 0:
        dens = np.log1p(counts)
        dens = dens / (dens.max() + 1e-6)
        dens = np.power(dens, gamma)
        gray = (dens * 255.0).astype(np.uint8)
    else:
        gray = np.zeros((H, W), dtype=np.uint8)
    if dilate_ksize is not None and dilate_ksize >= 2:
        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_ksize, dilate_ksize))
        gray = cv2.dilate(gray, k, iterations=1)
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)

# =========================
# DS fusion with q gating
# =========================
def get_sensor_weights(cls_name: str) -> Tuple[float, float]:
    """Per-class (w_img, w_lidar), normalized to sum to 1, derived from
    PCDet AP@0.7: the better lidar does on a class, the more it is trusted."""
    c = canonical_class(cls_name)
    ap = PCDET_AP07.get(c, 70.0) / 100.0
    w_lidar = float(np.clip(ap, 0.15, 0.95))
    w_img = float(np.clip(1.0 - w_lidar, 0.05, 0.85))
    s = w_img + w_lidar
    return w_img / s, w_lidar / s

def ds_fuse_singleton(cls_a: str, s_a: float, w_a: float, cls_b: str, s_b: float, w_b: float) -> Tuple[str, float]:
    """Dempster-Shafer combination of two singleton class beliefs.

    Each source contributes mass m = w * s to its class and 1 - m to 'unknown';
    conflict K only arises when the two classes disagree. Returns the winning
    class and its combined mass."""
    cls_a = canonical_class(cls_a)
    cls_b = canonical_class(cls_b)
    m_a = float(np.clip(w_a * s_a, 0.0, 0.999999))
    m_b = float(np.clip(w_b * s_b, 0.0, 0.999999))
    th_a = 1.0 - m_a
    th_b = 1.0 - m_b
    K = (m_a * m_b) if (cls_a != cls_b) else 0.0
    denom = 1.0 - K + 1e-9
    if cls_a == cls_b:
        m = (m_a*m_b + m_a*th_b + th_a*m_b) / denom
        return cls_a, float(m)
    else:
        ma = (m_a * th_b) / denom
        mb = (th_a * m_b) / denom
        return (cls_a, float(ma)) if ma >= mb else (cls_b, float(mb))

def fuse_with_quality(ydet: Det2D, pdet: Det3D, q: float) -> Tuple[str, float]:
    """Fuse a matched (YOLO, PCDet) pair: both scores are gated by q before
    DS combination, and the fused score is softly gated by q again."""
    w_img, w_lidar = get_sensor_weights(pdet.cls_name)
    gate = float(np.clip(Q_GATE_A + Q_GATE_B * q, 0.0, 1.0))
    s_img = float(np.clip(ydet.score * gate, 0.0, 1.0))
    s_lid = float(np.clip(pdet.score * gate, 0.0, 1.0))
    fused_cls, fused_score = ds_fuse_singleton(
        ydet.cls_name, s_img, w_img,
        pdet.cls_name, s_lid, w_lidar
    )
    fused_score = float(np.clip(
        fused_score * ((1.0 - FUSED_Q_GATING_STRENGTH) + FUSED_Q_GATING_STRENGTH * q),
        0.0, 1.0
    ))
    return fused_cls, fused_score

# =========================
# 3D geometry & projection
# =========================
def boxes3d_to_corners(boxes7: np.ndarray) -> np.ndarray:
    """Convert (N, 7) [x,y,z,dx,dy,dz,yaw] boxes to (N, 8, 3) corners,
    rotating the local box corners by yaw around the z axis."""
    N = boxes7.shape[0]
    corners = np.zeros((N, 8, 3), dtype=np.float32)
    for i in range(N):
        x, y, z, dx, dy, dz, yaw = boxes7[i].tolist()
        local = np.array([
            [ dx/2,  dy/2,  dz/2],
            [ dx/2, -dy/2,  dz/2],
            [-dx/2, -dy/2,  dz/2],
            [-dx/2,  dy/2,  dz/2],
            [ dx/2,  dy/2, -dz/2],
            [ dx/2, -dy/2, -dz/2],
            [-dx/2, -dy/2, -dz/2],
            [-dx/2,  dy/2, -dz/2],
        ], dtype=np.float32)
        cy = math.cos(yaw); sy = math.sin(yaw)
        R = np.array([[cy, -sy, 0.0], [sy, cy, 0.0], [0.0, 0.0, 1.0]], dtype=np.float32)
        corners[i] = (local @ R.T) + np.array([x, y, z], dtype=np.float32)
    return corners

def project_points_lidar_to_img(pts_lidar: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, use_distortion=True):
    """Project (N, 3) lidar points into the image.

    Returns (img_pts (N,2) float32, valid (N,) bool) where valid means the
    point is in front of the camera (z > 0)."""
    fx, fy, cx, cy = calib["fx"], calib["fy"], calib["cx"], calib["cy"]
    K = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]], dtype=np.float64)
    dist = calib["dist"].astype(np.float64) if use_distortion else None
    pts_h = np.concatenate([pts_lidar.astype(np.float64), np.ones((pts_lidar.shape[0], 1), dtype=np.float64)], axis=1)
    pts_cam = (T_lidar2cam @ pts_h.T).T[:, :3]
    valid = pts_cam[:, 2] > 1e-6
    rvec = np.zeros((3, 1), dtype=np.float64)
    tvec = np.zeros((3, 1), dtype=np.float64)
    img_pts, _ = cv2.projectPoints(pts_cam, rvec, tvec, K, dist)
    return img_pts.reshape(-1, 2).astype(np.float32), valid

def get_extrinsic_matrix(calib: Dict, pts_lidar_xyz: np.ndarray, img_w: int, img_h: int) -> np.ndarray:
    """Resolve whether CALIB stores lidar->cam or cam->lidar.

    In 'auto' mode both T and inv(T) are tried on a point sample; the one
    projecting more points inside the image wins."""
    T = calib["extrinsic_matrix"].copy()
    if EXTRINSIC_MODE == "lidar2cam":
        return T
    if EXTRINSIC_MODE == "cam2lidar":
        return np.linalg.inv(T)
    if pts_lidar_xyz is None or pts_lidar_xyz.shape[0] < 100:
        return T
    pts = pts_lidar_xyz
    if pts.shape[0] > 8000:
        pts = pts[np.random.choice(pts.shape[0], 8000, replace=False)]
    def score_for(Tuse):
        # fraction of valid projected points that land inside the image
        img_pts, valid = project_points_lidar_to_img(pts, calib, Tuse, img_w, img_h, use_distortion=USE_DISTORTION)
        img_pts = img_pts[valid]
        if img_pts.shape[0] == 0:
            return 0.0
        inside = (img_pts[:, 0] >= 0) & (img_pts[:, 0] < img_w) & (img_pts[:, 1] >= 0) & (img_pts[:, 1] < img_h)
        return float(inside.mean())
    s1 = score_for(T)
    s2 = score_for(np.linalg.inv(T))
    chosen = T if s1 >= s2 else np.linalg.inv(T)
    print(f"[CALIB] auto | lidar2cam_inlier={s1:.3f} cam2lidar_inlier={s2:.3f} -> choose {'lidar2cam' if s1>=s2 else 'cam2lidar'}")
    return chosen

def project_boxes3d_to_2d(boxes7: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, use_distortion=True):
    """
    Change vs. previous version: more robust.
    - The old code returned None when valid.sum() < 4, which could silently
      drop the PCDet box (the "only a red box" symptom).
    - Now a drawable box is returned as long as valid.sum() >= 1 (a tiny box
      is synthesized in the degenerate single-point case).
    """
    if boxes7.shape[0] == 0:
        return []
    corners = boxes3d_to_corners(boxes7)
    out = []
    for i in range(corners.shape[0]):
        img_pts, valid = project_points_lidar_to_img(corners[i], calib, T_lidar2cam, img_w, img_h, use_distortion)
        if valid.sum() < 1:
            out.append(None)
            continue
        xs = img_pts[valid, 0]
        ys = img_pts[valid, 1]
        x1, y1, x2, y2 = float(xs.min()), float(ys.min()), float(xs.max()), float(ys.max())
        # degenerate case (single visible corner) -> synthesize a small box
        if (x2 - x1) < 2.0:
            x1 -= 2.0
            x2 += 2.0
        if (y2 - y1) < 2.0:
            y1 -= 2.0
            y2 += 2.0
        box = clip_box_xyxy([x1, y1, x2, y2], img_w, img_h)
        if (box[2]-box[0]) < 2 or (box[3]-box[1]) < 2:
            out.append(None)
        else:
            out.append(box)
    return out

# =========================
# Matching (Hungarian)
# =========================
def hungarian_match(cost: np.ndarray) -> List[Tuple[int, int]]:
    """Optimal assignment via scipy when available; otherwise a greedy
    lowest-cost-first fallback."""
    try:
        from scipy.optimize import linear_sum_assignment
        r, c = linear_sum_assignment(cost)
        return list(zip(r.tolist(), c.tolist()))
    except Exception:
        matches = []
        used_r, used_c = set(), set()
        idxs = np.dstack(np.unravel_index(np.argsort(cost.ravel()), cost.shape))[0]
        for i, j in idxs:
            if i in used_r or j in used_c:
                continue
            used_r.add(int(i)); used_c.add(int(j))
            matches.append((int(i), int(j)))
        return matches

def associate_yolo_pcdet(yolo: List[Det2D], pcdet: List[Det3D], img_w: int, img_h: int):
    """Associate YOLO 2D boxes with projected PCDet boxes.

    Cost mixes (1 - IoU) and normalized center distance (COST_ALPHA), with a
    bonus when one center lies inside the other box. Assignments are then
    gated by IoU / center-distance / center-inside thresholds.

    Returns (y2p, p2y): index maps yolo->pcdet and pcdet->yolo."""
    N, M = len(yolo), len(pcdet)
    if N == 0 or M == 0:
        return {}, {}
    diag = math.hypot(img_w, img_h) + 1e-9
    cost = np.ones((N, M), dtype=np.float32) * 10.0
    for i in range(N):
        a = yolo[i].xyxy
        a_c = center_of_box(a)
        for j in range(M):
            b0 = pcdet[j].proj_xyxy
            if b0 is None:
                continue
            b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h)
            b_c = center_of_box(b)
            iou = box_iou2d(a, b)
            cd = math.hypot(a_c[0]-b_c[0], a_c[1]-b_c[1]) / diag
            inside = point_in_box(a_c, b) or point_in_box(b_c, a)
            bonus = -0.15 if inside else 0.0
            cost[i, j] = COST_ALPHA*(1.0 - iou) + (1.0 - COST_ALPHA)*cd + bonus
    pairs = hungarian_match(cost)
    y2p, p2y = {}, {}
    for i, j in pairs:
        b0 = pcdet[j].proj_xyxy
        if b0 is None:
            continue
        b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h)
        iou = box_iou2d(yolo[i].xyxy, b)
        cd = center_dist_px(yolo[i].xyxy, b)
        y_c = center_of_box(yolo[i].xyxy)
        b_c = center_of_box(b)
        inside = point_in_box(y_c, b) or point_in_box(b_c, yolo[i].xyxy)
        if (iou >= MATCH_IOU_THR) or (cd <= MATCH_CENTER_DIST_THR_PX) or inside:
            y2p[i] = j
            p2y[j] = i
    return y2p, p2y

# =========================
# OBB & refine
# =========================
def points_in_obb(points_xyz: np.ndarray, box7: np.ndarray, expand_xy: float, expand_z: float) -> np.ndarray:
    """Boolean mask of points inside the (optionally expanded) oriented box,
    computed by rotating points into the box's local frame."""
    cx, cy, cz, dx, dy, dz, yaw = box7.tolist()
    px = points_xyz[:, 0] - cx
    py = points_xyz[:, 1] - cy
    pz = points_xyz[:, 2] - cz
    c = math.cos(-yaw)
    s = math.sin(-yaw)
    lx = c * px - s * py
    ly = s * px + c * py
    lz = pz
    hx = dx / 2.0 + expand_xy
    hy = dy / 2.0 + expand_xy
    hz = dz / 2.0 + expand_z
    return (np.abs(lx) <= hx) & (np.abs(ly) <= hy) & (np.abs(lz) <= hz)

def refine_box_by_points_robust(raw_points: Optional[np.ndarray], box7: np.ndarray, q: float) -> np.ndarray:
    """Robustly shrink/shift a fused box toward the enclosed point cluster.

    Only runs when match quality q and enclosed-point count are high enough.
    Center = per-axis median; size = 2 * abs-quantile + pad, both clamped
    relative to the original box, then blended with strength alpha in
    [0, REFINE_ALPHA_MAX] that grows with q and point count. Yaw is kept."""
    if (not REFINE_ENABLE) or raw_points is None or raw_points.shape[0] == 0:
        return box7
    if q < REFINE_MIN_Q:
        return box7
    pts_xyz = raw_points[:, :3]
    mask = points_in_obb(pts_xyz, box7, REFINE_EXPAND_XY, REFINE_EXPAND_Z)
    pts = pts_xyz[mask]
    n = int(pts.shape[0])
    if n < REFINE_MIN_POINTS:
        return box7
    cx, cy, cz, dx, dy, dz, yaw = box7.tolist()
    # rotate enclosed points into the box's local frame
    px = pts[:, 0] - cx
    py = pts[:, 1] - cy
    pz = pts[:, 2] - cz
    c = math.cos(-yaw)
    s = math.sin(-yaw)
    lx = c * px - s * py
    ly = s * px + c * py
    lz = pz
    mx = float(np.median(lx))
    my = float(np.median(ly))
    mz = float(np.median(lz))
    ax = np.abs(lx - mx)
    ay = np.abs(ly - my)
    az = np.abs(lz - mz)
    hx = float(np.quantile(ax, REFINE_ABS_QUANTILE_XY) + REFINE_PAD_XY)
    hy = float(np.quantile(ay, REFINE_ABS_QUANTILE_XY) + REFINE_PAD_XY)
    hz = float(np.quantile(az, REFINE_ABS_QUANTILE_Z) + REFINE_PAD_Z)
    new_dx_raw = max(0.05, 2.0 * hx)
    new_dy_raw = max(0.05, 2.0 * hy)
    new_dz_raw = max(0.05, 2.0 * hz)
    dx0 = max(0.05, float(dx))
    dy0 = max(0.05, float(dy))
    dz0 = max(0.05, float(dz))
    # clamp size change relative to the original dimensions
    new_dx = float(np.clip(new_dx_raw, dx0 * REFINE_DIM_SCALE_MIN, dx0 * REFINE_DIM_SCALE_MAX))
    new_dy = float(np.clip(new_dy_raw, dy0 * REFINE_DIM_SCALE_MIN, dy0 * REFINE_DIM_SCALE_MAX))
    new_dz = float(np.clip(new_dz_raw, dz0 * REFINE_DIM_SCALE_MIN, dz0 * REFINE_DIM_SCALE_MAX))
    # clamp center shift
    mx = float(np.clip(mx, -REFINE_CENTER_MAX_SHIFT_XY, REFINE_CENTER_MAX_SHIFT_XY))
    my = float(np.clip(my, -REFINE_CENTER_MAX_SHIFT_XY, REFINE_CENTER_MAX_SHIFT_XY))
    mz = float(np.clip(mz, -REFINE_CENTER_MAX_SHIFT_Z, REFINE_CENTER_MAX_SHIFT_Z))
    q_strength = (q - REFINE_MIN_Q) / max(1e-6, (1.0 - REFINE_MIN_Q))
    n_strength = (n - REFINE_MIN_POINTS) / max(1.0, float(REFINE_GOOD_POINTS - REFINE_MIN_POINTS))
    q_strength = float(np.clip(q_strength, 0.0, 1.0))
    n_strength = float(np.clip(n_strength, 0.0, 1.0))
    alpha = REFINE_ALPHA_MAX * (q_strength * n_strength)
    alpha = float(np.clip(alpha, 0.0, 1.0))
    if alpha <= 1e-4:
        return box7
    # rotate the local center shift back into the world frame
    cyaw = math.cos(yaw)
    syaw = math.sin(yaw)
    dxw = (cyaw * mx - syaw * my) * alpha
    dyw = (syaw * mx + cyaw * my) * alpha
    dzw = mz * alpha
    cx2 = cx + dxw
    cy2 = cy + dyw
    cz2 = cz + dzw
    dx2 = dx0 + alpha * (new_dx - dx0)
    dy2 = dy0 + alpha * (new_dy - dy0)
    dz2 = dz0 + alpha * (new_dz - dz0)
    return np.array([cx2, cy2, cz2, dx2, dy2, dz2, yaw], dtype=np.float32)

# =========================
# BEV panel (header + density)
# =========================
def make_bev_image(points: Optional[np.ndarray], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray:
    """Render the BEV (X-Y) panel: density image plus header bar with labels;
    FUSED drawn solid first, PCDet dashed on top so both remain visible."""
    x_min, x_max = BEV_X_RANGE
    y_min, y_max = BEV_Y_RANGE
    res = BEV_RESOLUTION
    W = int((y_max - y_min) / res)
    H = int((x_max - x_min) / res)
    bev_data = np.zeros((H, W, 3), dtype=np.uint8)
    if points is not None and points.shape[0] > 0:
        mask = (points[:, 0] >= x_min) & (points[:, 0] <= x_max) & (points[:, 1] >= y_min) & (points[:, 1] <= y_max)
        pts = points[mask]
        if pts.shape[0] > 0:
            rr = ((x_max - pts[:, 0]) / res).astype(np.int32)
            cc = ((pts[:, 1] - y_min) / res).astype(np.int32)
            rr = np.clip(rr, 0, H - 1)
            cc = np.clip(cc, 0, W - 1)
            bev_data = rasterize_density(rr, cc, H, W, gamma=BEV_DENSITY_GAMMA, dilate_ksize=BEV_DILATE_KSIZE)
    # header (so text never covers the point cloud)
    bev = np.zeros((H + BEV_HEADER_H, W, 3), dtype=np.uint8)
    bev[:BEV_HEADER_H, :] = (16, 16, 16)
    bev[BEV_HEADER_H:, :] = bev_data
    # header text
    draw_text_box_fit(bev, 8, 8, "BEV (X-Y)", (255, 255, 255), font_scale=HEADER_FONT_SCALE, thickness=HEADER_TEXT_THICKNESS)
    draw_text_box_fit(bev, 8, 38, f"range X[{x_min:.0f},{x_max:.0f}] Y[{y_min:.0f},{y_max:.0f}] res={res:.2f}", (220, 220, 220), font_scale=0.55, thickness=2)
    # labels in header
    y_line = 62
    if pcdet_top is not None:
        draw_text_box_fit(bev, 8, y_line, f"PCDet: {pcdet_top.cls_name} {pcdet_top.score:.2f}", COL_PCDET, font_scale=0.58, thickness=2)
        y_line += 24
    if fused_top is not None:
        draw_text_box_fit(bev, 8, y_line, f"FUSED: {fused_top.cls_name} {fused_top.score:.2f}", COL_FUSED, font_scale=0.58, thickness=2)
    # draw rotated boxes on data region (offset by header)
    def draw_box_bev(det: Det3D, color, dashed: bool, thick: int):
        x, y, z, dx, dy, dz, yaw = det.box7.tolist()
        cy = math.cos(yaw); sy = math.sin(yaw)
        local = np.array([[ dx/2,  dy/2], [ dx/2, -dy/2], [-dx/2, -dy/2], [-dx/2,  dy/2]], dtype=np.float32)
        R = np.array([[cy, -sy], [sy, cy]], dtype=np.float32)
        corners_xy = (local @ R.T) + np.array([x, y], dtype=np.float32)
        rr = ((x_max - corners_xy[:, 0]) / res).astype(np.int32)
        cc = ((corners_xy[:, 1] - y_min) / res).astype(np.int32)
        rr = np.clip(rr, 0, H - 1)
        cc = np.clip(cc, 0, W - 1)
        pts_draw = np.stack([cc, rr], axis=1).astype(np.int32)
        pts_draw[:, 1] += BEV_HEADER_H  # y offset
        if dashed:
            draw_dashed_poly(bev, pts_draw, color, thickness=thick, dash_len=12, gap_len=8)
        else:
            cv2.polylines(bev, [pts_draw.reshape(-1, 1, 2)], True, color, thick, cv2.LINE_AA)
        # center marker (small cross)
        rc = int((x_max - x) / res)
        cc0 = int((y - y_min) / res)
        rc = int(np.clip(rc, 0, H - 1)) + BEV_HEADER_H
        cc0 = int(np.clip(cc0, 0, W - 1))
        draw_center_marker_cross(bev, cc0, rc, color, size=2, thickness=1)
    # draw order: FUSED (solid) first, then PCDet (dashed) on top -> both visible
    if fused_top is not None:
        draw_box_bev(fused_top, COL_FUSED, dashed=False, thick=2)
    if pcdet_top is not None:
        draw_box_bev(pcdet_top, COL_PCDET, dashed=True, thick=2)
    return bev

# =========================
# FRONT (X-Z) panel (header + density + clamp)
# =========================
def make_front_xz_crop(points: Optional[np.ndarray], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D]) -> np.ndarray:
    """
    Key changes:
    - text lives in the top header (never covers the point cloud)
    - red center marker shrunk to a small cross
    - crop window derives from the (pcdet, fused) union, then gets clamped,
      so the raw PCDet box can never be cropped out of view
    """
    base = np.zeros((FRONT_MIN_HPX, FRONT_MIN_WPX, 3), dtype=np.uint8)
    draw_text_box_fit(base, 6, 8, "FRONT (X-Z)", (255, 255, 255), font_scale=0.7, thickness=2)
    if points is None or points.shape[0] == 0 or (pcdet_top is None and fused_top is None):
        draw_text_box_fit(base, 6, 38, "no points / no det", (220, 220, 220), font_scale=0.6, thickness=2)
        return base
    pts_xyz = points[:, :3]
    # union mask of points in OBBs (so both boxes have supporting points)
    mask_union = None
    if pcdet_top is not None:
        m = points_in_obb(pts_xyz, pcdet_top.box7, FRONT_CROP_EXPAND_XY, FRONT_CROP_EXPAND_Z)
        mask_union = m if mask_union is None else (mask_union | m)
    if fused_top is not None:
        m = points_in_obb(pts_xyz, fused_top.box7, FRONT_CROP_EXPAND_XY, FRONT_CROP_EXPAND_Z)
        mask_union = m if mask_union is None else (mask_union | m)
    crop = pts_xyz[mask_union] if mask_union is not None else pts_xyz
    if crop.shape[0] == 0:
        crop = pts_xyz
    if crop.shape[0] > FRONT_MAX_RENDER_POINTS:
        idx = np.random.choice(crop.shape[0], FRONT_MAX_RENDER_POINTS, replace=False)
        crop = crop[idx]
    # union extents in X/Z from both boxes
    boxes = []
    if pcdet_top is not None:
        boxes.append(pcdet_top.box7)
    if fused_top is not None:
        boxes.append(fused_top.box7)
    boxes_np = np.stack(boxes, axis=0).astype(np.float32)
    corners = boxes3d_to_corners(boxes_np)  # (K,8,3)
    xs = corners[:, :, 0].reshape(-1)
    zs = corners[:, :, 2].reshape(-1)
    x_min_want = float(xs.min() - FRONT_WINDOW_MARGIN_X)
    x_max_want = float(xs.max() + FRONT_WINDOW_MARGIN_X)
    z_min_want = float(zs.min() - FRONT_WINDOW_MARGIN_Z)
    z_max_want = float(zs.max() + FRONT_WINDOW_MARGIN_Z)
    x_span_want = x_max_want - x_min_want
    z_span_want = z_max_want - z_min_want
    # clamp the window spans to a finite, readable range
    x_span = float(np.clip(x_span_want, FRONT_WINDOW_MIN_X, FRONT_WINDOW_MAX_X))
    z_span = float(np.clip(z_span_want, FRONT_WINDOW_MIN_Z, FRONT_WINDOW_MAX_Z))
    cx = 0.5 * (x_min_want + x_max_want)
    cz = 0.5 * (z_min_want + z_max_want)
    x_min = cx - x_span / 2.0
    x_max = cx + x_span / 2.0
    z_min = cz - z_span / 2.0
    z_max = cz + z_span / 2.0
    # filter points to window (optional)
    mwin = (crop[:, 0] >= x_min) & (crop[:, 0] <= x_max) & (crop[:, 2] >= z_min) & (crop[:, 2] <= z_max)
    crop2 = crop[mwin]
    if crop2.shape[0] > 0:
        crop = crop2
    res = FRONT_RESOLUTION
    W = max(FRONT_MIN_WPX, int(math.ceil((x_max - x_min) / res)))
    H = max(FRONT_MIN_HPX, int(math.ceil((z_max - z_min) / res)))
    # density image (data region)
    cc = ((crop[:, 0] - x_min) / res).astype(np.int32)
    rr = ((z_max - crop[:, 2]) / res).astype(np.int32)
    cc = np.clip(cc, 0, W - 1)
    rr = np.clip(rr, 0, H - 1)
    front_data = rasterize_density(rr, cc, H, W, gamma=FRONT_DENSITY_GAMMA, dilate_ksize=FRONT_DILATE_KSIZE)
    # build final with header (no text over points)
    front = np.zeros((H + FRONT_HEADER_H, W, 3), dtype=np.uint8)
    front[:FRONT_HEADER_H, :] = (16, 16, 16)
    front[FRONT_HEADER_H:, :] = front_data
    # header text
    draw_text_box_fit(front, 8, 8, "FRONT (X-Z)", (255, 255, 255), font_scale=HEADER_FONT_SCALE, thickness=HEADER_TEXT_THICKNESS)
    draw_text_box_fit(front, 8, 38, f"clamp win: Xspan={x_span:.1f}m Zspan={z_span:.1f}m res={res:.2f}", (220, 220, 220), font_scale=0.55, thickness=2)
    y_line = 62
    if pcdet_top is not None:
        draw_text_box_fit(front, 8, y_line, f"PCDet: {pcdet_top.cls_name} {pcdet_top.score:.2f}", COL_PCDET, font_scale=0.60, thickness=2)
        y_line += 24
    if fused_top is not None:
        draw_text_box_fit(front, 8, y_line, f"FUSED: {fused_top.cls_name} {fused_top.score:.2f}", COL_FUSED, font_scale=0.60, thickness=2)
    def draw_det_xz(det: Det3D, color, dashed: bool, thick: int, center_size: int):
        # project the 3D box footprint onto the X-Z plane of this panel
        corners1 = boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0]
        xs1 = corners1[:, 0]
        zs1 = corners1[:, 2]
        x1, x2 = float(xs1.min()), float(xs1.max())
        z1, z2 = float(zs1.min()), float(zs1.max())
        x1p = int((x1 - x_min) / res)
        x2p = int((x2 - x_min) / res)
        y1p = int((z_max - z2) / res)
        y2p = int((z_max - z1) / res)
        x1p = int(np.clip(x1p, 0, W - 1))
        x2p = int(np.clip(x2p, 0, W - 1))
        y1p = int(np.clip(y1p, 0, H - 1))
        y2p = int(np.clip(y2p, 0, H - 1))
        # offset to data region
        y1o = y1p + FRONT_HEADER_H
        y2o = y2p + FRONT_HEADER_H
        box = [x1p, y1o, x2p, y2o]
        if dashed:
            draw_dashed_rect(front, box, color, thickness=thick, dash_len=12, gap_len=8)
        else:
            cv2.rectangle(front, (x1p, y1o), (x2p, y2o), color, thick, cv2.LINE_AA)
        cxp = int(np.clip(0.5 * (x1p + x2p), 0, W - 1))
        cyp = int(np.clip(0.5 * (y1o + y2o), FRONT_HEADER_H, H + FRONT_HEADER_H - 1))
        draw_center_marker_cross(front, cxp, cyp, color, size=center_size, thickness=1)
    # draw order: FUSED solid first, PCDet dashed on top -> both visible
    if fused_top is not None:
        # smaller red center (center_size=1)
        draw_det_xz(fused_top, COL_FUSED, dashed=False, thick=2, center_size=1)
    if pcdet_top is not None:
        draw_det_xz(pcdet_top, COL_PCDET, dashed=True, thick=2, center_size=2)
    return front

# =========================
# Legend bar at bottom (clear + large font)
# =========================
def render_legend_bar(total_w: int) -> np.ndarray:
    """Build the dedicated legend bar placed under all panels: color swatches
    with large labels plus two explanatory lines."""
    bar = np.zeros((LEGEND_BAR_H, total_w, 3), dtype=np.uint8)
    bar[:] = (18, 18, 18)
    x = 14
    y = 12
    def item(color, text, x0, y0):
        # colored swatch followed by its label
        cv2.rectangle(bar, (x0, y0 + 6), (x0 + 28, y0 + 34), color, -1)
        draw_text_box_fit(bar, x0 + 38, y0, text, (255, 255, 255), font_scale=LEGEND_FONT_MAIN, thickness=LEGEND_THICKNESS, bg=(18, 18, 18))
    item(COL_YOLO, "YOLO (2D box) label: cls conf", x, y)
    x += 560
    item(COL_PCDET, "PCDet (3D->2D / BEV / X-Z) BLUE DASHED", x, y)
    x += 740
    item(COL_FUSED, "FUSED (3D refined) RED SOLID", x, y)
    # second row
    y2 = 74
    msg1 = "Text near boxes: 'cls conf' (confidence). X-Z/BEV labels are in top header (never cover point cloud)."
    msg2 = "X-Z center marker is small cross (RED smaller). If boxes overlap, dashed-vs-solid keeps both visible."
    draw_text_box_fit(bar, 14, y2, msg1, (235, 235, 235), font_scale=LEGEND_FONT_SUB, thickness=2, bg=(18, 18, 18))
    draw_text_box_fit(bar, 14, y2 + 32, msg2, (235, 235, 235), font_scale=LEGEND_FONT_SUB, thickness=2, bg=(18, 18, 18))
    # top separator line
    cv2.line(bar, (0, 0), (total_w - 1, 0), (60, 60, 60), 2)
    return bar

# =========================
# Visualize
# =========================
def top1_yolo(dets: List[Det2D]) -> Optional[Det2D]:
    """Highest-scoring YOLO detection, or None when empty."""
    return max(dets, key=lambda d: d.score) if dets else None

def top1_3d(dets: List[Det3D]) -> Optional[Det3D]:
    """Highest-scoring 3D detection, or None when empty."""
    return max(dets, key=lambda d: d.score) if dets else None

def make_debug_vis(frame_id: str, img_bgr: np.ndarray, yolo_top: Optional[Det2D], pcdet_top: Optional[Det3D], fused_top: Optional[Det3D], raw_points: Optional[np.ndarray], out_path: str):
    """Compose and save the full debug image: annotated camera view + BEV
    panel + FRONT panel side by side, with the legend bar underneath."""
    vis = img_bgr.copy()
    H, W = vis.shape[:2]
    # frame id
    draw_text_box_fit(vis, 12, H - 42, f"frame: {frame_id}", (0, 220, 220), font_scale=0.72, thickness=2)
    # 2D boxes on image: order matters to guarantee PCDet visible
    # YOLO (green) solid
    if yolo_top is not None:
        b = clip_box_xyxy(yolo_top.xyxy, W, H)
        x1, y1, x2, y2 = [int(round(v)) for v in b]
        cv2.rectangle(vis, (x1, y1), (x2, y2), COL_YOLO, 2, cv2.LINE_AA)
        draw_text_box_fit(vis, x1, y1 - 28, f"YOLO {yolo_top.cls_name} {yolo_top.score:.2f}", COL_YOLO, font_scale=DET_FONT_SCALE, thickness=DET_TEXT_THICKNESS)
    # FUSED (red) solid (draw first)
    if fused_top is not None and fused_top.proj_xyxy is not None:
        b = clip_box_xyxy(fused_top.proj_xyxy, W, H)
        x1, y1, x2, y2 = [int(round(v)) for v in b]
        cv2.rectangle(vis, (x1, y1), (x2, y2), COL_FUSED, 3, cv2.LINE_AA)
        draw_text_box_fit(vis, x2 - 10, y1 - 28, f"FUSED {fused_top.cls_name} {fused_top.score:.2f}", COL_FUSED, font_scale=DET_FONT_SCALE, thickness=DET_TEXT_THICKNESS)
    # PCDet (blue) dashed on TOP -> avoids the "only a red box" symptom
    if pcdet_top is not None and pcdet_top.proj_xyxy is not None:
        b = clip_box_xyxy(pcdet_top.proj_xyxy, W, H)
        draw_dashed_rect(vis, b, COL_PCDET, thickness=2, dash_len=16, gap_len=10)
        draw_text_box_fit(vis, int(b[0]), int(b[3]) + 6, f"PCDet {pcdet_top.cls_name} {pcdet_top.score:.2f}", COL_PCDET, font_scale=DET_FONT_SCALE, thickness=DET_TEXT_THICKNESS)
    # panels
    bev = make_bev_image(raw_points, pcdet_top, fused_top)
    front = make_front_xz_crop(raw_points, pcdet_top, fused_top)
    bev_w = max(120, int(W * BEV_PANEL_WIDTH_RATIO))
    front_w = max(180, int(W * FRONT_PANEL_WIDTH_RATIO))
    bev_r = fit_to_rect(bev, bev_w, H, pad_color=(0, 0, 0), interp=cv2.INTER_NEAREST)
    front_r = fit_to_rect(front, front_w, H, pad_color=(0, 0, 0), interp=cv2.INTER_NEAREST)
    top_canvas = cv2.hconcat([vis, bev_r, front_r])
    # legend bar at bottom
    legend = render_legend_bar(top_canvas.shape[1])
    canvas = cv2.vconcat([top_canvas, legend])
    cv2.imwrite(out_path, canvas)

# =========================
# 3D Eval
# =========================
# NOTE(review): iou3d_matrix_cpu is truncated at the end of this paste chunk
# (it continues in the next chunk); the tail below is kept verbatim.
def iou3d_matrix_cpu(boxes_a: np.ndarray, boxes_b: np.ndarray) -> np.ndarray:
    N, M = boxes_a.shape[0], boxes_b.shape[0]
    if N == 0 or M == 0:
        return np.zeros((N, M), dtype=np.float32)
    from pcdet.ops.iou3d_nms import iou3d_nms_utils
    ta = torch.from_numpy(boxes_a).float()
    tb = torch.from_numpy(boxes_b).float()
    bev_iou = iou3d_nms_utils.boxes_bev_iou_cpu(ta, tb).cpu().numpy().astype(np.float32)
    area_a = (boxes_a[:, 3] * boxes_a[:, 4]).reshape(N, 1)
    area_b = (boxes_b[:, 3] * boxes_b[:, 4]).reshape(1, M)
    inter_area = np.where(
        bev_iou > 0,
        bev_iou * (area_a + area_b) / (1.0 + bev_iou + 1e-9),
        0.0
    ).astype(np.float32)
    zmax_a = boxes_a[:, 2] + boxes_a[:, 5] / 2.0
    zmin_a = boxes_a[:, 2] - boxes_a[:, 5] / 2.0
    zmax_b = boxes_b[:, 2] + boxes_b[:, 5] / 2.0
    zmin_b = boxes_b[:, 2] - boxes_b[:, 5] / 2.0
    overlap_h = np.maximum(
        0.0,
        np.minimum(zmax_a.reshape(N, 1), zmax_b.reshape(1, M)) - np.maximum(zmin_a.reshape(N, 1), zmin_b.reshape(1, M))
    ).astype(np.float32)
    inter_vol = inter_area * overlap_h
    vol_a = 
(boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).reshape(N, 1) vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).reshape(1, M) union_vol = vol_a + vol_b - inter_vol + 1e-9 return (inter_vol / union_vol).astype(np.float32) def compute_ap_from_pr(rec: np.ndarray, prec: np.ndarray) -> float: if rec.size == 0: return 0.0 mrec = np.concatenate(([0.0], rec, [1.0])) mpre = np.concatenate(([0.0], prec, [0.0])) for i in range(mpre.size - 1, 0, -1): mpre[i-1] = max(mpre[i-1], mpre[i]) idx = np.where(mrec[1:] != mrec[:-1])[0] ap = np.sum((mrec[idx+1] - mrec[idx]) * mpre[idx+1]) return float(ap) def eval_3d_map_mar( all_dets: Dict[str, List[Tuple[str, np.ndarray, float]]], all_gts: Dict[str, Dict[str, List[np.ndarray]]], class_names: List[str], iou_thr: float ) -> Dict: results = {"iou_thr": iou_thr, "per_class": {}, "mAP": 0.0, "mAR": 0.0} aps, ars = [], [] for cls in class_names: dets = all_dets.get(cls, []) gts_by_frame = all_gts.get(cls, {}) npos = sum(len(v) for v in gts_by_frame.values()) if npos == 0: results["per_class"][cls] = {"AP": None, "AR": None, "nGT": 0, "nDet": len(dets)} continue dets_sorted = sorted(dets, key=lambda x: x[2], reverse=True) tp = np.zeros((len(dets_sorted),), dtype=np.float32) fp = np.zeros((len(dets_sorted),), dtype=np.float32) matched = {fid: np.zeros((len(gts_by_frame[fid]),), dtype=bool) for fid in gts_by_frame.keys()} for i, (fid, box, score) in enumerate(dets_sorted): gt_list = gts_by_frame.get(fid, []) if len(gt_list) == 0: fp[i] = 1.0 continue gt_boxes = np.stack(gt_list, axis=0).astype(np.float32) det_box = box.reshape(1, 7).astype(np.float32) ious = iou3d_matrix_cpu(det_box, gt_boxes).reshape(-1) jmax = int(np.argmax(ious)) if ious[jmax] >= iou_thr and (not matched[fid][jmax]): tp[i] = 1.0 matched[fid][jmax] = True else: fp[i] = 1.0 tp_cum = np.cumsum(tp) fp_cum = np.cumsum(fp) rec = tp_cum / float(npos) prec = tp_cum / np.maximum(tp_cum + fp_cum, 1e-9) ap = compute_ap_from_pr(rec, prec) ar = float(rec[-1]) if rec.size > 0 else 
0.0 results["per_class"][cls] = {"AP": ap, "AR": ar, "nGT": int(npos), "nDet": int(len(dets_sorted))} aps.append(ap); ars.append(ar) results["mAP"] = float(np.mean(aps)) if len(aps) > 0 else 0.0 results["mAR"] = float(np.mean(ars)) if len(ars) > 0 else 0.0 return results def parse_pcdet_gt_label(txt_path: str) -> List[Det3D]: dets = [] if not os.path.exists(txt_path): return dets with open(txt_path, "r", encoding="utf-8") as f: lines = [ln.strip() for ln in f.readlines() if ln.strip()] for ln in lines: parts = ln.split() if len(parts) < 8: continue nums = list(map(float, parts[:7])) cls_name = canonical_class(parts[7]) dets.append(Det3D(box7=np.array(nums, dtype=np.float32), cls_name=cls_name, score=1.0)) return dets # ========================= # PCDet load compat # ========================= def _extract_model_state(ckpt_obj): if isinstance(ckpt_obj, dict): if "model_state" in ckpt_obj: return ckpt_obj["model_state"] if "state_dict" in ckpt_obj: return ckpt_obj["state_dict"] return ckpt_obj def _override_pcdet_score_thresh(cfg, thr: float = 0.0): try: if hasattr(cfg.MODEL, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.POST_PROCESSING: cfg.MODEL.POST_PROCESSING.SCORE_THRESH = float(thr) except Exception: pass try: if hasattr(cfg.MODEL, "DENSE_HEAD") and hasattr(cfg.MODEL.DENSE_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.DENSE_HEAD.POST_PROCESSING: cfg.MODEL.DENSE_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr) except Exception: pass try: if hasattr(cfg.MODEL, "ROI_HEAD") and hasattr(cfg.MODEL.ROI_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.ROI_HEAD.POST_PROCESSING: cfg.MODEL.ROI_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr) except Exception: pass def _ensure_map_to_bev_proj_bn_registered(model, state_dict, device): proj_w_key = "map_to_bev_module.proj.weight" bn_w_key = "map_to_bev_module.bn.weight" if proj_w_key not in state_dict or bn_w_key not in state_dict: return mtb = getattr(model, "map_to_bev_module", None) if mtb is None: 
return if isinstance(getattr(mtb, "proj", None), nn.Module) and isinstance(getattr(mtb, "bn", None), nn.Module): return w = state_dict[proj_w_key] out_ch, in_ch, kH, kW = w.shape use_bias = ("map_to_bev_module.proj.bias" in state_dict) padding = (kH // 2, kW // 2) if (kH > 1 or kW > 1) else (0, 0) mtb.proj = nn.Conv2d(in_ch, out_ch, kernel_size=(kH, kW), stride=1, padding=padding, bias=use_bias).to(device) mtb.bn = nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01).to(device) print(f"[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d({in_ch}->{out_ch}, k={kH}x{kW})") # ========================= # Load models # ========================= def load_yolo_model(weights_path: str): try: from ultralytics import YOLO except Exception: sys.path.insert(0, ULTRALYTICS_REPO) from ultralytics import YOLO return YOLO(weights_path) def load_pcdet_model(cfg_path: str, ckpt_path: str, device: torch.device): sys.path.insert(0, OPENPCDET_REPO) from pcdet.config import cfg, cfg_from_yaml_file from pcdet.datasets import DatasetTemplate from pcdet.models import build_network, load_data_to_gpu from pcdet.utils import common_utils class DemoDataset(DatasetTemplate): def __init__(self, dataset_cfg, class_names, root_path, ext=".bin", logger=None): super().__init__(dataset_cfg=dataset_cfg, class_names=class_names, training=False, root_path=root_path, logger=logger) self.root_path = Path(root_path) self.ext = ext self.points_dir = self.root_path / "points" self.sample_file_list = sorted(glob.glob(str(self.points_dir / f"*{self.ext}"))) def __len__(self): return len(self.sample_file_list) def __getitem__(self, index): p = Path(self.sample_file_list[index]) points = np.fromfile(str(p), dtype=np.float32).reshape(-1, 4) input_dict = {"points": points, "frame_id": p.stem} data_dict = self.prepare_data(data_dict=input_dict) return data_dict logger = common_utils.create_logger() cfg_from_yaml_file(cfg_path, cfg) _override_pcdet_score_thresh(cfg, thr=0.0) dataset_root = 
str(Path(PCDET_POINTS_DIR).parent) dataset = DemoDataset(cfg.DATA_CONFIG, cfg.CLASS_NAMES, dataset_root, ext=".bin", logger=logger) model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset) model.to(device).eval() ckpt_obj = torch.load(ckpt_path, map_location="cpu") state = _extract_model_state(ckpt_obj) _ensure_map_to_bev_proj_bn_registered(model, state, device) ret = model.load_state_dict(state, strict=False) print(f"[PCDet] load_state_dict done. missing={len(ret.missing_keys)} unexpected={len(ret.unexpected_keys)}") return cfg, dataset, model, load_data_to_gpu # ========================= # Inference # ========================= @torch.no_grad() def infer_yolo(yolo_model, img_bgr: np.ndarray) -> List[Det2D]: img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB) results = yolo_model(img_rgb, conf=YOLO_CONF_LOW, iou=0.7, verbose=False) r = results[0] dets: List[Det2D] = [] if r.boxes is None or len(r.boxes) == 0: return dets xyxy = r.boxes.xyxy.detach().cpu().numpy() conf = r.boxes.conf.detach().cpu().numpy() cls = r.boxes.cls.detach().cpu().numpy().astype(int) for b, s, c in zip(xyxy, conf, cls): if float(s) < YOLO_CONF_LOW: continue cls_name = CLASSES[c] if 0 <= c < len(CLASSES) else str(c) dets.append(Det2D(xyxy=b.tolist(), cls_name=canonical_class(cls_name), score=float(s))) dets = sorted(dets, key=lambda d: d.score, reverse=True)[:YOLO_KEEP_TOPK] return dets @torch.no_grad() def infer_pcdet(cfg, dataset, model, load_data_to_gpu_fn, index: int, device: torch.device): data_dict = dataset[index] frame_id = data_dict["frame_id"] bin_path = str(Path(PCDET_POINTS_DIR) / f"{frame_id}.bin") raw_points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) if os.path.exists(bin_path) else None batch = dataset.collate_batch([data_dict]) if device.type == "cuda": load_data_to_gpu_fn(batch) pred_dicts, _ = model.forward(batch) pred = pred_dicts[0] boxes = pred["pred_boxes"].detach().cpu().numpy().astype(np.float32) scores = 
pred["pred_scores"].detach().cpu().numpy().astype(np.float32) labels = pred["pred_labels"].detach().cpu().numpy().astype(np.int32) label_base = 1 if labels.size > 0 and labels.min() == 0: label_base = 0 dets3d: List[Det3D] = [] for b, s, lb in zip(boxes, scores, labels): if float(s) < PCDET_CONF_LOW: continue idx_cls = int(lb) - 1 if label_base == 1 else int(lb) cls_name = cfg.CLASS_NAMES[idx_cls] if 0 <= idx_cls < len(cfg.CLASS_NAMES) else str(int(lb)) dets3d.append(Det3D(box7=b.copy(), cls_name=canonical_class(cls_name), score=float(s))) return frame_id, raw_points, dets3d # ========================= # Fusion (with refine) # ========================= def fuse_frame(yolo_dets: List[Det2D], pcdet_dets: List[Det3D], img_w: int, img_h: int, calib: Dict, T_lidar2cam: np.ndarray, raw_points: Optional[np.ndarray]) -> List[Det3D]: # 3D->2D projection for PCDet if len(pcdet_dets) > 0: boxes7 = np.stack([d.box7 for d in pcdet_dets], axis=0) proj = project_boxes3d_to_2d(boxes7, calib, T_lidar2cam, img_w, img_h, use_distortion=USE_DISTORTION) for d, p in zip(pcdet_dets, proj): d.proj_xyxy = p _, p2y = associate_yolo_pcdet(yolo_dets, pcdet_dets, img_w, img_h) fused: List[Det3D] = [] for pj, pdet in enumerate(pcdet_dets): if pj in p2y: ydet = yolo_dets[p2y[pj]] if pdet.proj_xyxy is None: continue q, _ = match_quality_2d(ydet.xyxy, pdet.proj_xyxy, img_w, img_h) fused_cls, fused_score = fuse_with_quality(ydet, pdet, q) if fused_score >= FUSED_KEEP_THRESH: box7 = pdet.box7.copy() box7_ref = refine_box_by_points_robust(raw_points, box7, q) proj_ref = project_boxes3d_to_2d(box7_ref.reshape(1, 7), calib, T_lidar2cam, img_w, img_h, use_distortion=USE_DISTORTION) proj_xyxy = proj_ref[0] if (len(proj_ref) > 0 and proj_ref[0] is not None) else pdet.proj_xyxy fused.append(Det3D( box7=box7_ref.copy(), cls_name=canonical_class(fused_cls), score=float(fused_score), proj_xyxy=proj_xyxy )) else: if pdet.score >= PCDET_CONF_HIGH_UNMATCHED: fused.append(Det3D( box7=pdet.box7.copy(), 
cls_name=pdet.cls_name, score=float(pdet.score), proj_xyxy=pdet.proj_xyxy )) return fused # ========================= # Load all models # ========================= def load_models(): device = torch.device("cuda" if torch.cuda.is_available() else "cpu") print(f"[INFO] device={device}") print("[INFO] Loading YOLO...") yolo_model = load_yolo_model(YOLO_WEIGHTS) print("[INFO] Loading PCDet...") cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_pcdet_model(PCDET_CFG, PCDET_CKPT, device) print(f"[PCDet] CLASS_NAMES = {list(cfg.CLASS_NAMES)}") return device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn # ========================= # Main # ========================= def main(): random.seed(RANDOM_SEED) np.random.seed(RANDOM_SEED) ensure_dir(DEBUG_DIR) device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_models() bin_files = sorted(glob.glob(os.path.join(PCDET_POINTS_DIR, "*.bin"))) frame_ids_all = [Path(p).stem for p in bin_files] valid_ids = [fid for fid in frame_ids_all if find_image_for_frame(fid) is not None] if not valid_ids: print("[ERROR] No matching (pointcloud, image) pairs found.") return eval_ids = valid_ids if (NUM_EVAL_FRAMES is None) else random.sample(valid_ids, k=min(NUM_EVAL_FRAMES, len(valid_ids))) debug_ids = random.sample(valid_ids, k=min(NUM_DEBUG_FRAMES, len(valid_ids))) idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)} all_dets_pcd: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES} all_dets_fus: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES} all_gts: Dict[str, Dict[str, List[np.ndarray]]] = {c: {} for c in CLASSES} chosen_T = None # ---------- Pass 1: eval collect ---------- print(f"[EVAL] collecting on {len(eval_ids)} frames ...") for k, fid in enumerate(eval_ids): if fid not in idx_map: continue img_path = find_image_for_frame(fid) if img_path is None: continue img = cv2.imread(img_path) if img is None: continue h, w = img.shape[:2] 
yolo_dets = infer_yolo(yolo_model, img) frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device) if chosen_T is None: pts_xyz = raw_points[:, :3] if raw_points is not None else None chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h) fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T, raw_points) if (k % PROGRESS_EVERY) == 0: print(f"[EVAL] {k}/{len(eval_ids)} frame={fid} | yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused_dets)}") for d in pcdet_dets: c = canonical_class(d.cls_name) if c in all_dets_pcd and d.score >= EVAL_SCORE_THR_PCD: all_dets_pcd[c].append((fid, d.box7.copy(), float(d.score))) for d in fused_dets: c = canonical_class(d.cls_name) if c in all_dets_fus and d.score >= EVAL_SCORE_THR_FUSED: all_dets_fus[c].append((fid, d.box7.copy(), float(d.score))) gt_path = os.path.join(PCDET_GT_LABEL_DIR, f"{fid}.txt") gt_dets = parse_pcdet_gt_label(gt_path) for gd in gt_dets: c = canonical_class(gd.cls_name) if c in all_gts: all_gts[c].setdefault(fid, []).append(gd.box7.copy()) # ---------- Pass 2: debug visualization ---------- print(f"[DEBUG] saving {len(debug_ids)} visualizations into {DEBUG_DIR}/ ...") for k, fid in enumerate(debug_ids): if fid not in idx_map: continue img_path = find_image_for_frame(fid) if img_path is None: continue img = cv2.imread(img_path) if img is None: continue h, w = img.shape[:2] yolo_dets = infer_yolo(yolo_model, img) frame_id, raw_points, pcdet_dets = infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device) if chosen_T is None: pts_xyz = raw_points[:, :3] if raw_points is not None else None chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h) fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h, CALIB, chosen_T, raw_points) y_top = top1_yolo(yolo_dets) p_top = top1_3d(pcdet_dets) f_top = top1_3d(fused_dets) out_path = os.path.join(DEBUG_DIR, f"{fid}.jpg") make_debug_vis(fid, img, y_top, p_top, f_top, raw_points, 
out_path) if (k % 5) == 0: print(f"[DEBUG] {k}/{len(debug_ids)} saved {out_path}") # ---------- Print metrics ---------- print("\n================= 3D Evaluation =================") print(f"[EvalFrames] {len(eval_ids)} | PCD_thr={EVAL_SCORE_THR_PCD} | FUSED_thr={EVAL_SCORE_THR_FUSED}") for thr in EVAL_IOU_THRESHOLDS: res_p = eval_3d_map_mar(all_dets_pcd, all_gts, CLASSES, iou_thr=thr) res_f = eval_3d_map_mar(all_dets_fus, all_gts, CLASSES, iou_thr=thr) print(f"\n[IOU={thr:.2f}] PCDet baseline: mAP={res_p['mAP']:.4f}, mAR={res_p['mAR']:.4f}") for c in CLASSES: pc = res_p["per_class"].get(c, {}) if pc.get("AP") is None: print(f" - {c:14s} AP=None AR=None nGT=0 nDet={pc.get('nDet', 0)}") else: print(f" - {c:14s} AP={pc['AP']:.4f} AR={pc['AR']:.4f} nGT={pc['nGT']} nDet={pc['nDet']}") print(f"[IOU={thr:.2f}] FUSED: mAP={res_f['mAP']:.4f}, mAR={res_f['mAR']:.4f}") for c in CLASSES: fc = res_f["per_class"].get(c, {}) if fc.get("AP") is None: print(f" - {c:14s} AP=None AR=None nGT=0 nDet={fc.get('nDet', 0)}") else: print(f" - {c:14s} AP={fc['AP']:.4f} AR={fc['AR']:.4f} nGT={fc['nGT']} nDet={fc['nDet']}") print(f"\n[INFO] Done. Debug images saved in: {DEBUG_DIR}/") if __name__ == "__main__": main()
右侧 FRONT 更宽
FRONT_PANEL_WIDTH_RATIO = 1.1(更宽)或 0.8(更窄)
FRONT 点更明显(密度图亮一点)
把 FRONT_DENSITY_GAMMA 从 0.55 改到 0.45
点更“粗”更好看(但别太大)
FRONT_DILATE_KSIZE = 3 -> 5
如果你还希望:在左图(原图)上也完全不遮挡画面(比如把 YOLO/PCDet/FUSED 的标签也挪到左图上方的 header 区域),我也可以按 FRONT 那种方式给左图加 header(文字不压图内容),但你这条消息里只明确要求了 X‑Z 的不遮挡,我先按你说的做到了。
目前的问题是X-Z视角的图占比过大,BEV的页面太小,我也要确保这个BEV页面的清晰度,同时这个X-Z视角的点云清晰度不够,既然你给出这么大的可视化区域,但是目前的点云只占据很小一部分,并不利于可视化,要么你就利用检测框给出更多的点云区域。你现在直接把X-Z视角的检测到的三维检测点云和检测框这个直接给出一个截图面,理解吗相当于从这个截图面往后压缩这个部分的点云,YOLO检测和所有的检测的框线条有点细,请按照YOLO或者Openpcdet原本的检测框线粗细给出,现在的X-Z视角及看不清异物点云也看不清他的周围点云,同时图例需要给出的是三个无填充的检测框,而不是三个实心的方框,同时应该居中,字体放大,你要想办法解决,给出修改后的完整的代码
我已经把你提到的所有可视化问题都改进进 fusion.py 了(BEV 放大且局部裁剪、更清晰;X‑Z 用“截面压缩/厚度 slab”让周围点云更多、更清楚;所有框线加粗到接近 YOLO/PCDet 常见显示;底部图例改为居中三种空心框+大字体)。
你直接下载替换你原来的 fusion.py 即可运行:
运行方式不变:
bashpython fusion.py
输出:
debug_vis/*.jpg让 BEV 更大 / X‑Z 更小
BEV_PANEL_WIDTH_RATIO / FRONT_PANEL_WIDTH_RATIO
X‑Z 周围点云更多(压缩截面更“厚”)
FRONT_SLAB_Y_PAD, FRONT_SLAB_MAX_HALF
X‑Z 更亮更粗更清楚
FRONT_DENSITY_GAMMA(更小更亮,比如 0.40)
FRONT_DILATE_KSIZE(更大更粗,比如 7)
BEV 局部裁剪窗口大小(让目标更大更清晰)
BEV_WINDOW_MIN_X/Y, BEV_WINDOW_MAX_X/Y
如果你跑完后还有“BEV 仍然不够大 / X‑Z 仍然不够聚焦”的具体帧截图,我可以再把 局部窗口的 quantile / clamp 调到更贴合你数据分布的默认值。
你上述代码是否更改了评估指标或者其他计算的过程呢?为什么这个融合后的 mAP 和 mAR 很差呢?
评估指标(mAP/mAR 的计算方式)我没有改:iou3d_matrix_cpu()、eval_3d_map_mar()、IoU 阈值(比如 0.7)、按类别统计 TP/FP 的逻辑都还是同一套。你看到融合后 mAP/mAR 变差,通常不是“评估被改了”,而是融合让输出的 det 集合/类别/分数/几何发生了变化,在当前评估设置下被扣分了。
下面是最常见、也最符合你这套 late fusion 代码结构的原因(按影响从大到小排):
你现在的融合是 fused_cls = DS( img , lidar ),也就是允许 fused 类别 != PCDet 类别。
但 3D eval 是按类别算 TP/FP 的:即便 3D box IoU 很高,只要类别错了就是 FP(原类别那边还少一个 TP),所以 mAP 会非常惨。
快速验证(强烈建议先做这个 ablation):
`fused_cls = pdet.cls_name`;`fused_score` 仍用你的 DS/q gating(或更简单先用 `pdet.score` 看上限)。如果这样 mAP/mAR 立刻恢复/变好,那问题就确认是“改类”导致的。
你用了两层衰减:
s_eff = s * (a + b*q)(q 小就会被乘到很低)fused_score *= ((1-g)+g*q)(再乘一次)如果 q 在大量帧里并不高(比如 0.3~0.6),分数会整体下移,很多 det 会:
FUSED_KEEP_THRESH、EVAL_SCORE_THR_FUSED
过滤掉 → mAR 掉,mAP 也跟着掉。快速验证:
Q_GATE_A=1.0, Q_GATE_B=0.0FUSED_Q_GATING_STRENGTH=0.0FUSED_KEEP_THRESH、EVAL_SCORE_THR_FUSED 先降到 0.01~0.05 看趋势你评估是 IoU=0.7(很严格)。点云稀疏/噪声时,refine 轻微移动中心或缩放尺寸,都可能把 IoU 从 0.72 打到 0.65,直接从 TP 变 FP。
快速验证:
REFINE_ENABLE=False 跑一次对比更保守的做法:
REFINE_MIN_Q 提到 0.7~0.8REFINE_MIN_POINTS 提到 80~150REFINE_DIM_SCALE_MAX 降到 1.10~1.15,REFINE_CENTER_MAX_SHIFT_XY 降到 0.3~0.6你的匹配主要依据 2D 投影(IoU/中心距/包含),在密集场景或投影误差时,YOLO 可能被匹配到错误的 PCDet box → 类别、分数都会被带偏。
快速验证:
if q < 0.7: fused_score = pdet.score; fused_cls = pdet.cls_name因为 YOLO 没有深度几何,late fusion 本质是“用 2D 证据改 3D 输出”。如果你允许它改类别、又用 q 去强烈衰减/筛除、再加上 refine 改几何,在 IoU=0.7 的 3D 指标下,很容易比纯 LiDAR baseline 更差——除非融合策略非常保守、并且匹配质量非常可靠。
fused_cls = pdet.cls_name(只融合分数)REFINE_ENABLE=False你会很快知道到底是 “改类”、还是 “分数被压低导致召回掉”、还是 “refine 损伤 IoU”。
如果你愿意把你终端输出里这几行发我(不需要日志全贴):
nDet(各类别的 nDet)我现在的代码我想让他直接评估我的点云数据集里的test数据集怎么办呢,怎么修改代码呢,我的/Openpcdet/data/custom/里面还有一个文件夹是/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/ImageSets,里面存放着test.txt,train.txt,我现在向评估指定数据集的文件,怎么修改代码呢
不会改动评估公式也能“只评估 test split”。你现在这份 fusion.py 主要问题是 eval_ids 是从 points 目录里随机抽出来的,并不知道 OpenPCDet 的 ImageSets/test.txt 里定义的 split。
你要做的是:读取 /Openpcdet/data/custom/ImageSets/test.txt 里的 frame_id 列表,然后用它当作 eval_ids(以及 debug_ids 的来源)。
下面给你一套最稳、改动最小的做法(只改几段代码,不碰你的 mAP/mAR 计算逻辑)。
放到你那些 NUM_EVAL_FRAMES / NUM_DEBUG_FRAMES 附近就行:
python# ====== Use ImageSets split for eval/debug ====== USE_IMAGESETS_SPLIT = True # 打开:按 ImageSets/test.txt 来跑 EVAL_SPLIT_NAME = "test" # "test" / "train" / "val"(取决于你有哪些 txt) DEBUG_SPLIT_NAME = "test" # debug 可与 eval 相同,也可以换成 "train" SHUFFLE_EVAL_IDS = False # 建议 False:保持 test.txt 的顺序更可复现
再补一个 ImageSets 目录(你已经有 points_dir 了,所以 root 很好取):
pythonDATASET_ROOT = str(Path(PCDET_POINTS_DIR).parent) # .../Openpcdet/data/custom IMAGESETS_DIR = os.path.join(DATASET_ROOT, "ImageSets")
放到工具函数区(比如 find_image_for_frame() 后面):
pythondef load_imageset_ids(txt_path: str) -> List[str]: """ 读取 ImageSets/*.txt,返回 frame_id 列表(自动去掉扩展名) 兼容每行是: 000123 000123.bin points/000123.bin """ if not os.path.exists(txt_path): raise FileNotFoundError(f"ImageSets file not found: {txt_path}") ids = [] with open(txt_path, "r", encoding="utf-8") as f: for ln in f: ln = ln.strip() if not ln: continue # Path(...).stem 会把 "000123.bin" / "points/000123.bin" 都变成 "000123" fid = Path(ln).stem ids.append(fid) return ids
main() 里替换“挑选 eval_ids/debug_ids”的那一段你现在 main 里大概是这种逻辑(扫描 bin_files → valid_ids → random.sample)。
把那块改成下面这个版本(核心是:优先用 ImageSets/test.txt)。
你只需要替换从
bin_files = ...到debug_ids = ...这一段即可。
python# 建立 dataset index map(你后面 infer_pcdet 需要用) idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)} # ========= 关键:用 ImageSets split 来决定 eval_ids ========= if USE_IMAGESETS_SPLIT: eval_txt = os.path.join(IMAGESETS_DIR, f"{EVAL_SPLIT_NAME}.txt") debug_txt = os.path.join(IMAGESETS_DIR, f"{DEBUG_SPLIT_NAME}.txt") eval_ids_raw = load_imageset_ids(eval_txt) debug_ids_raw = load_imageset_ids(debug_txt) # 过滤:必须有点云文件 & dataset里能索引到 & 最好有图像(你的融合需要图像) def is_valid(fid: str) -> bool: if fid not in idx_map: return False if not os.path.exists(os.path.join(PCDET_POINTS_DIR, f"{fid}.bin")): return False if find_image_for_frame(fid) is None: return False return True eval_ids = [fid for fid in eval_ids_raw if is_valid(fid)] if not eval_ids: print(f"[ERROR] No valid ids found from {eval_txt} (after filtering).") return # 是否打乱(一般不建议) if SHUFFLE_EVAL_IDS: random.shuffle(eval_ids) # 可选:只评估前 N 帧(NUM_EVAL_FRAMES=None 表示全量) if NUM_EVAL_FRAMES is not None: eval_ids = eval_ids[:min(NUM_EVAL_FRAMES, len(eval_ids))] # debug 从 debug split 中取,默认再随机抽 NUM_DEBUG_FRAMES debug_pool = [fid for fid in debug_ids_raw if is_valid(fid)] if not debug_pool: debug_pool = eval_ids[:] # fallback:用 eval_ids debug_ids = random.sample(debug_pool, k=min(NUM_DEBUG_FRAMES, len(debug_pool))) print(f"[SPLIT] eval from ImageSets/{EVAL_SPLIT_NAME}.txt -> {len(eval_ids)} frames") print(f"[SPLIT] debug from ImageSets/{DEBUG_SPLIT_NAME}.txt -> pool={len(debug_pool)} pick={len(debug_ids)}") else: # ====== 原来的逻辑:从 points 目录里找并随机抽 ====== bin_files = sorted(glob.glob(os.path.join(PCDET_POINTS_DIR, "*.bin"))) frame_ids_all = [Path(p).stem for p in bin_files] valid_ids = [fid for fid in frame_ids_all if find_image_for_frame(fid) is not None and fid in idx_map] if NUM_EVAL_FRAMES is None: eval_ids = valid_ids else: eval_ids = random.sample(valid_ids, k=min(NUM_EVAL_FRAMES, len(valid_ids))) debug_ids = random.sample(valid_ids, k=min(NUM_DEBUG_FRAMES, len(valid_ids)))
⚠️ 注意:我把 idx_map = {...} 提前了,因为 split 文件里的 id 必须映射到 dataset index 才能 infer_pcdet()。
OpenPCDet 习惯上:
train.txt / val.txt 有 GT(labels)test.txt 可能没有 labels(因为是提交测试集)如果你的 custom/labels/ 里 没有 test.txt 对应的那些 label 文件,那你跑 mAP/mAR 会出现:
nGT=0 或 AP/AR 为 None你可以用下面方式快速确认(随便挑一个 test id):
bashls /media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/labels/$(head -n 1 /media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/ImageSets/test.txt).txt
EVAL_SPLIT_NAME="train" 或你自己建一个 val.txt / test_with_label.txt想 严格按 test.txt 全量评估(有 GT 才有意义):
pythonUSE_IMAGESETS_SPLIT = True EVAL_SPLIT_NAME = "test" NUM_EVAL_FRAMES = None SHUFFLE_EVAL_IDS = False
只想快速验证流程:
pythonNUM_EVAL_FRAMES = 200
如果你把 test.txt 里随便贴 3 行(frame id),我也可以顺手帮你判断它是不是带扩展名/路径形式,以及你 labels 目录结构是否匹配(避免你读出来的 id 对不上 label 文件)。
请基于下面的代码,给出我限界检测的代码,,你可以继续使用这个代码,在这个代码里继续改,也可以给出新的代码,用新代码调用这个里面的模块就行,限界构建的思路如下,我想简单利用霍夫变换或者其他方法提取轨道区域,这个轨道提取方法你也要思考,分割出轨道边缘后与轨道点云匹配,然后利用我给你的单轨设计的文档中关于限界的构建思路,根据文档里或者假设列车大小,用这个作为限界的高,限界截面的上底是轨道区域,限界宽度比列车宽度大,具体限界大小可以参考车辆限界或者建筑限界,从轨道开始处延伸到轨道点云消失处的一个三维限界,这个限界是一个八边形截面的限界,限界需要在点云里可视化,请用混合后的三维检测框取利用重叠度和中心点等方法综合判断与限界的关系,这个综合方法你需要去考虑,要全面,并打印结果,每次检测一组点云与图像的数据即可,不需要每次检测很多。请给出代码:#!/usr/bin/env python3
"""
Fusion + Visualization + 3D Eval (NO intrusion logic)
本版按你最新反馈进一步修改:
面板占比:
X-Z 视角点云太少/不清晰:
线条太细:
图例(legend):
其余你之前要求仍保留:
运行:
python fusion.py
输出:
import os
import sys
import glob
import math
import random
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import numpy as np
import cv2
import torch
import torch.nn as nn
# ---------------------------------------------------------------------------
# Machine-specific absolute paths for data and model assets.
# ---------------------------------------------------------------------------
IMG_DIR = "/media/jd/4997BB1603CFE2C4/lw/aug2/img"  # camera images, one per frame id
PCDET_POINTS_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/points"  # LiDAR .bin files
PCDET_GT_LABEL_DIR = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/data/custom/labels"  # per-frame GT .txt files
ULTRALYTICS_REPO = "/media/jd/4997BB1603CFE2C4/lw/ultralytic"  # fallback sys.path entry for ultralytics
YOLO_WEIGHTS = "/media/jd/4997BB1603CFE2C4/lw/ultralytic/runs1/1209yolo11_m+SM+d1/weights/best.pt"
OPENPCDET_REPO = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet"  # added to sys.path to import pcdet
PCDET_CFG = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/tools/cfgs/custom_models/train.yaml"
PCDET_CKPT = "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/output/cfgs/custom_models/train/default/ckpt/checkpoint_epoch_80.pth"
DEBUG_DIR = "debug_vis"  # output directory for visualization .jpg files
# Canonical class names shared by the 2D and 3D detectors.
CLASSES = ['Drone', 'Plastic_sheet', 'Kite', 'Balloon', 'Bird']
# Camera calibration used to project LiDAR points/boxes into the image.
CALIB = {
    # 4x4 homogeneous LiDAR<->camera transform; the actual direction is
    # resolved at runtime via EXTRINSIC_MODE ("auto") — TODO confirm.
    "extrinsic_matrix": np.array([
        [0.00871822977022152, -0.9990101808868351, -0.043619387365335945, -0.04000000000000002],
        [-0.0003806461322609286, 0.043617726472722454, -0.9990482215818578, 0.25],
        [0.9999619230641715, 0.008726535498373544, -1.3877787807814457e-17, -0.04000000000000002],
        [0.0, 0.0, 0.0, 1.0]
    ], dtype=np.float64),
    # Pinhole intrinsics in pixels.
    "fx": 3605.0801593073,
    "fy": 3604.9573399128,
    "cx": 951.9363889574,
    "cy": 816.9773743048,
    "width": 1920,
    "height": 1536,
    # 8-term lens distortion coefficients — presumably OpenCV's rational
    # model ordering (k1,k2,p1,p2,k3,k4,k5,k6); verify against the projector.
    "dist": np.array([
        2.4515361243, -46.8007861419,
        -0.0002973913, -0.0008024507,
        -144.3698857610, 2.6420544359,
        -46.0443623397, -158.1742719597
    ], dtype=np.float64)
}
# ---------------------------------------------------------------------------
# Projection / extrinsic handling.
# ---------------------------------------------------------------------------
USE_DISTORTION = True  # apply CALIB["dist"] when projecting 3D -> 2D
EXTRINSIC_MODE = "auto"  # auto / lidar2cam / cam2lidar

# ---------------------------------------------------------------------------
# Drawing colors (BGR) and line styles for the debug visualization.
# ---------------------------------------------------------------------------
COL_YOLO = (0, 255, 0)  # green
COL_PCDET = (255, 0, 0)  # blue
COL_FUSED = (0, 0, 255)  # red
LINE_THICK_YOLO = 4
LINE_THICK_PCDET = 4
LINE_THICK_FUSED = 4
DASH_LEN = 18  # dash segment length (px) for dashed rectangles
DASH_GAP = 12  # gap between dashes (px)

# ---------------------------------------------------------------------------
# Detection / fusion thresholds.
# ---------------------------------------------------------------------------
YOLO_CONF_LOW = 0.05  # minimum YOLO score kept before fusion
PCDET_CONF_LOW = 0.01  # minimum PCDet score kept before fusion
YOLO_KEEP_TOPK = 1  # keep only the top-k YOLO boxes per frame
PCDET_CONF_HIGH_UNMATCHED = 0.10  # unmatched PCDet boxes survive only above this score
FUSED_KEEP_THRESH = 0.10  # minimum fused score to emit a fused detection
MATCH_IOU_THR = 0.05  # 2D IoU gate for YOLO<->PCDet association
MATCH_CENTER_DIST_THR_PX = 160.0  # center-distance gate (px) for association
PROJ_BOX_EXPAND_RATIO = 0.18  # expand projected 3D boxes before matching
COST_ALPHA = 0.7  # IoU-vs-distance weight in the association cost
# Reference scales that normalize the match-quality terms in overlap_score_2d.
Q_IOU_REF = 0.30
Q_DIST_REF_PX = 80.0
Q_EDGE_REF_PX = 8.0
# Quality gating of fused scores: s_eff = s * (Q_GATE_A + Q_GATE_B * q).
Q_GATE_A = 0.35
Q_GATE_B = 0.65
FUSED_Q_GATING_STRENGTH = 0.60  # how strongly match quality q scales the fused score

# ---------------------------------------------------------------------------
# Run / evaluation configuration.
# ---------------------------------------------------------------------------
RANDOM_SEED = 42
NUM_DEBUG_FRAMES = 20  # how many frames get a saved visualization
NUM_EVAL_FRAMES = 800  # None = evaluate all valid frames
EVAL_IOU_THRESHOLDS = [0.7]  # 3D IoU thresholds for mAP/mAR
EVAL_SCORE_THR_PCD = 0.10  # score filter applied to PCDet dets before eval
EVAL_SCORE_THR_FUSED = 0.10  # score filter applied to fused dets before eval
PROGRESS_EVERY = 20  # progress print interval (frames)

# ---------------------------------------------------------------------------
# BEV (top-down) panel rendering.
# ---------------------------------------------------------------------------
BEV_RESOLUTION = 0.07  # meters per pixel; finer -> sharper
BEV_DENSITY_GAMMA = 0.55  # gamma applied to the point-density image
BEV_DILATE_KSIZE = 3  # dilation kernel to fatten rendered points
BEV_USE_CLAHE = False
BEV_HEADER_H = 76  # header strip height (px) kept free of points
BEV_LOCAL_CROP = True  # crop BEV around detections instead of full extent
BEV_WINDOW_MARGIN_X = 2.0  # extra margin (m) around the crop window
BEV_WINDOW_MARGIN_Y = 2.0
BEV_WINDOW_MIN_X = 18.0  # clamp bounds (m) for the crop window span
BEV_WINDOW_MAX_X = 45.0
BEV_WINDOW_MIN_Y = 18.0
BEV_WINDOW_MAX_Y = 45.0
BEV_PTS_QUANTILE = 0.97  # quantile used to reject outlier points when sizing
BEV_MAX_RENDER_POINTS = 250000  # subsample cap for rendering speed

# ---------------------------------------------------------------------------
# FRONT (X-Z) panel rendering.
# ---------------------------------------------------------------------------
FRONT_RESOLUTION = 0.025  # meters per pixel; finer -> sharper
FRONT_MIN_WPX = 220  # minimum panel size in pixels
FRONT_MIN_HPX = 320
FRONT_SLAB_Y_PAD = 3.0  # Y slab half-thickness padding (m) around detections
FRONT_SLAB_MIN_HALF = 3.0
FRONT_SLAB_MAX_HALF = 12.0
FRONT_WINDOW_MARGIN_X = 2.0  # extra margin (m) around the X-Z crop window
FRONT_WINDOW_MARGIN_Z = 2.0
FRONT_WINDOW_MIN_X = 6.0  # clamp bounds (m) for the X-Z window span
FRONT_WINDOW_MAX_X = 24.0
FRONT_WINDOW_MIN_Z = 6.0
FRONT_WINDOW_MAX_Z = 18.0
FRONT_PTS_QUANTILE = 0.96
FRONT_DENSITY_GAMMA = 0.45  # smaller -> brighter density rendering
FRONT_DILATE_KSIZE = 5  # larger -> fatter points
FRONT_USE_CLAHE = True
FRONT_MAX_RENDER_POINTS = 250000
FRONT_HEADER_H = 86  # header strip height (px) for labels

# ---------------------------------------------------------------------------
# Layout, fonts, and legend.
# ---------------------------------------------------------------------------
BEV_PANEL_WIDTH_RATIO = 0.62  # panel width as a fraction of the image width
FRONT_PANEL_WIDTH_RATIO = 0.50
DET_FONT_SCALE = 0.55
DET_TEXT_THICKNESS = 2
HEADER_FONT_SCALE = 0.62
HEADER_TEXT_THICKNESS = 2
LEGEND_BAR_H = 170  # height (px) of the bottom legend bar
LEGEND_FONT_MAIN = 1.00
LEGEND_FONT_SUB = 0.82
LEGEND_THICKNESS = 2

# ---------------------------------------------------------------------------
# Point-based refinement of fused 3D boxes.
# ---------------------------------------------------------------------------
REFINE_ENABLE = True
REFINE_MIN_Q = 0.55  # minimum 2D match quality to attempt refinement
REFINE_MIN_POINTS = 25  # minimum in-box points required
REFINE_GOOD_POINTS = 120  # point count considered fully reliable
REFINE_EXPAND_XY = 0.30  # search-region expansion (m) around the box
REFINE_EXPAND_Z = 0.30
REFINE_ABS_QUANTILE_XY = 0.90  # robust extent quantiles of in-box points
REFINE_ABS_QUANTILE_Z = 0.90
REFINE_PAD_XY = 0.10  # padding (m) added back after the quantile shrink
REFINE_PAD_Z = 0.12
REFINE_DIM_SCALE_MIN = 0.75  # clamp on refined-size / original-size ratio
REFINE_DIM_SCALE_MAX = 1.25
REFINE_CENTER_MAX_SHIFT_XY = 1.20  # clamp on refined center shift (m)
REFINE_CENTER_MAX_SHIFT_Z = 1.00
REFINE_ALPHA_MAX = 0.85  # max blend weight toward the refined box

# Reference AP@0.7 of the PCDet baseline per class — presumably from a
# previous offline evaluation; used for comparison only. TODO confirm source.
PCDET_AP07 = {
    "Drone": 91.6,
    "Plastic_sheet": 55.52,
    "Kite": 40.61,
    "Balloon": 99.96,
    "Bird": 73.37
}
@dataclass
class Det2D:
    """A single 2D detection in image space (e.g. one YOLO box)."""
    xyxy: List[float]  # pixel box [x1, y1, x2, y2]
    cls_name: str      # canonical class name (see canonical_class)
    score: float       # detector confidence — presumably in [0, 1]; verify against producer
@dataclass
class Det3D:
    """A single 3D detection in the LiDAR frame, optionally with its 2D projection."""
    box7: np.ndarray   # 7-DoF box; eval code reads [x, y, z, dx, dy, dz, ...] (OpenPCDet layout — TODO confirm heading)
    cls_name: str      # canonical class name (see canonical_class)
    score: float       # detector or fused confidence
    proj_xyxy: Optional[List[float]] = None  # image-space projection [x1, y1, x2, y2], set during fusion
def ensure_dir(p: str):
    """Create directory *p* (including parents) if it does not already exist."""
    Path(p).mkdir(parents=True, exist_ok=True)
def canonical_class(name: str) -> str:
    """Map a free-form class-name string onto its canonical dataset spelling.

    Normalization is case-insensitive and folds '-' and ' ' to '_', so
    "drone", "Plastic-Sheet" and "plastic sheet" all resolve to the canonical
    names listed in CLASSES. Unknown names are returned stripped but otherwise
    unchanged; None passes through untouched.
    """
    if name is None:
        return name
    n = name.strip()
    # Fold case and separator variants so lookups are robust to labeling styles
    # (the original missed space-separated variants like "plastic sheet").
    n_low = n.lower().replace("-", "_").replace(" ", "_")
    mapping = {
        "drone": "Drone",
        "kite": "Kite",
        "balloon": "Balloon",
        "bird": "Bird",
        "plastic_sheet": "Plastic_sheet",
        "plastic": "Plastic_sheet",
        "plasticsheet": "Plastic_sheet",
    }
    return mapping.get(n_low, n)
def find_image_for_frame(frame_id: str) -> Optional[str]:
    """Locate the image for *frame_id* under IMG_DIR.

    Tries common extensions in a fixed order first, then falls back to a
    wildcard glob; returns the first hit or None when nothing matches.
    """
    for ext in (".jpg", ".png", ".jpeg", ".bmp"):
        candidate = os.path.join(IMG_DIR, frame_id + ext)
        if os.path.exists(candidate):
            return candidate
    matches = glob.glob(os.path.join(IMG_DIR, frame_id + ".*"))
    return matches[0] if matches else None
def clip_box_xyxy(box, w, h):
    """Clamp a [x1, y1, x2, y2] box to a w x h image and order its corners.

    Each coordinate is clamped to [0, w-1] / [0, h-1]; if the corners end up
    inverted they are swapped so x1 <= x2 and y1 <= y2. Returns floats.
    """
    x1, y1, x2, y2 = box
    xa, xb = sorted((min(w - 1, max(0, x1)), min(w - 1, max(0, x2))))
    ya, yb = sorted((min(h - 1, max(0, y1)), min(h - 1, max(0, y2))))
    return [float(xa), float(ya), float(xb), float(yb)]
def expand_box_xyxy(box, ratio=0.1):
    """Inflate a [x1, y1, x2, y2] box symmetrically about its center.

    Width/height are floored at 1 px before scaling by (1 + ratio), so even
    degenerate boxes grow to a usable size.
    """
    x1, y1, x2, y2 = box
    cx = (x1 + x2) / 2.0
    cy = (y1 + y2) / 2.0
    half_w = (max(1.0, x2 - x1) * (1.0 + ratio)) / 2
    half_h = (max(1.0, y2 - y1) * (1.0 + ratio)) / 2
    return [cx - half_w, cy - half_h, cx + half_w, cy + half_h]
def box_iou2d(a, b) -> float:
    """Intersection-over-union of two xyxy boxes (degenerate boxes -> 0)."""
    ix1 = max(a[0], b[0])
    iy1 = max(a[1], b[1])
    ix2 = min(a[2], b[2])
    iy2 = min(a[3], b[3])
    inter = max(0.0, ix2 - ix1) * max(0.0, iy2 - iy1)
    area_a = max(0.0, a[2] - a[0]) * max(0.0, a[3] - a[1])
    area_b = max(0.0, b[2] - b[0]) * max(0.0, b[3] - b[1])
    return float(inter / (area_a + area_b - inter + 1e-9))
def box_intersection_area(a, b) -> float:
    """Area of the overlap region of two xyxy boxes (0 when disjoint)."""
    width = min(a[2], b[2]) - max(a[0], b[0])
    height = min(a[3], b[3]) - max(a[1], b[1])
    if width <= 0.0 or height <= 0.0:
        return 0.0
    return float(width * height)
def box_area(a) -> float:
    """Area of an xyxy box; negative extents are clamped to zero."""
    width = max(0.0, a[2] - a[0])
    height = max(0.0, a[3] - a[1])
    return float(width * height)
def center_of_box(box):
    """Return the (cx, cy) center of an xyxy box."""
    x1, y1, x2, y2 = box
    return ((x1 + x2) / 2.0, (y1 + y2) / 2.0)
def center_dist_px(a, b) -> float:
    """Euclidean pixel distance between the centers of two xyxy boxes."""
    dx = (a[0] + a[2]) / 2.0 - (b[0] + b[2]) / 2.0
    dy = (a[1] + a[3]) / 2.0 - (b[1] + b[3]) / 2.0
    return float(math.hypot(dx, dy))
def point_in_box(pt, box):
    """True when point (x, y) lies inside or on the border of an xyxy box."""
    x, y = pt
    x1, y1, x2, y2 = box
    return (x1 <= x <= x2) and (y1 <= y <= y2)
def rect_edge_gap(a, b) -> float:
    """Shortest edge-to-edge distance between two xyxy boxes (0 if they touch/overlap)."""
    # per-axis gap: positive only when the intervals are disjoint on that axis
    gap_x = max(0.0, b[0] - a[2], a[0] - b[2])
    gap_y = max(0.0, b[1] - a[3], a[1] - b[3])
    return float(math.hypot(gap_x, gap_y))
def overlap_score_2d(a, b) -> Tuple[float, Dict]:
    """Score how well two 2D boxes overlap, in [0, 1].

    Combines IoU, center distance, containment of the smaller box and
    edge gap (each normalized by the Q_*_REF constants), plus a bonus
    when one box's center lies inside the other. Returns (score, info)
    where info carries the raw geometric terms for debugging.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token fused into `inter` — reconstructed as valid code.
    """
    iou = box_iou2d(a, b)
    dist = center_dist_px(a, b)
    gap = rect_edge_gap(a, b)
    inter = box_intersection_area(a, b)
    min_area = max(1e-9, min(box_area(a), box_area(b)))
    containment = float(np.clip(inter / min_area, 0.0, 1.0))
    ac = center_of_box(a)
    bc = center_of_box(b)
    center_in = (point_in_box(ac, b) or point_in_box(bc, a))
    iou_term = min(iou / Q_IOU_REF, 1.0)
    dist_term = max(0.0, 1.0 - dist / max(1e-6, Q_DIST_REF_PX))
    gap_term = max(0.0, 1.0 - gap / max(1e-6, Q_EDGE_REF_PX))
    contain_term = containment
    center_bonus = 0.15 if center_in else 0.0
    score = 0.45*iou_term + 0.25*dist_term + 0.20*contain_term + 0.10*gap_term + center_bonus
    score = float(np.clip(score, 0.0, 1.0))
    info = {"iou": iou, "dist": dist, "gap": gap, "containment": containment, "center_in": center_in}
    return score, info
def match_quality_2d(yolo_box, proj_box) -> Tuple[float, Dict]:
    """Map the raw 2D overlap score to a match quality q in [0.15, 1.0]."""
    raw_score, info = overlap_score_2d(yolo_box, proj_box)
    quality = float(np.clip(0.15 + 0.85 * raw_score, 0.0, 1.0))
    return quality, info
def draw_text_box_fit(img, x, y, text, color,
                      font_scale=0.6, thickness=2,
                      pad=4, bg=(0, 0, 0)):
    """Draw *text* with a filled background box, shrunk/clamped to fit the image.

    The font scale is reduced (down to 0.35) until the text fits the image
    width; the anchor (x, y) is clamped so the whole label stays on-screen.

    Fix: the original paste collapsed the tail of this body onto one line
    with a stray 'text' token — reconstructed as valid code.
    """
    H, W = img.shape[:2]
    fs = float(font_scale)
    while True:
        (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fs, thickness)
        if (tw + 2*pad <= W) or (fs <= 0.35):
            break
        fs -= 0.05
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, fs, thickness)
    x = int(round(x))
    y = int(round(y))
    x = max(0, min(W - (tw + 2*pad) - 1, x))
    y = max(0, min(H - (th + 2*pad) - 1, y))
    cv2.rectangle(img, (x, y), (x + tw + 2*pad, y + th + 2*pad), bg, -1)
    cv2.putText(img, text, (x + pad, y + pad + th), cv2.FONT_HERSHEY_SIMPLEX,
                fs, color, thickness, cv2.LINE_AA)
def draw_dashed_line(img, p1, p2, color, thickness=2, dash_len=14, gap_len=8):
    """Draw a dashed segment from p1 to p2 with fixed dash/gap lengths (px)."""
    x1, y1 = p1
    x2, y2 = p2
    span = math.hypot(x2 - x1, y2 - y1)
    if span < 1e-6:
        return
    ux = (x2 - x1) / span
    uy = (y2 - y1) / span
    t = 0.0
    while t < span:
        t_end = min(t + dash_len, span)
        start = (int(round(x1 + ux * t)), int(round(y1 + uy * t)))
        end = (int(round(x1 + ux * t_end)), int(round(y1 + uy * t_end)))
        cv2.line(img, start, end, color, thickness, cv2.LINE_AA)
        t += dash_len + gap_len
def draw_dashed_rect(img, box, color, thickness=2, dash_len=14, gap_len=8):
    """Draw an axis-aligned rectangle with dashed edges."""
    x1, y1, x2, y2 = [int(round(v)) for v in box]
    corners = [(x1, y1), (x2, y1), (x2, y2), (x1, y2)]
    for k in range(4):
        draw_dashed_line(img, corners[k], corners[(k + 1) % 4],
                         color, thickness, dash_len, gap_len)
def draw_dashed_poly(img, pts_xy: np.ndarray, color, thickness=2, dash_len=14, gap_len=8):
    """Draw a closed polygon with dashed edges; *pts_xy* is an (N, 2) array."""
    count = pts_xy.shape[0]
    for k in range(count):
        start = tuple(map(int, pts_xy[k]))
        end = tuple(map(int, pts_xy[(k + 1) % count]))
        draw_dashed_line(img, start, end, color, thickness, dash_len, gap_len)
def draw_center_marker_cross(img, x, y, color, size=2, thickness=1):
    """Draw a small '+' marker centered at (x, y)."""
    cx = int(round(x))
    cy = int(round(y))
    cv2.line(img, (cx - size, cy), (cx + size, cy), color, thickness, cv2.LINE_AA)
    cv2.line(img, (cx, cy - size), (cx, cy + size), color, thickness, cv2.LINE_AA)
def resize_keep_aspect(img: np.ndarray,
                       target_w: Optional[int] = None,
                       target_h: Optional[int] = None,
                       interp=cv2.INTER_AREA) -> np.ndarray:
    """Resize *img* preserving aspect ratio.

    When both targets are given, the limiting dimension wins; when only one
    is given, the other follows the aspect ratio. Returns the input
    unchanged when no target is given or the size already matches.

    Fix: the original paste collapsed the tail of this body onto one line
    with a stray 'text' token — reconstructed as valid code.
    """
    h, w = img.shape[:2]
    if target_w is None and target_h is None:
        return img
    if target_w is not None and target_h is not None:
        scale = min(target_w / max(1, w), target_h / max(1, h))
    elif target_w is not None:
        scale = target_w / max(1, w)
    else:
        scale = target_h / max(1, h)
    nw = max(1, int(round(w * scale)))
    nh = max(1, int(round(h * scale)))
    if nw == w and nh == h:
        return img
    return cv2.resize(img, (nw, nh), interpolation=interp)
def fit_to_rect(img: np.ndarray,
                target_w: int,
                target_h: int,
                pad_color=(0, 0, 0),
                interp_up=cv2.INTER_NEAREST,
                interp_down=cv2.INTER_AREA) -> np.ndarray:
    """Letterbox *img* into a (target_h, target_w) canvas, centered.

    Interpolation is chosen automatically:
      - upsampling: INTER_NEAREST (keeps density dots crisp)
      - downsampling: INTER_AREA (preserves detail)

    Fix: the original paste collapsed the tail of this body onto one line
    with a stray 'text' token — reconstructed as valid code.
    """
    h, w = img.shape[:2]
    scale = min(target_w / max(1, w), target_h / max(1, h))
    interp = interp_up if scale >= 1.0 else interp_down
    resized = resize_keep_aspect(img, target_w=target_w, target_h=target_h, interp=interp)
    rh, rw = resized.shape[:2]
    out = np.zeros((target_h, target_w, 3), dtype=np.uint8)
    out[:] = pad_color
    y0 = (target_h - rh) // 2
    x0 = (target_w - rw) // 2
    out[y0:y0 + rh, x0:x0 + rw] = resized
    return out
def rasterize_density(rr: np.ndarray, cc: np.ndarray, H: int, W: int,
                      gamma: float = 0.55, dilate_ksize: int = 3,
                      use_clahe: bool = False) -> np.ndarray:
    """Rasterize point indices (rr, cc) into a BGR density image.

    Per-pixel hit counts are log-compressed, normalized, gamma-mapped to
    8-bit gray, optionally CLAHE-equalized, optionally dilated so sparse
    points remain visible, then converted to 3-channel BGR.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    rr = rr.astype(np.int64)
    cc = cc.astype(np.int64)
    idx = rr * W + cc
    counts = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)
    if counts.max() > 0:
        dens = np.log1p(counts)
        dens = dens / (dens.max() + 1e-6)
        dens = np.power(dens, gamma)
        gray = (dens * 255.0).astype(np.uint8)
    else:
        gray = np.zeros((H, W), dtype=np.uint8)
    if use_clahe:
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        gray = clahe.apply(gray)
    if dilate_ksize is not None and dilate_ksize >= 2:
        k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_ksize, dilate_ksize))
        gray = cv2.dilate(gray, k, iterations=1)
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR)
def get_sensor_weights(cls_name: str) -> Tuple[float, float]:
    """Derive normalized (image, lidar) fusion weights from PCDet AP@0.7.

    Classes where the lidar detector is strong get a larger lidar weight;
    unknown classes fall back to AP=70%. The pair always sums to 1.
    """
    ap_frac = PCDET_AP07.get(canonical_class(cls_name), 70.0) / 100.0
    w_lidar = float(np.clip(ap_frac, 0.15, 0.95))
    w_img = float(np.clip(1.0 - w_lidar, 0.05, 0.85))
    total = w_img + w_lidar
    return w_img / total, w_lidar / total
def ds_fuse_singleton(cls_a: str, s_a: float, w_a: float,
                      cls_b: str, s_b: float, w_b: float) -> Tuple[str, float]:
    """Dempster-Shafer combination of two single-class hypotheses.

    Each source contributes mass m = w * s to its own class and the rest
    to "unknown" (theta). Agreeing classes reinforce; disagreeing classes
    conflict (K) and the stronger post-combination mass wins.
    Returns (fused_class, fused_mass).

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    cls_a = canonical_class(cls_a)
    cls_b = canonical_class(cls_b)
    m_a = float(np.clip(w_a * s_a, 0.0, 0.999999))
    m_b = float(np.clip(w_b * s_b, 0.0, 0.999999))
    th_a = 1.0 - m_a
    th_b = 1.0 - m_b
    K = (m_a * m_b) if (cls_a != cls_b) else 0.0  # conflict mass
    denom = 1.0 - K + 1e-9
    if cls_a == cls_b:
        m = (m_a*m_b + m_a*th_b + th_a*m_b) / denom
        return cls_a, float(m)
    else:
        ma = (m_a * th_b) / denom
        mb = (th_a * m_b) / denom
        return (cls_a, float(ma)) if ma >= mb else (cls_b, float(mb))
def fuse_with_quality(ydet: Det2D, pdet: Det3D, q: float) -> Tuple[str, float]:
    """Fuse a YOLO 2D detection with a PCDet 3D detection using match quality q.

    Both scores are gated by q (Q_GATE_A + Q_GATE_B * q), combined with
    Dempster-Shafer using per-class sensor weights, then the fused score is
    further attenuated by q via FUSED_Q_GATING_STRENGTH.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    w_img, w_lidar = get_sensor_weights(pdet.cls_name)
    gate = float(np.clip(Q_GATE_A + Q_GATE_B * q, 0.0, 1.0))
    s_img = float(np.clip(ydet.score * gate, 0.0, 1.0))
    s_lid = float(np.clip(pdet.score * gate, 0.0, 1.0))
    fused_cls, fused_score = ds_fuse_singleton(
        ydet.cls_name, s_img, w_img,
        pdet.cls_name, s_lid, w_lidar
    )
    fused_score = float(np.clip(
        fused_score * ((1.0 - FUSED_Q_GATING_STRENGTH) + FUSED_Q_GATING_STRENGTH * q),
        0.0, 1.0
    ))
    return fused_cls, fused_score
def boxes3d_to_corners(boxes7: np.ndarray) -> np.ndarray:
    """Convert (N, 7) boxes [x, y, z, dx, dy, dz, yaw] to (N, 8, 3) corners.

    Corner ordering matches the original: the four top-face corners
    (+dz/2) first, then the four bottom-face corners, each going
    (+x,+y) -> (+x,-y) -> (-x,-y) -> (-x,+y) in the box frame. Yaw rotates
    about +Z.
    """
    n_boxes = boxes7.shape[0]
    out = np.zeros((n_boxes, 8, 3), dtype=np.float32)
    for idx in range(n_boxes):
        x, y, z, dx, dy, dz, yaw = boxes7[idx].tolist()
        hx, hy, hz = dx / 2, dy / 2, dz / 2
        local = np.array([
            [ hx,  hy,  hz],
            [ hx, -hy,  hz],
            [-hx, -hy,  hz],
            [-hx,  hy,  hz],
            [ hx,  hy, -hz],
            [ hx, -hy, -hz],
            [-hx, -hy, -hz],
            [-hx,  hy, -hz],
        ], dtype=np.float32)
        cos_y = math.cos(yaw)
        sin_y = math.sin(yaw)
        rot = np.array([[cos_y, -sin_y, 0.0],
                        [sin_y,  cos_y, 0.0],
                        [0.0,    0.0,   1.0]], dtype=np.float32)
        out[idx] = local @ rot.T + np.array([x, y, z], dtype=np.float32)
    return out
def project_points_lidar_to_img(pts_lidar: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray,
                                img_w: int, img_h: int, use_distortion=True):
    """Project lidar points into the image plane.

    Returns (img_pts, valid) where img_pts is (N, 2) pixel coordinates and
    valid marks points in front of the camera (z > 0). Note: img_pts is
    returned for ALL points; callers must apply the valid mask themselves.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    fx, fy, cx, cy = calib["fx"], calib["fy"], calib["cx"], calib["cy"]
    K = np.array([[fx, 0, cx],
                  [0, fy, cy],
                  [0, 0, 1]], dtype=np.float64)
    dist = calib["dist"].astype(np.float64) if use_distortion else None
    pts_h = np.concatenate(
        [pts_lidar.astype(np.float64), np.ones((pts_lidar.shape[0], 1), dtype=np.float64)],
        axis=1)
    pts_cam = (T_lidar2cam @ pts_h.T).T[:, :3]
    valid = pts_cam[:, 2] > 1e-6
    rvec = np.zeros((3, 1), dtype=np.float64)
    tvec = np.zeros((3, 1), dtype=np.float64)
    img_pts, _ = cv2.projectPoints(pts_cam, rvec, tvec, K, dist)
    return img_pts.reshape(-1, 2).astype(np.float32), valid
def get_extrinsic_matrix(calib: Dict, pts_lidar_xyz: np.ndarray, img_w: int, img_h: int) -> np.ndarray:
    """Resolve the lidar->camera extrinsic, honoring EXTRINSIC_MODE.

    Modes "lidar2cam" / "cam2lidar" return the matrix or its inverse
    directly. In auto mode, both directions are scored by the fraction of
    projected points landing inside the image and the better one wins.

    Fix: the original paste collapsed the auto-mode body onto one line
    with a stray 'text' token — reconstructed as valid code.
    """
    T = calib["extrinsic_matrix"].copy()
    if EXTRINSIC_MODE == "lidar2cam":
        return T
    if EXTRINSIC_MODE == "cam2lidar":
        return np.linalg.inv(T)
    # auto mode needs a reasonable point sample to score with
    if pts_lidar_xyz is None or pts_lidar_xyz.shape[0] < 100:
        return T
    pts = pts_lidar_xyz
    if pts.shape[0] > 8000:
        pts = pts[np.random.choice(pts.shape[0], 8000, replace=False)]

    def score_for(Tuse):
        # fraction of valid projected points that fall inside the image
        img_pts, valid = project_points_lidar_to_img(
            pts, calib, Tuse, img_w, img_h, use_distortion=USE_DISTORTION)
        img_pts = img_pts[valid]
        if img_pts.shape[0] == 0:
            return 0.0
        inside = (img_pts[:, 0] >= 0) & (img_pts[:, 0] < img_w) & \
                 (img_pts[:, 1] >= 0) & (img_pts[:, 1] < img_h)
        return float(inside.mean())

    s1 = score_for(T)
    s2 = score_for(np.linalg.inv(T))
    chosen = T if s1 >= s2 else np.linalg.inv(T)
    print(f"[CALIB] auto | lidar2cam_inlier={s1:.3f} cam2lidar_inlier={s2:.3f} -> choose {'lidar2cam' if s1>=s2 else 'cam2lidar'}")
    return chosen
def project_boxes3d_to_2d(boxes7: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray,
                          img_w: int, img_h: int, use_distortion=True):
    """Project each 3D box to a clipped axis-aligned 2D rect on the image.

    Robust variant: a box is kept as long as at least one corner projects
    in front of the camera, so PCDet boxes do not silently disappear.
    Rects thinner than 2 px are padded; entries that still collapse after
    clipping become None. Returns a list aligned with *boxes7*.
    """
    if boxes7.shape[0] == 0:
        return []
    corners = boxes3d_to_corners(boxes7)
    rects = []
    for corner_set in corners:
        img_pts, valid = project_points_lidar_to_img(
            corner_set, calib, T_lidar2cam, img_w, img_h, use_distortion)
        if valid.sum() < 1:
            rects.append(None)
            continue
        xs = img_pts[valid, 0]
        ys = img_pts[valid, 1]
        x1, y1 = float(xs.min()), float(ys.min())
        x2, y2 = float(xs.max()), float(ys.max())
        # pad degenerate extents so the rect stays drawable
        if (x2 - x1) < 2.0:
            x1 -= 2.0
            x2 += 2.0
        if (y2 - y1) < 2.0:
            y1 -= 2.0
            y2 += 2.0
        rect = clip_box_xyxy([x1, y1, x2, y2], img_w, img_h)
        if (rect[2] - rect[0]) < 2 or (rect[3] - rect[1]) < 2:
            rects.append(None)
        else:
            rects.append(rect)
    return rects
def hungarian_match(cost: np.ndarray) -> List[Tuple[int, int]]:
    """Minimum-cost bipartite assignment on *cost* (rows x cols).

    Uses SciPy's Hungarian solver when available; otherwise falls back to
    a greedy pass over cells sorted by ascending cost.
    """
    try:
        from scipy.optimize import linear_sum_assignment
        rows, cols = linear_sum_assignment(cost)
        return [(int(i), int(j)) for i, j in zip(rows.tolist(), cols.tolist())]
    except Exception:
        pairs: List[Tuple[int, int]] = []
        taken_rows = set()
        taken_cols = set()
        order = np.dstack(np.unravel_index(np.argsort(cost.ravel()), cost.shape))[0]
        for i, j in order:
            if i in taken_rows or j in taken_cols:
                continue
            taken_rows.add(int(i))
            taken_cols.add(int(j))
            pairs.append((int(i), int(j)))
        return pairs
def associate_yolo_pcdet(yolo: List[Det2D], pcdet: List[Det3D], img_w: int, img_h: int):
    """Associate YOLO 2D boxes with projected PCDet boxes.

    Builds a cost matrix mixing (1 - IoU) and normalized center distance
    (weighted by COST_ALPHA) with a bonus when one center lies inside the
    other box, solves it with hungarian_match, then keeps only pairs that
    pass the IoU / center-distance / containment gates.
    Returns (yolo_idx -> pcdet_idx, pcdet_idx -> yolo_idx) dicts.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    N, M = len(yolo), len(pcdet)
    if N == 0 or M == 0:
        return {}, {}
    diag = math.hypot(img_w, img_h) + 1e-9
    cost = np.ones((N, M), dtype=np.float32) * 10.0
    for i in range(N):
        a = yolo[i].xyxy
        a_c = center_of_box(a)
        for j in range(M):
            b0 = pcdet[j].proj_xyxy
            if b0 is None:
                continue
            b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h)
            b_c = center_of_box(b)
            iou = box_iou2d(a, b)
            cd = math.hypot(a_c[0]-b_c[0], a_c[1]-b_c[1]) / diag
            inside = point_in_box(a_c, b) or point_in_box(b_c, a)
            bonus = -0.15 if inside else 0.0
            cost[i, j] = COST_ALPHA*(1.0 - iou) + (1.0 - COST_ALPHA)*cd + bonus
    pairs = hungarian_match(cost)
    y2p, p2y = {}, {}
    for i, j in pairs:
        b0 = pcdet[j].proj_xyxy
        if b0 is None:
            continue
        b = clip_box_xyxy(expand_box_xyxy(b0, PROJ_BOX_EXPAND_RATIO), img_w, img_h)
        iou = box_iou2d(yolo[i].xyxy, b)
        cd = center_dist_px(yolo[i].xyxy, b)
        y_c = center_of_box(yolo[i].xyxy)
        b_c = center_of_box(b)
        inside = point_in_box(y_c, b) or point_in_box(b_c, yolo[i].xyxy)
        if (iou >= MATCH_IOU_THR) or (cd <= MATCH_CENTER_DIST_THR_PX) or inside:
            y2p[i] = j
            p2y[j] = i
    return y2p, p2y
def points_in_obb(points_xyz: np.ndarray, box7: np.ndarray,
                  expand_xy: float, expand_z: float) -> np.ndarray:
    """Boolean mask of points inside a yaw-oriented 3D box (with margins).

    Points are translated to the box center, rotated by -yaw into the box
    frame, and compared against the half-extents expanded by
    expand_xy / expand_z.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    cx, cy, cz, dx, dy, dz, yaw = box7.tolist()
    px = points_xyz[:, 0] - cx
    py = points_xyz[:, 1] - cy
    pz = points_xyz[:, 2] - cz
    c = math.cos(-yaw)
    s = math.sin(-yaw)
    lx = c * px - s * py
    ly = s * px + c * py
    lz = pz
    hx = dx / 2.0 + expand_xy
    hy = dy / 2.0 + expand_xy
    hz = dz / 2.0 + expand_z
    return (np.abs(lx) <= hx) & (np.abs(ly) <= hy) & (np.abs(lz) <= hz)
def refine_box_by_points_robust(raw_points: Optional[np.ndarray],
                                box7: np.ndarray,
                                q: float) -> np.ndarray:
    """Robustly refine a 3D box using the raw points it contains.

    Points inside the (expanded) box are moved into the box frame; the
    refined center comes from coordinate medians and the refined size from
    absolute-deviation quantiles plus padding. Dimensions are clamped to
    [REFINE_DIM_SCALE_MIN, REFINE_DIM_SCALE_MAX] of the original, center
    shifts to the REFINE_CENTER_MAX_SHIFT limits. The final box blends the
    original and refined values with a strength alpha driven by the match
    quality q and the in-box point count. Returns box7 unchanged when
    refinement is disabled, q is too low, or too few points are found.

    Fix: the original paste collapsed this body onto mangled lines (one
    statement was even split across two source lines) — reconstructed as
    valid code.
    """
    if (not REFINE_ENABLE) or raw_points is None or raw_points.shape[0] == 0:
        return box7
    if q < REFINE_MIN_Q:
        return box7
    pts_xyz = raw_points[:, :3]
    mask = points_in_obb(pts_xyz, box7, REFINE_EXPAND_XY, REFINE_EXPAND_Z)
    pts = pts_xyz[mask]
    n = int(pts.shape[0])
    if n < REFINE_MIN_POINTS:
        return box7
    cx, cy, cz, dx, dy, dz, yaw = box7.tolist()
    # move points into the box frame (rotate by -yaw)
    px = pts[:, 0] - cx
    py = pts[:, 1] - cy
    pz = pts[:, 2] - cz
    c = math.cos(-yaw)
    s = math.sin(-yaw)
    lx = c * px - s * py
    ly = s * px + c * py
    lz = pz
    # robust center: per-axis median in the box frame
    mx = float(np.median(lx))
    my = float(np.median(ly))
    mz = float(np.median(lz))
    # robust half-extents: quantile of absolute deviation + padding
    ax = np.abs(lx - mx)
    ay = np.abs(ly - my)
    az = np.abs(lz - mz)
    hx = float(np.quantile(ax, REFINE_ABS_QUANTILE_XY) + REFINE_PAD_XY)
    hy = float(np.quantile(ay, REFINE_ABS_QUANTILE_XY) + REFINE_PAD_XY)
    hz = float(np.quantile(az, REFINE_ABS_QUANTILE_Z) + REFINE_PAD_Z)
    new_dx_raw = max(0.05, 2.0 * hx)
    new_dy_raw = max(0.05, 2.0 * hy)
    new_dz_raw = max(0.05, 2.0 * hz)
    dx0 = max(0.05, float(dx))
    dy0 = max(0.05, float(dy))
    dz0 = max(0.05, float(dz))
    # clamp refined dims relative to the original box
    new_dx = float(np.clip(new_dx_raw, dx0 * REFINE_DIM_SCALE_MIN, dx0 * REFINE_DIM_SCALE_MAX))
    new_dy = float(np.clip(new_dy_raw, dy0 * REFINE_DIM_SCALE_MIN, dy0 * REFINE_DIM_SCALE_MAX))
    new_dz = float(np.clip(new_dz_raw, dz0 * REFINE_DIM_SCALE_MIN, dz0 * REFINE_DIM_SCALE_MAX))
    # clamp the center correction
    mx = float(np.clip(mx, -REFINE_CENTER_MAX_SHIFT_XY, REFINE_CENTER_MAX_SHIFT_XY))
    my = float(np.clip(my, -REFINE_CENTER_MAX_SHIFT_XY, REFINE_CENTER_MAX_SHIFT_XY))
    mz = float(np.clip(mz, -REFINE_CENTER_MAX_SHIFT_Z, REFINE_CENTER_MAX_SHIFT_Z))
    # blending strength grows with match quality and point support
    q_strength = (q - REFINE_MIN_Q) / max(1e-6, (1.0 - REFINE_MIN_Q))
    n_strength = (n - REFINE_MIN_POINTS) / max(1.0, float(REFINE_GOOD_POINTS - REFINE_MIN_POINTS))
    q_strength = float(np.clip(q_strength, 0.0, 1.0))
    n_strength = float(np.clip(n_strength, 0.0, 1.0))
    alpha = REFINE_ALPHA_MAX * (q_strength * n_strength)
    alpha = float(np.clip(alpha, 0.0, 1.0))
    if alpha <= 1e-4:
        return box7
    # rotate the (scaled) center shift back into the world frame
    cyaw = math.cos(yaw)
    syaw = math.sin(yaw)
    dxw = (cyaw * mx - syaw * my) * alpha
    dyw = (syaw * mx + cyaw * my) * alpha
    dzw = mz * alpha
    cx2 = cx + dxw
    cy2 = cy + dyw
    cz2 = cz + dzw
    dx2 = dx0 + alpha * (new_dx - dx0)
    dy2 = dy0 + alpha * (new_dy - dy0)
    dz2 = dz0 + alpha * (new_dz - dz0)
    return np.array([cx2, cy2, cz2, dx2, dy2, dz2, yaw], dtype=np.float32)
def _sample_if_too_many(arr: np.ndarray, max_n: int) -> np.ndarray:
if arr.shape[0] <= max_n:
return arr
idx = np.random.choice(arr.shape[0], max_n, replace=False)
return arr[idx]
def compute_bev_window(points_xyz: Optional[np.ndarray],
                       pcdet_top: Optional[Det3D],
                       fused_top: Optional[Det3D]) -> Tuple[float, float, float, float]:
    """Compute a local BEV crop window (x_min, x_max, y_min, y_max).

    The window is centered on the mean of the available detection centers
    and sized to cover both the box corners and a quantile of the nearby
    point spread, with margins, clamped to the BEV_WINDOW_* limits. Falls
    back to a fixed global window when cropping is disabled or there is no
    detection.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    # fallback: fixed global window
    if (not BEV_LOCAL_CROP) or (pcdet_top is None and fused_top is None):
        return 0.0, 80.0, -30.0, 30.0
    dets = [d for d in [pcdet_top, fused_top] if d is not None]
    centers = np.array([[d.box7[0], d.box7[1]] for d in dets], dtype=np.float32)
    cx = float(np.mean(centers[:, 0]))
    cy = float(np.mean(centers[:, 1]))
    # extents of the box corners around the window center
    boxes_np = np.stack([d.box7 for d in dets], axis=0).astype(np.float32)
    corners = boxes3d_to_corners(boxes_np)  # (K, 8, 3)
    xs = corners[:, :, 0].reshape(-1)
    ys = corners[:, :, 1].reshape(-1)
    hx_box = float(np.max(np.abs(xs - cx)))
    hy_box = float(np.max(np.abs(ys - cy)))
    # point spread (quantile of absolute distance)
    hx_pts = 0.0
    hy_pts = 0.0
    if points_xyz is not None and points_xyz.shape[0] > 0:
        pts = _sample_if_too_many(points_xyz, BEV_MAX_RENDER_POINTS)
        ax = np.abs(pts[:, 0] - cx)
        ay = np.abs(pts[:, 1] - cy)
        hx_pts = float(np.quantile(ax, BEV_PTS_QUANTILE))
        hy_pts = float(np.quantile(ay, BEV_PTS_QUANTILE))
    span_x = 2.0 * (max(hx_box, hx_pts) + BEV_WINDOW_MARGIN_X)
    span_y = 2.0 * (max(hy_box, hy_pts) + BEV_WINDOW_MARGIN_Y)
    span_x = float(np.clip(span_x, BEV_WINDOW_MIN_X, BEV_WINDOW_MAX_X))
    span_y = float(np.clip(span_y, BEV_WINDOW_MIN_Y, BEV_WINDOW_MAX_Y))
    x_min = cx - span_x / 2.0
    x_max = cx + span_x / 2.0
    y_min = cy - span_y / 2.0
    y_max = cy + span_y / 2.0
    return x_min, x_max, y_min, y_max
def make_bev_image(points: Optional[np.ndarray],
                   pcdet_top: Optional[Det3D],
                   fused_top: Optional[Det3D]) -> np.ndarray:
    """Render a BEV (top-down X-Y) panel with a point-density background.

    Points inside the local window are rasterized to a density image, a
    header strip with window info and top detections is added, then the
    fused box is drawn solid and the PCDet box dashed on top.

    Fix: the original paste collapsed this body (including the nested
    drawing helper) onto mangled lines — reconstructed as valid code.
    """
    pts_xyz = points[:, :3] if (points is not None and points.shape[0] > 0) else None
    x_min, x_max, y_min, y_max = compute_bev_window(pts_xyz, pcdet_top, fused_top)
    res = BEV_RESOLUTION
    W = max(160, int(math.ceil((y_max - y_min) / res)))
    H = max(160, int(math.ceil((x_max - x_min) / res)))
    bev_data = np.zeros((H, W, 3), dtype=np.uint8)
    if pts_xyz is not None and pts_xyz.shape[0] > 0:
        pts = _sample_if_too_many(pts_xyz, BEV_MAX_RENDER_POINTS)
        mask = (pts[:, 0] >= x_min) & (pts[:, 0] <= x_max) & \
               (pts[:, 1] >= y_min) & (pts[:, 1] <= y_max)
        ptsw = pts[mask]
        if ptsw.shape[0] > 0:
            # image row = forward X (top = far), column = lateral Y
            rr = ((x_max - ptsw[:, 0]) / res).astype(np.int32)
            cc = ((ptsw[:, 1] - y_min) / res).astype(np.int32)
            rr = np.clip(rr, 0, H - 1)
            cc = np.clip(cc, 0, W - 1)
            bev_data = rasterize_density(rr, cc, H, W,
                                         gamma=BEV_DENSITY_GAMMA,
                                         dilate_ksize=BEV_DILATE_KSIZE,
                                         use_clahe=BEV_USE_CLAHE)
    bev = np.zeros((H + BEV_HEADER_H, W, 3), dtype=np.uint8)
    bev[:BEV_HEADER_H, :] = (16, 16, 16)
    bev[BEV_HEADER_H:, :] = bev_data
    draw_text_box_fit(bev, 8, 8, "BEV (local X-Y)", (255, 255, 255),
                      font_scale=HEADER_FONT_SCALE, thickness=HEADER_TEXT_THICKNESS, bg=(16,16,16))
    draw_text_box_fit(bev, 8, 38,
                      f"X[{x_min:.1f},{x_max:.1f}] Y[{y_min:.1f},{y_max:.1f}] res={res:.2f}",
                      (220, 220, 220), font_scale=0.55, thickness=2, bg=(16,16,16))
    y_line = 58
    if pcdet_top is not None:
        draw_text_box_fit(bev, 8, y_line, f"PCDet: {pcdet_top.cls_name} {pcdet_top.score:.2f}",
                          COL_PCDET, font_scale=0.60, thickness=2, bg=(16,16,16))
        y_line += 22
    if fused_top is not None:
        draw_text_box_fit(bev, 8, y_line, f"FUSED: {fused_top.cls_name} {fused_top.score:.2f}",
                          COL_FUSED, font_scale=0.60, thickness=2, bg=(16,16,16))

    def draw_box_bev(det: Det3D, color, dashed: bool, thick: int, center_size: int):
        # project the yaw-rotated footprint into BEV pixel space
        x, y, z, dx, dy, dz, yaw = det.box7.tolist()
        cyaw = math.cos(yaw)
        syaw = math.sin(yaw)
        local = np.array([[ dx/2,  dy/2],
                          [ dx/2, -dy/2],
                          [-dx/2, -dy/2],
                          [-dx/2,  dy/2]], dtype=np.float32)
        R = np.array([[cyaw, -syaw], [syaw, cyaw]], dtype=np.float32)
        corners_xy = (local @ R.T) + np.array([x, y], dtype=np.float32)
        rr = ((x_max - corners_xy[:, 0]) / res).astype(np.int32)
        cc = ((corners_xy[:, 1] - y_min) / res).astype(np.int32)
        rr = np.clip(rr, 0, H - 1)
        cc = np.clip(cc, 0, W - 1)
        pts_draw = np.stack([cc, rr], axis=1).astype(np.int32)
        pts_draw[:, 1] += BEV_HEADER_H
        if dashed:
            draw_dashed_poly(bev, pts_draw, color, thickness=thick,
                             dash_len=DASH_LEN, gap_len=DASH_GAP)
        else:
            cv2.polylines(bev, [pts_draw.reshape(-1, 1, 2)], True, color, thick, cv2.LINE_AA)
        rc = int((x_max - x) / res)
        cc0 = int((y - y_min) / res)
        rc = int(np.clip(rc, 0, H - 1)) + BEV_HEADER_H
        cc0 = int(np.clip(cc0, 0, W - 1))
        draw_center_marker_cross(bev, cc0, rc, color, size=center_size, thickness=1)

    # draw order: fused first, then pcdet dashed on top
    if fused_top is not None:
        draw_box_bev(fused_top, COL_FUSED, dashed=False, thick=LINE_THICK_FUSED, center_size=2)
    if pcdet_top is not None:
        draw_box_bev(pcdet_top, COL_PCDET, dashed=True, thick=LINE_THICK_PCDET, center_size=2)
    return bev
def compute_front_window(points_xyz: Optional[np.ndarray],
                         pcdet_top: Optional[Det3D],
                         fused_top: Optional[Det3D]) -> Tuple[float, float, float, float, float, float]:
    """Compute a front-view (X-Z) window plus a lateral slab around the target.

    Returns (x_min, x_max, z_min, z_max, y_center, slab_half). The slab
    half-thickness is derived from the largest detection's dy; the window
    covers box corners and a quantile of the slab-filtered point spread,
    clamped to the FRONT_WINDOW_* limits.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    dets = [d for d in [pcdet_top, fused_top] if d is not None]
    if len(dets) == 0:
        return 0, 1, 0, 1, 0.0, FRONT_SLAB_MIN_HALF
    centers = np.array([[d.box7[0], d.box7[1], d.box7[2], d.box7[4]] for d in dets], dtype=np.float32)
    cx = float(np.mean(centers[:, 0]))
    cy = float(np.mean(centers[:, 1]))
    cz = float(np.mean(centers[:, 2]))
    dy_ref = float(np.max(centers[:, 3]))  # use max dy to decide slab thickness
    slab_half = float(np.clip(max(dy_ref / 2.0 + FRONT_SLAB_Y_PAD, FRONT_SLAB_MIN_HALF),
                              FRONT_SLAB_MIN_HALF, FRONT_SLAB_MAX_HALF))
    # box extents in x/z
    boxes_np = np.stack([d.box7 for d in dets], axis=0).astype(np.float32)
    corners = boxes3d_to_corners(boxes_np)
    xs = corners[:, :, 0].reshape(-1)
    zs = corners[:, :, 2].reshape(-1)
    hx_box = float(np.max(np.abs(xs - cx)))
    hz_box = float(np.max(np.abs(zs - cz)))
    hx_pts = 0.0
    hz_pts = 0.0
    if points_xyz is not None and points_xyz.shape[0] > 0:
        pts = _sample_if_too_many(points_xyz, FRONT_MAX_RENDER_POINTS)
        # slab filter in y, then quantiles of absolute distance
        mslab = np.abs(pts[:, 1] - cy) <= slab_half
        pts = pts[mslab]
        if pts.shape[0] > 0:
            ax = np.abs(pts[:, 0] - cx)
            az = np.abs(pts[:, 2] - cz)
            hx_pts = float(np.quantile(ax, FRONT_PTS_QUANTILE))
            hz_pts = float(np.quantile(az, FRONT_PTS_QUANTILE))
    span_x = 2.0 * (max(hx_box, hx_pts) + FRONT_WINDOW_MARGIN_X)
    span_z = 2.0 * (max(hz_box, hz_pts) + FRONT_WINDOW_MARGIN_Z)
    span_x = float(np.clip(span_x, FRONT_WINDOW_MIN_X, FRONT_WINDOW_MAX_X))
    span_z = float(np.clip(span_z, FRONT_WINDOW_MIN_Z, FRONT_WINDOW_MAX_Z))
    x_min = cx - span_x / 2.0
    x_max = cx + span_x / 2.0
    z_min = cz - span_z / 2.0
    z_max = cz + span_z / 2.0
    return x_min, x_max, z_min, z_max, cy, slab_half
def make_front_xz_crop(points: Optional[np.ndarray],
                       pcdet_top: Optional[Det3D],
                       fused_top: Optional[Det3D]) -> np.ndarray:
    """Render a front-view (X-Z) panel of the slab around the top detection.

    Points within the lateral slab and window are rasterized to a density
    image; a header with window/slab info and top detections is added; the
    fused box is drawn solid and the PCDet box dashed on top. Returns a
    placeholder panel when there are no points or no detections.

    Fix: the original paste collapsed this body (including the nested
    drawing helper) onto mangled lines — reconstructed as valid code.
    """
    base = np.zeros((FRONT_MIN_HPX, FRONT_MIN_WPX, 3), dtype=np.uint8)
    draw_text_box_fit(base, 6, 8, "FRONT (X-Z)", (255, 255, 255), font_scale=0.7, thickness=2)
    if points is None or points.shape[0] == 0 or (pcdet_top is None and fused_top is None):
        draw_text_box_fit(base, 6, 38, "no points / no det", (220, 220, 220), font_scale=0.6, thickness=2)
        return base
    pts_xyz = points[:, :3]
    x_min, x_max, z_min, z_max, y_center, slab_half = compute_front_window(pts_xyz, pcdet_top, fused_top)
    # slab + window filter
    pts = _sample_if_too_many(pts_xyz, FRONT_MAX_RENDER_POINTS)
    m = (np.abs(pts[:, 1] - y_center) <= slab_half) & \
        (pts[:, 0] >= x_min) & (pts[:, 0] <= x_max) & \
        (pts[:, 2] >= z_min) & (pts[:, 2] <= z_max)
    crop = pts[m]
    if crop.shape[0] == 0:
        # fall back to the full slab if the window is empty
        crop = pts[np.abs(pts[:, 1] - y_center) <= slab_half]
    crop = _sample_if_too_many(crop, FRONT_MAX_RENDER_POINTS)
    res = FRONT_RESOLUTION
    W = max(FRONT_MIN_WPX, int(math.ceil((x_max - x_min) / res)))
    H = max(FRONT_MIN_HPX, int(math.ceil((z_max - z_min) / res)))
    cc = ((crop[:, 0] - x_min) / res).astype(np.int32)
    rr = ((z_max - crop[:, 2]) / res).astype(np.int32)
    cc = np.clip(cc, 0, W - 1)
    rr = np.clip(rr, 0, H - 1)
    front_data = rasterize_density(rr, cc, H, W,
                                   gamma=FRONT_DENSITY_GAMMA,
                                   dilate_ksize=FRONT_DILATE_KSIZE,
                                   use_clahe=FRONT_USE_CLAHE)
    front = np.zeros((H + FRONT_HEADER_H, W, 3), dtype=np.uint8)
    front[:FRONT_HEADER_H, :] = (16, 16, 16)
    front[FRONT_HEADER_H:, :] = front_data
    draw_text_box_fit(front, 8, 8, "FRONT (X-Z) slab-compress", (255, 255, 255),
                      font_scale=HEADER_FONT_SCALE, thickness=HEADER_TEXT_THICKNESS, bg=(16,16,16))
    draw_text_box_fit(front, 8, 38,
                      f"X[{x_min:.1f},{x_max:.1f}] Z[{z_min:.1f},{z_max:.1f}] slab|y-{y_center:.1f}|<={slab_half:.1f}m res={res:.3f}",
                      (220, 220, 220), font_scale=0.55, thickness=2, bg=(16,16,16))
    y_line = 60
    if pcdet_top is not None:
        draw_text_box_fit(front, 8, y_line, f"PCDet: {pcdet_top.cls_name} {pcdet_top.score:.2f}",
                          COL_PCDET, font_scale=0.60, thickness=2, bg=(16,16,16))
        y_line += 22
    if fused_top is not None:
        draw_text_box_fit(front, 8, y_line, f"FUSED: {fused_top.cls_name} {fused_top.score:.2f}",
                          COL_FUSED, font_scale=0.60, thickness=2, bg=(16,16,16))

    def draw_det_xz(det: Det3D, color, dashed: bool, thick: int, center_size: int):
        # axis-aligned X-Z extent of the (possibly rotated) 3D box
        corners1 = boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0]
        xs1 = corners1[:, 0]
        zs1 = corners1[:, 2]
        x1, x2 = float(xs1.min()), float(xs1.max())
        z1, z2 = float(zs1.min()), float(zs1.max())
        x1p = int((x1 - x_min) / res)
        x2p = int((x2 - x_min) / res)
        y1p = int((z_max - z2) / res)
        y2p = int((z_max - z1) / res)
        x1p = int(np.clip(x1p, 0, W - 1))
        x2p = int(np.clip(x2p, 0, W - 1))
        y1p = int(np.clip(y1p, 0, H - 1))
        y2p = int(np.clip(y2p, 0, H - 1))
        y1o = y1p + FRONT_HEADER_H
        y2o = y2p + FRONT_HEADER_H
        box = [x1p, y1o, x2p, y2o]
        if dashed:
            draw_dashed_rect(front, box, color, thickness=thick,
                             dash_len=DASH_LEN, gap_len=DASH_GAP)
        else:
            cv2.rectangle(front, (x1p, y1o), (x2p, y2o), color, thick, cv2.LINE_AA)
        cxp = int(np.clip(0.5 * (x1p + x2p), 0, W - 1))
        cyp = int(np.clip(0.5 * (y1o + y2o), FRONT_HEADER_H, H + FRONT_HEADER_H - 1))
        draw_center_marker_cross(front, cxp, cyp, color, size=center_size, thickness=1)

    # draw order: fused first, pcdet dashed on top
    if fused_top is not None:
        draw_det_xz(fused_top, COL_FUSED, dashed=False, thick=LINE_THICK_FUSED, center_size=1)
    if pcdet_top is not None:
        draw_det_xz(pcdet_top, COL_PCDET, dashed=True, thick=LINE_THICK_PCDET, center_size=2)
    return front
def _put_text_center(img, cx, y, text, color, font_scale, thickness):
    """Render *text* horizontally centered on column *cx*, top edge near *y*."""
    (tw, th), _ = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
    left = int(cx - tw / 2)
    cv2.putText(img, text, (left, y + th), cv2.FONT_HERSHEY_SIMPLEX,
                font_scale, color, thickness, cv2.LINE_AA)
def render_legend_bar(total_w: int) -> np.ndarray:
    """Render the bottom legend strip: YOLO (solid), PCDet (dashed), FUSED.

    Three sample boxes with titles and a "label: cls conf" hint are laid
    out centered; a separator line is drawn along the top edge.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    bar = np.zeros((LEGEND_BAR_H, total_w, 3), dtype=np.uint8)
    bar[:] = (18, 18, 18)
    # item layout (centered)
    box_w, box_h = 78, 44
    item_w = 260
    gap = 80
    group_w = 3 * item_w + 2 * gap
    x0 = max(12, int((total_w - group_w) / 2))
    y_box = 22
    y_text1 = 78
    y_text2 = 112

    def draw_item(ix, color, dashed, title):
        cx = x0 + ix * (item_w + gap) + item_w // 2
        bx1 = int(cx - box_w / 2)
        by1 = y_box
        bx2 = int(cx + box_w / 2)
        by2 = y_box + box_h
        if dashed:
            draw_dashed_rect(bar, [bx1, by1, bx2, by2], color,
                             thickness=LINE_THICK_PCDET, dash_len=DASH_LEN, gap_len=DASH_GAP)
        else:
            cv2.rectangle(bar, (bx1, by1), (bx2, by2), color, LINE_THICK_YOLO, cv2.LINE_AA)
        _put_text_center(bar, cx, y_text1, title, (245, 245, 245),
                         font_scale=LEGEND_FONT_MAIN, thickness=LEGEND_THICKNESS)
        _put_text_center(bar, cx, y_text2, "label: cls conf", (220, 220, 220),
                         font_scale=LEGEND_FONT_SUB, thickness=2)

    draw_item(0, COL_YOLO, dashed=False, title="YOLO (2D)")
    draw_item(1, COL_PCDET, dashed=True, title="PCDet (3D)")
    draw_item(2, COL_FUSED, dashed=False, title="FUSED")
    cv2.line(bar, (0, 0), (total_w - 1, 0), (70, 70, 70), 2)
    return bar
def top1_yolo(dets: List[Det2D]) -> Optional[Det2D]:
    """Return the highest-confidence 2D detection, or None for an empty list."""
    if not dets:
        return None
    return max(dets, key=lambda d: d.score)
def top1_3d(dets: List[Det3D]) -> Optional[Det3D]:
    """Return the highest-confidence 3D detection, or None for an empty list."""
    if not dets:
        return None
    return max(dets, key=lambda d: d.score)
def make_debug_vis(frame_id: str, img_bgr: np.ndarray,
                   yolo_top: Optional[Det2D],
                   pcdet_top: Optional[Det3D],
                   fused_top: Optional[Det3D],
                   raw_points: Optional[np.ndarray],
                   out_path: str):
    """Compose and save the full debug canvas for one frame.

    Layout: annotated camera image (YOLO solid green, FUSED solid, PCDet
    dashed on top for visibility) | BEV panel | FRONT X-Z panel, with the
    legend bar appended below. The result is written to *out_path*.

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    vis = img_bgr.copy()
    H, W = vis.shape[:2]
    draw_text_box_fit(vis, 12, H - 42, f"frame: {frame_id}", (0, 220, 220),
                      font_scale=0.72, thickness=2)
    # YOLO (green solid)
    if yolo_top is not None:
        b = clip_box_xyxy(yolo_top.xyxy, W, H)
        x1, y1, x2, y2 = [int(round(v)) for v in b]
        cv2.rectangle(vis, (x1, y1), (x2, y2), COL_YOLO, LINE_THICK_YOLO, cv2.LINE_AA)
        draw_text_box_fit(vis, x1, y1 - 30, f"YOLO {yolo_top.cls_name} {yolo_top.score:.2f}",
                          COL_YOLO, font_scale=DET_FONT_SCALE, thickness=DET_TEXT_THICKNESS)
    # FUSED (red solid) drawn first
    if fused_top is not None and fused_top.proj_xyxy is not None:
        b = clip_box_xyxy(fused_top.proj_xyxy, W, H)
        x1, y1, x2, y2 = [int(round(v)) for v in b]
        cv2.rectangle(vis, (x1, y1), (x2, y2), COL_FUSED, LINE_THICK_FUSED, cv2.LINE_AA)
        draw_text_box_fit(vis, x2 - 10, y1 - 30, f"FUSED {fused_top.cls_name} {fused_top.score:.2f}",
                          COL_FUSED, font_scale=DET_FONT_SCALE, thickness=DET_TEXT_THICKNESS)
    # PCDet (blue dashed) on top to guarantee visibility
    if pcdet_top is not None and pcdet_top.proj_xyxy is not None:
        b = clip_box_xyxy(pcdet_top.proj_xyxy, W, H)
        draw_dashed_rect(vis, b, COL_PCDET, thickness=LINE_THICK_PCDET,
                         dash_len=DASH_LEN, gap_len=DASH_GAP)
        draw_text_box_fit(vis, int(b[0]), int(b[3]) + 6,
                          f"PCDet {pcdet_top.cls_name} {pcdet_top.score:.2f}",
                          COL_PCDET, font_scale=DET_FONT_SCALE, thickness=DET_TEXT_THICKNESS)
    # side panels
    bev = make_bev_image(raw_points, pcdet_top, fused_top)
    front = make_front_xz_crop(raw_points, pcdet_top, fused_top)
    bev_w = max(160, int(W * BEV_PANEL_WIDTH_RATIO))
    front_w = max(160, int(W * FRONT_PANEL_WIDTH_RATIO))
    bev_r = fit_to_rect(bev, bev_w, H, pad_color=(0, 0, 0))
    front_r = fit_to_rect(front, front_w, H, pad_color=(0, 0, 0))
    top_canvas = cv2.hconcat([vis, bev_r, front_r])
    legend = render_legend_bar(top_canvas.shape[1])
    canvas = cv2.vconcat([top_canvas, legend])
    cv2.imwrite(out_path, canvas)
def iou3d_matrix_cpu(boxes_a: np.ndarray, boxes_b: np.ndarray) -> np.ndarray:
    """Approximate 3D IoU matrix between two (N, 7)/(M, 7) box sets on CPU.

    BEV IoU comes from PCDet's iou3d_nms_utils; the BEV intersection area
    is recovered from it analytically, multiplied by the vertical overlap
    to get an intersection volume, and divided by the union volume.
    Returns an (N, M) float32 matrix (zeros when either set is empty).

    Fix: the original paste collapsed this body onto one line with a
    stray 'text' token — reconstructed as valid code.
    """
    N, M = boxes_a.shape[0], boxes_b.shape[0]
    if N == 0 or M == 0:
        return np.zeros((N, M), dtype=np.float32)
    from pcdet.ops.iou3d_nms import iou3d_nms_utils
    ta = torch.from_numpy(boxes_a).float()
    tb = torch.from_numpy(boxes_b).float()
    bev_iou = iou3d_nms_utils.boxes_bev_iou_cpu(ta, tb).cpu().numpy().astype(np.float32)
    area_a = (boxes_a[:, 3] * boxes_a[:, 4]).reshape(N, 1)
    area_b = (boxes_b[:, 3] * boxes_b[:, 4]).reshape(1, M)
    # invert IoU = inter / (A + B - inter) to recover the intersection area
    inter_area = np.where(
        bev_iou > 0,
        bev_iou * (area_a + area_b) / (1.0 + bev_iou + 1e-9),
        0.0
    ).astype(np.float32)
    zmax_a = boxes_a[:, 2] + boxes_a[:, 5] / 2.0
    zmin_a = boxes_a[:, 2] - boxes_a[:, 5] / 2.0
    zmax_b = boxes_b[:, 2] + boxes_b[:, 5] / 2.0
    zmin_b = boxes_b[:, 2] - boxes_b[:, 5] / 2.0
    overlap_h = np.maximum(
        0.0,
        np.minimum(zmax_a.reshape(N, 1), zmax_b.reshape(1, M)) -
        np.maximum(zmin_a.reshape(N, 1), zmin_b.reshape(1, M))
    ).astype(np.float32)
    inter_vol = inter_area * overlap_h
    vol_a = (boxes_a[:, 3] * boxes_a[:, 4] * boxes_a[:, 5]).reshape(N, 1)
    vol_b = (boxes_b[:, 3] * boxes_b[:, 4] * boxes_b[:, 5]).reshape(1, M)
    union_vol = vol_a + vol_b - inter_vol + 1e-9
    return (inter_vol / union_vol).astype(np.float32)
def compute_ap_from_pr(rec: np.ndarray, prec: np.ndarray) -> float:
    """All-point (VOC-style) average precision from recall/precision arrays."""
    if rec.size == 0:
        return 0.0
    m_rec = np.concatenate(([0.0], rec, [1.0]))
    m_pre = np.concatenate(([0.0], prec, [0.0]))
    # precision envelope: make precision monotone non-increasing from the right
    m_pre = np.maximum.accumulate(m_pre[::-1])[::-1]
    steps = np.where(m_rec[1:] != m_rec[:-1])[0]
    return float(np.sum((m_rec[steps + 1] - m_rec[steps]) * m_pre[steps + 1]))
def eval_3d_map_mar(
    all_dets: Dict[str, List[Tuple[str, np.ndarray, float]]],
    all_gts: Dict[str, Dict[str, List[np.ndarray]]],
    class_names: List[str],
    iou_thr: float
) -> Dict:
    """Evaluate per-class 3D AP/AR and their means at a single IoU threshold.

    all_dets maps class -> list of (frame_id, box7, score); all_gts maps
    class -> frame_id -> list of box7. Detections are greedily matched to
    the highest-IoU unmatched GT per frame (one match per GT). Classes
    without GT get AP/AR = None and are excluded from the means.

    Fix: the original paste collapsed the per-class loop onto one line
    with a stray 'text' token — reconstructed as valid code.
    """
    results = {"iou_thr": iou_thr, "per_class": {}, "mAP": 0.0, "mAR": 0.0}
    aps, ars = [], []
    for cls in class_names:
        dets = all_dets.get(cls, [])
        gts_by_frame = all_gts.get(cls, {})
        npos = sum(len(v) for v in gts_by_frame.values())
        if npos == 0:
            results["per_class"][cls] = {"AP": None, "AR": None, "nGT": 0, "nDet": len(dets)}
            continue
        dets_sorted = sorted(dets, key=lambda x: x[2], reverse=True)
        tp = np.zeros((len(dets_sorted),), dtype=np.float32)
        fp = np.zeros((len(dets_sorted),), dtype=np.float32)
        matched = {fid: np.zeros((len(gts_by_frame[fid]),), dtype=bool)
                   for fid in gts_by_frame.keys()}
        for i, (fid, box, score) in enumerate(dets_sorted):
            gt_list = gts_by_frame.get(fid, [])
            if len(gt_list) == 0:
                fp[i] = 1.0
                continue
            gt_boxes = np.stack(gt_list, axis=0).astype(np.float32)
            det_box = box.reshape(1, 7).astype(np.float32)
            ious = iou3d_matrix_cpu(det_box, gt_boxes).reshape(-1)
            jmax = int(np.argmax(ious))
            if ious[jmax] >= iou_thr and (not matched[fid][jmax]):
                tp[i] = 1.0
                matched[fid][jmax] = True
            else:
                fp[i] = 1.0
        tp_cum = np.cumsum(tp)
        fp_cum = np.cumsum(fp)
        rec = tp_cum / float(npos)
        prec = tp_cum / np.maximum(tp_cum + fp_cum, 1e-9)
        ap = compute_ap_from_pr(rec, prec)
        ar = float(rec[-1]) if rec.size > 0 else 0.0
        results["per_class"][cls] = {"AP": ap, "AR": ar, "nGT": int(npos), "nDet": int(len(dets_sorted))}
        aps.append(ap)
        ars.append(ar)
    results["mAP"] = float(np.mean(aps)) if len(aps) > 0 else 0.0
    results["mAR"] = float(np.mean(ars)) if len(ars) > 0 else 0.0
    return results
def parse_pcdet_gt_label(txt_path: str) -> List[Det3D]:
    """Read an OpenPCDet-style GT label file into Det3D objects (score=1.0).

    Each usable line holds at least 8 whitespace-separated fields: seven box
    numbers followed by a class name. Shorter lines are skipped; a missing
    file yields an empty list.
    """
    gt: List[Det3D] = []
    if not os.path.exists(txt_path):
        return gt
    with open(txt_path, "r", encoding="utf-8") as fh:
        for raw in fh:
            fields = raw.split()
            if len(fields) < 8:
                # blank line or malformed row
                continue
            box = np.array([float(v) for v in fields[:7]], dtype=np.float32)
            gt.append(Det3D(box7=box, cls_name=canonical_class(fields[7]), score=1.0))
    return gt
def _extract_model_state(ckpt_obj):
if isinstance(ckpt_obj, dict):
if "model_state" in ckpt_obj:
return ckpt_obj["model_state"]
if "state_dict" in ckpt_obj:
return ckpt_obj["state_dict"]
return ckpt_obj
def _override_pcdet_score_thresh(cfg, thr: float = 0.0):
try:
if hasattr(cfg.MODEL, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.POST_PROCESSING:
cfg.MODEL.POST_PROCESSING.SCORE_THRESH = float(thr)
except Exception:
pass
try:
if hasattr(cfg.MODEL, "DENSE_HEAD") and hasattr(cfg.MODEL.DENSE_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.DENSE_HEAD.POST_PROCESSING:
cfg.MODEL.DENSE_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr)
except Exception:
pass
try:
if hasattr(cfg.MODEL, "ROI_HEAD") and hasattr(cfg.MODEL.ROI_HEAD, "POST_PROCESSING") and "SCORE_THRESH" in cfg.MODEL.ROI_HEAD.POST_PROCESSING:
cfg.MODEL.ROI_HEAD.POST_PROCESSING.SCORE_THRESH = float(thr)
except Exception:
pass
def _ensure_map_to_bev_proj_bn_registered(model, state_dict, device):
proj_w_key = "map_to_bev_module.proj.weight"
bn_w_key = "map_to_bev_module.bn.weight"
if proj_w_key not in state_dict or bn_w_key not in state_dict:
return
mtb = getattr(model, "map_to_bev_module", None)
if mtb is None:
return
if isinstance(getattr(mtb, "proj", None), nn.Module) and isinstance(getattr(mtb, "bn", None), nn.Module):
return
textw = state_dict[proj_w_key] out_ch, in_ch, kH, kW = w.shape use_bias = ("map_to_bev_module.proj.bias" in state_dict) padding = (kH // 2, kW // 2) if (kH > 1 or kW > 1) else (0, 0) mtb.proj = nn.Conv2d(in_ch, out_ch, kernel_size=(kH, kW), stride=1, padding=padding, bias=use_bias).to(device) mtb.bn = nn.BatchNorm2d(out_ch, eps=1e-3, momentum=0.01).to(device) print(f"[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d({in_ch}->{out_ch}, k={kH}x{kW})")
def load_yolo_model(weights_path: str):
    """Load an Ultralytics YOLO model from a weights file.

    Falls back to the local repo checkout (``ULTRALYTICS_REPO``) when the
    ``ultralytics`` package is not importable from the environment.
    """
    try:
        import ultralytics
    except Exception:
        # Package not installed: import from the local source tree instead.
        sys.path.insert(0, ULTRALYTICS_REPO)
        import ultralytics
    return ultralytics.YOLO(weights_path)
def load_pcdet_model(cfg_path: str, ckpt_path: str, device: torch.device):
    """Build the OpenPCDet network, load its checkpoint, and return helpers.

    Args:
        cfg_path: path to the OpenPCDet YAML config.
        ckpt_path: path to the checkpoint (.pth).
        device: target device for the model.

    Returns:
        (cfg, dataset, model, load_data_to_gpu): the parsed config, a demo
        dataset over ``<root>/points/*.bin``, the eval-mode model, and
        OpenPCDet's batch-to-GPU helper.
    """
    sys.path.insert(0, OPENPCDET_REPO)
    from pcdet.config import cfg, cfg_from_yaml_file
    from pcdet.datasets import DatasetTemplate
    from pcdet.models import build_network, load_data_to_gpu
    from pcdet.utils import common_utils

    class DemoDataset(DatasetTemplate):
        """Minimal inference dataset: one sample per .bin point-cloud file."""

        def __init__(self, dataset_cfg, class_names, root_path, ext=".bin", logger=None):
            super().__init__(dataset_cfg=dataset_cfg, class_names=class_names,
                             training=False, root_path=root_path, logger=logger)
            self.root_path = Path(root_path)
            self.ext = ext
            self.points_dir = self.root_path / "points"
            self.sample_file_list = sorted(glob.glob(str(self.points_dir / f"*{self.ext}")))

        def __len__(self):
            return len(self.sample_file_list)

        def __getitem__(self, index):
            p = Path(self.sample_file_list[index])
            # Points are stored as float32 (x, y, z, intensity) rows.
            points = np.fromfile(str(p), dtype=np.float32).reshape(-1, 4)
            input_dict = {"points": points, "frame_id": p.stem}
            return self.prepare_data(data_dict=input_dict)

    logger = common_utils.create_logger()
    cfg_from_yaml_file(cfg_path, cfg)
    # Keep every raw detection; score filtering happens later in fusion/eval.
    _override_pcdet_score_thresh(cfg, thr=0.0)
    dataset_root = str(Path(PCDET_POINTS_DIR).parent)
    dataset = DemoDataset(cfg.DATA_CONFIG, cfg.CLASS_NAMES, dataset_root, ext=".bin", logger=logger)
    model = build_network(model_cfg=cfg.MODEL, num_class=len(cfg.CLASS_NAMES), dataset=dataset)
    model.to(device).eval()
    ckpt_obj = torch.load(ckpt_path, map_location="cpu")
    state = _extract_model_state(ckpt_obj)
    # Register missing proj/bn layers before loading so their weights apply.
    _ensure_map_to_bev_proj_bn_registered(model, state, device)
    ret = model.load_state_dict(state, strict=False)
    print(f"[PCDet] load_state_dict done. missing={len(ret.missing_keys)} unexpected={len(ret.unexpected_keys)}")
    return cfg, dataset, model, load_data_to_gpu
@torch.no_grad()
def infer_yolo(yolo_model, img_bgr: np.ndarray) -> List[Det2D]:
    """Run YOLO on one BGR image and return the top-K 2D detections.

    Detections below ``YOLO_CONF_LOW`` are dropped; the remainder are sorted
    by score (descending) and truncated to ``YOLO_KEEP_TOPK``.
    """
    rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    result = yolo_model(rgb, conf=YOLO_CONF_LOW, iou=0.7, verbose=False)[0]
    out: List[Det2D] = []
    if result.boxes is None or len(result.boxes) == 0:
        return out
    boxes_xyxy = result.boxes.xyxy.detach().cpu().numpy()
    scores = result.boxes.conf.detach().cpu().numpy()
    class_ids = result.boxes.cls.detach().cpu().numpy().astype(int)
    for box, raw_score, cid in zip(boxes_xyxy, scores, class_ids):
        score = float(raw_score)
        if score < YOLO_CONF_LOW:
            continue
        # Fall back to the numeric id string for out-of-range class indices.
        name = CLASSES[cid] if 0 <= cid < len(CLASSES) else str(cid)
        out.append(Det2D(xyxy=box.tolist(), cls_name=canonical_class(name), score=score))
    out.sort(key=lambda d: d.score, reverse=True)
    return out[:YOLO_KEEP_TOPK]
@torch.no_grad()
def infer_pcdet(cfg, dataset, model, load_data_to_gpu_fn, index: int, device: torch.device):
    """Run the point-cloud detector on one dataset sample.

    Args:
        cfg: OpenPCDet config (provides CLASS_NAMES).
        dataset: demo dataset from ``load_pcdet_model``.
        model: the detection network in eval mode.
        load_data_to_gpu_fn: OpenPCDet helper moving a batch to GPU.
        index: sample index into the dataset.
        device: inference device.

    Returns:
        (frame_id, raw_points, dets3d) where raw_points is the unprocessed
        Nx4 float32 point cloud (or None if the .bin file is missing) and
        dets3d keeps only detections scoring >= PCDET_CONF_LOW.
    """
    data_dict = dataset[index]
    frame_id = data_dict["frame_id"]
    # Re-read the raw points so visualization is not affected by dataset preprocessing.
    bin_path = str(Path(PCDET_POINTS_DIR) / f"{frame_id}.bin")
    raw_points = np.fromfile(bin_path, dtype=np.float32).reshape(-1, 4) if os.path.exists(bin_path) else None
    batch = dataset.collate_batch([data_dict])
    if device.type == "cuda":
        load_data_to_gpu_fn(batch)
    pred_dicts, _ = model.forward(batch)
    pred = pred_dicts[0]
    boxes = pred["pred_boxes"].detach().cpu().numpy().astype(np.float32)
    scores = pred["pred_scores"].detach().cpu().numpy().astype(np.float32)
    labels = pred["pred_labels"].detach().cpu().numpy().astype(np.int32)
    # OpenPCDet labels are usually 1-based; detect 0-based outputs defensively.
    label_base = 1
    if labels.size > 0 and labels.min() == 0:
        label_base = 0
    dets3d: List[Det3D] = []
    for b, s, lb in zip(boxes, scores, labels):
        if float(s) < PCDET_CONF_LOW:
            continue
        idx_cls = int(lb) - 1 if label_base == 1 else int(lb)
        cls_name = cfg.CLASS_NAMES[idx_cls] if 0 <= idx_cls < len(cfg.CLASS_NAMES) else str(int(lb))
        dets3d.append(Det3D(box7=b.copy(), cls_name=canonical_class(cls_name), score=float(s)))
    return frame_id, raw_points, dets3d
def fuse_frame(yolo_dets: List[Det2D],
               pcdet_dets: List[Det3D],
               img_w: int, img_h: int,
               calib: Dict, T_lidar2cam: np.ndarray,
               raw_points: Optional[np.ndarray]) -> List[Det3D]:
    """Decision-level fusion of 2D (YOLO) and 3D (PCDet) detections.

    Pipeline:
      1. Project every 3D box into the image to get a 2D footprint.
      2. Associate YOLO boxes with projected PCDet boxes.
      3. For matched pairs, fuse class/score by match quality, refine the 3D
         box against the raw points, and keep it if the fused score clears
         ``FUSED_KEEP_THRESH``.
      4. Unmatched PCDet boxes survive only above ``PCDET_CONF_HIGH_UNMATCHED``.

    Returns:
        Fused Det3D list; each item carries its (possibly refined) 2D
        projection in ``proj_xyxy``.
    """
    if len(pcdet_dets) > 0:
        boxes7 = np.stack([d.box7 for d in pcdet_dets], axis=0)
        proj = project_boxes3d_to_2d(boxes7, calib, T_lidar2cam, img_w, img_h,
                                     use_distortion=USE_DISTORTION)
        for d, p in zip(pcdet_dets, proj):
            d.proj_xyxy = p
    _, p2y = associate_yolo_pcdet(yolo_dets, pcdet_dets, img_w, img_h)
    fused: List[Det3D] = []
    for pj, pdet in enumerate(pcdet_dets):
        if pj in p2y:
            ydet = yolo_dets[p2y[pj]]
            if pdet.proj_xyxy is None:
                continue  # matched but not projectable: cannot score the pair
            q, _ = match_quality_2d(ydet.xyxy, pdet.proj_xyxy)
            fused_cls, fused_score = fuse_with_quality(ydet, pdet, q)
            if fused_score >= FUSED_KEEP_THRESH:
                box7 = pdet.box7.copy()
                # Refine geometry against raw points so the fused box is not
                # simply a copy of the PCDet box.
                box7_ref = refine_box_by_points_robust(raw_points, box7, q)
                proj_ref = project_boxes3d_to_2d(box7_ref.reshape(1, 7), calib,
                                                 T_lidar2cam, img_w, img_h,
                                                 use_distortion=USE_DISTORTION)
                proj_xyxy = proj_ref[0] if (len(proj_ref) > 0 and proj_ref[0] is not None) else pdet.proj_xyxy
                fused.append(Det3D(
                    box7=box7_ref.copy(),
                    cls_name=canonical_class(fused_cls),
                    score=float(fused_score),
                    proj_xyxy=proj_xyxy
                ))
        else:
            # Unmatched 3D detections need a high standalone score to survive.
            if pdet.score >= PCDET_CONF_HIGH_UNMATCHED:
                fused.append(Det3D(
                    box7=pdet.box7.copy(),
                    cls_name=pdet.cls_name,
                    score=float(pdet.score),
                    proj_xyxy=pdet.proj_xyxy
                ))
    return fused
def load_models():
    """Load both detectors and return everything the pipeline needs.

    Returns:
        (device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn).
        Uses CUDA when available, otherwise CPU.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[INFO] device={device}")
    print("[INFO] Loading YOLO...")
    yolo_model = load_yolo_model(YOLO_WEIGHTS)
    print("[INFO] Loading PCDet...")
    cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_pcdet_model(PCDET_CFG, PCDET_CKPT, device)
    print(f"[PCDet] CLASS_NAMES = {list(cfg.CLASS_NAMES)}")
    return device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn
def main():
    """Two-pass driver: (1) collect detections for 3D mAP/mAR evaluation,
    (2) save a handful of debug visualizations, then print metrics."""
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    ensure_dir(DEBUG_DIR)

    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = load_models()

    # Keep only frames that have both a point cloud and a matching image.
    bin_files = sorted(glob.glob(os.path.join(PCDET_POINTS_DIR, "*.bin")))
    frame_ids_all = [Path(p).stem for p in bin_files]
    valid_ids = [fid for fid in frame_ids_all
                 if find_image_for_frame(fid) is not None]
    if not valid_ids:
        print("[ERROR] No matching (pointcloud, image) pairs found.")
        return

    if NUM_EVAL_FRAMES is None:
        eval_ids = valid_ids
    else:
        eval_ids = random.sample(valid_ids,
                                 k=min(NUM_EVAL_FRAMES, len(valid_ids)))
    debug_ids = random.sample(valid_ids,
                              k=min(NUM_DEBUG_FRAMES, len(valid_ids)))
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}

    # Per-class accumulators: detections as (frame_id, box7, score), GTs per frame.
    all_dets_pcd: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_dets_fus: Dict[str, List[Tuple[str, np.ndarray, float]]] = {c: [] for c in CLASSES}
    all_gts: Dict[str, Dict[str, List[np.ndarray]]] = {c: {} for c in CLASSES}
    chosen_T = None  # extrinsic chosen once on the first frame, then reused

    def _infer_frame(fid, img):
        """Run both detectors + fusion for one frame; returns det lists."""
        nonlocal chosen_T
        h, w = img.shape[:2]
        yolo_dets = infer_yolo(yolo_model, img)
        _, raw_points, pcdet_dets = infer_pcdet(
            cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
        if chosen_T is None:
            pts_xyz = raw_points[:, :3] if raw_points is not None else None
            chosen_T = get_extrinsic_matrix(CALIB, pts_xyz, w, h)
        fused_dets = fuse_frame(yolo_dets, pcdet_dets, w, h,
                                CALIB, chosen_T, raw_points)
        return yolo_dets, pcdet_dets, fused_dets, raw_points

    # ---------- Pass 1: eval collect ----------
    print(f"[EVAL] collecting on {len(eval_ids)} frames ...")
    for k, fid in enumerate(eval_ids):
        if fid not in idx_map:
            continue
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        yolo_dets, pcdet_dets, fused_dets, _ = _infer_frame(fid, img)
        if (k % PROGRESS_EVERY) == 0:
            print(f"[EVAL] {k}/{len(eval_ids)} frame={fid} | yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused_dets)}")
        for d in pcdet_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_pcd and d.score >= EVAL_SCORE_THR_PCD:
                all_dets_pcd[c].append((fid, d.box7.copy(), float(d.score)))
        for d in fused_dets:
            c = canonical_class(d.cls_name)
            if c in all_dets_fus and d.score >= EVAL_SCORE_THR_FUSED:
                all_dets_fus[c].append((fid, d.box7.copy(), float(d.score)))
        gt_path = os.path.join(PCDET_GT_LABEL_DIR, f"{fid}.txt")
        for gd in parse_pcdet_gt_label(gt_path):
            c = canonical_class(gd.cls_name)
            if c in all_gts:
                all_gts[c].setdefault(fid, []).append(gd.box7.copy())

    # ---------- Pass 2: debug visualization ----------
    print(f"[DEBUG] saving {len(debug_ids)} visualizations into {DEBUG_DIR}/ ...")
    for k, fid in enumerate(debug_ids):
        if fid not in idx_map:
            continue
        img_path = find_image_for_frame(fid)
        if img_path is None:
            continue
        img = cv2.imread(img_path)
        if img is None:
            continue
        yolo_dets, pcdet_dets, fused_dets, raw_points = _infer_frame(fid, img)
        y_top = top1_yolo(yolo_dets)
        p_top = top1_3d(pcdet_dets)
        f_top = top1_3d(fused_dets)
        out_path = os.path.join(DEBUG_DIR, f"{fid}.jpg")
        make_debug_vis(fid, img, y_top, p_top, f_top, raw_points, out_path)
        if (k % 5) == 0:
            print(f"[DEBUG] {k}/{len(debug_ids)} saved {out_path}")

    # ---------- Print metrics ----------
    print("\n================= 3D Evaluation =================")
    print(f"[EvalFrames] {len(eval_ids)} | PCD_thr={EVAL_SCORE_THR_PCD} | FUSED_thr={EVAL_SCORE_THR_FUSED}")
    for thr in EVAL_IOU_THRESHOLDS:
        res_p = eval_3d_map_mar(all_dets_pcd, all_gts, CLASSES, iou_thr=thr)
        res_f = eval_3d_map_mar(all_dets_fus, all_gts, CLASSES, iou_thr=thr)
        print(f"\n[IOU={thr:.2f}] PCDet baseline: mAP={res_p['mAP']:.4f}, mAR={res_p['mAR']:.4f}")
        for c in CLASSES:
            pc = res_p["per_class"].get(c, {})
            if pc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={pc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={pc['AP']:.4f} AR={pc['AR']:.4f} nGT={pc['nGT']} nDet={pc['nDet']}")
        print(f"[IOU={thr:.2f}] FUSED: mAP={res_f['mAP']:.4f}, mAR={res_f['mAR']:.4f}")
        for c in CLASSES:
            fc = res_f["per_class"].get(c, {})
            if fc.get("AP") is None:
                print(f" - {c:14s} AP=None AR=None nGT=0 nDet={fc.get('nDet', 0)}")
            else:
                print(f" - {c:14s} AP={fc['AP']:.4f} AR={fc['AR']:.4f} nGT={fc['nGT']} nDet={fc['nDet']}")
    print(f"\n[INFO] Done. Debug images saved in: {DEBUG_DIR}/")
# Fixed: markdown rendering stripped the dunder underscores, turning the
# standard entry-point guard into `if name == "main":`, which raises
# NameError at import time. Restore the canonical guard.
if __name__ == "__main__":
    main()
我按你给的 fusion.py 思路,单独写了一个“限界构建 + 入侵判定 + 可视化”脚本:它只处理一帧(一组点云+图像),并且:
建筑限界模式(如 C_equipment 对应的设备限界)会在其基础上额外加横向 c=50mm、竖向 h2=200mm(你还可再调)。这些剖面的高度范围与文档里“车辆最大宽度 2300–2500mm、车体底部距轨面高度 3400–3700mm”等量级一致,方便你后续按车型再调。
已为你生成文件(放在和 fusion.py 同目录运行即可):
1)确保 fusion.py 和本脚本在同一目录(你的 fusion.py 就是你发的那份)
2)直接跑(默认从 .../data/custom/ImageSets/test.txt 取第一个可用 frame):
python clearance_detect.py
3)指定某一帧:
python clearance_detect.py --frame_id 000123
4)切换限界剖面(A/B/C + vehicle/equipment):
python clearance_detect.py --profile C_equipment
python clearance_detect.py --profile A_vehicle
python clearance_detect.py --profile B_equipment
5)切换“建筑限界”扩张模式(会在设备限界上额外加横向/竖向裕度):
python clearance_detect.py --mode building
输出:
clearance_vis/<frame_id>_clearance.jpgpython#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Clearance / Intrusion Check (限界检测) for ONE frame 本脚本基于你现有的 fusion.py(YOLO + PCDet + FUSED)继续扩展: 1) 轨道区域提取(优先:图像 ROI + Canny + HoughLinesP,失败则回退到点云 BEV 线性结构) 2) 轨道边缘与点云匹配(把点云投影到图像,在轨道 mask 内筛选轨道点云) 3) 按《单轨设计》附录 A/B/C 的“车辆限界/设备限界坐标”构造截面,并沿轨道方向外推成 3D 限界体 4) 使用“融合后的 3D 检测框”与限界体做综合关系判断(中心点、角点落入比例、AABB 近似重叠、距离) 5) 可视化:相机叠加 + BEV(点云密度 + 限界 footprint) + 截面(t-v)视图(八边形/多边形限界) 运行: 1) 保证 fusion.py 和本脚本在同一目录 2) python clearance_detect.py 或指定 frame: python clearance_detect.py --frame_id 000123 或指定限界剖面/类型: python clearance_detect.py --profile C_equipment --mode building 输出: clearance_vis/<frame_id>_clearance.jpg 终端打印每个 fused 目标与限界关系(INTRUDE/NEAR/OUTSIDE) """ import os import sys import glob import math import argparse from pathlib import Path from typing import Dict, List, Tuple, Optional from dataclasses import dataclass import numpy as np import cv2 # 依赖 fusion.py(把你发的那份保存为 fusion.py) import fusion as F # ========================= # Config # ========================= OUT_DIR = "clearance_vis" os.makedirs(OUT_DIR, exist_ok=True) # 使用 ImageSets/test.txt 选帧(frame_id=None 时) USE_IMAGESETS = True SPLIT_NAME = "test" # "test" / "train" / "val" # --- Track extraction (image) --- TRACK_ROI_Y0_RATIO = 0.35 # ROI 从图像高度的 0.35H 到底部 TRACK_CANNY1 = 60 TRACK_CANNY2 = 180 TRACK_HOUGH_THRESHOLD = 70 TRACK_HOUGH_MIN_LEN_RATIO = 0.18 # minLineLength = W * ratio TRACK_HOUGH_MAX_GAP = 35 TRACK_ANGLE_MIN_DEG = 20 # 过滤近水平线 TRACK_ANGLE_MAX_DEG = 160 TRACK_POLY_EXPAND_PX = 18 # polygon 外扩像素 # --- Track extraction fallback (point cloud) --- FALLBACK_X_RANGE = (0.0, 80.0) FALLBACK_Y_ABS = 6.0 FALLBACK_Z_TOP_Q = 0.95 FALLBACK_Z_BAND = 0.35 # --- Track frame estimation --- TRACK_S_QMIN = 0.02 TRACK_S_QMAX = 0.98 TRACK_S_MARGIN = 0.5 # m TRACK_Z_TOP_Q = 0.92 # 估计轨面/梁顶高度(轨道点云 z 的高分位) TRACK_TOP_BAND = 0.25 # m,用于估计轨道宽度时的 z band # --- Clearance profile & expand --- PROFILE_SCALE_TO_TRACK = True PROFILE_SCALE_CLAMP = 
(0.7, 1.35) # 建筑限界额外放大(典型:c=50mm, h2=200mm) BUILDING_EXTRA_Y = 0.05 # m BUILDING_EXTRA_Z = 0.20 # m # --- Visualization --- BEV_RES = 0.06 BEV_GAMMA = 0.55 BEV_DILATE = 3 TV_RES = 0.02 TV_GAMMA = 0.55 TV_DILATE = 3 TV_SLAB_HALF = 0.8 # m # colors COL_TRACK = (0, 255, 255) # yellow COL_ENVELOPE = (0, 215, 255) # gold-ish COL_TEXT = (245, 245, 245) # judge thresholds INTRUDE_CORNER_RATIO_THR = 0.20 NEAR_DIST_THR = 0.25 # sample points used for inside test SAMPLE_FACE_CENTERS = True # ========================= # Clearance profile tables (mm) from doc appendix A/B/C # (Y,Z) are HALF coordinates (Y>=0), polygon is closed along centerline. # We'll mirror to get full polygon in (t,v) plane. # ========================= PROFILES_MM: Dict[str, List[Tuple[float, float]]] = { "C_equipment": [ (0, 182), (938, 182), (1374, 350), (1583, 2059), (1587, 3082), (1573, 3442), (1590, 3888), (0, 3888), ], "C_vehicle": [ (0, 232), (878, 232), (1314, 400), (1493, 2059), (1480, 3047), (1460, 3392), (1470, 3828), (0, 3828), ], "A_equipment": [ (403, 227), (629, 210), (1156, 153), (1670, 1898), (1670, 2269), (1880, 3559), (1647, 3859), (0, 3735), ], "A_vehicle": [ (413, 262), (623, 254), (1133, 198), (1604, 1920), (1604, 2251), (1760, 3514), (1548, 3810), (0, 3707), ], "B_equipment": [ (392, 210), (674, 206), (1138, 143), (1665, 1912), (1665, 2247), (1830, 3549), (1620, 3840), (0, 3735), ], "B_vehicle": [ (392, 236), (656, 232), (1117, 184), (1605, 1935), (1605, 2246), (1751, 3517), (1540, 3804), (0, 3706), ], } def load_imageset_ids(txt_path: str) -> List[str]: if not os.path.exists(txt_path): return [] ids: List[str] = [] with open(txt_path, "r", encoding="utf-8") as f: for ln in f: ln = ln.strip() if not ln: continue ids.append(Path(ln).stem) return ids # ========================= # Track extraction in image (Hough) # ========================= def _line_len(x1, y1, x2, y2) -> float: return float(math.hypot(x2 - x1, y2 - y1)) def expand_quad(poly4: np.ndarray, expand_px: int, img_w: int, 
img_h: int) -> np.ndarray: if poly4 is None or poly4.shape[0] != 4: return poly4 c = poly4.mean(axis=0, keepdims=True) v = poly4 - c n = np.linalg.norm(v, axis=1, keepdims=True) + 1e-6 v_unit = v / n poly = poly4 + v_unit * float(expand_px) poly[:, 0] = np.clip(poly[:, 0], 0, img_w - 1) poly[:, 1] = np.clip(poly[:, 1], 0, img_h - 1) return poly def detect_track_polygon_hough(img_bgr: np.ndarray) -> Tuple[Optional[np.ndarray], Dict]: H, W = img_bgr.shape[:2] y0 = int(round(TRACK_ROI_Y0_RATIO * H)) roi = img_bgr[y0:H, :] roi_h, roi_w = roi.shape[:2] gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) gray = cv2.GaussianBlur(gray, (5, 5), 0) edges = cv2.Canny(gray, TRACK_CANNY1, TRACK_CANNY2, apertureSize=3, L2gradient=True) min_len = int(max(30, roi_w * TRACK_HOUGH_MIN_LEN_RATIO)) lines = cv2.HoughLinesP( edges, rho=1.0, theta=np.pi/180.0, threshold=TRACK_HOUGH_THRESHOLD, minLineLength=min_len, maxLineGap=TRACK_HOUGH_MAX_GAP ) info = {"roi_y0": y0, "n_lines_raw": 0 if lines is None else int(len(lines)), "left_n": 0, "right_n": 0} if lines is None or len(lines) < 4: return None, info cand = [] for (x1, y1, x2, y2) in lines.reshape(-1, 4): dx = x2 - x1 dy = y2 - y1 ang = abs(math.degrees(math.atan2(dy, dx))) if ang < TRACK_ANGLE_MIN_DEG or ang > TRACK_ANGLE_MAX_DEG: continue L = _line_len(x1, y1, x2, y2) if L < min_len: continue if abs(dy) < 1e-6: xb = 0.5 * (x1 + x2) else: t = (roi_h - 1 - y1) / float(dy) xb = x1 + t * dx cand.append((x1, y1, x2, y2, L, xb)) if len(cand) < 4: return None, info midx = roi_w * 0.5 left = [c for c in cand if c[5] < midx] right = [c for c in cand if c[5] >= midx] info["left_n"] = len(left) info["right_n"] = len(right) if len(left) < 2 or len(right) < 2: return None, info def fit_line_x_as_func_y(lines_list): ys, xs = [], [] for x1, y1, x2, y2, L, xb in lines_list: ys.extend([y1, y2]) xs.extend([x1, x2]) ys = np.array(ys, dtype=np.float32) xs = np.array(xs, dtype=np.float32) a, b = np.polyfit(ys, xs, deg=1) # x = a*y + b return float(a), float(b) 
aL, bL = fit_line_x_as_func_y(left) aR, bR = fit_line_x_as_func_y(right) def x_at(a, b, y): return a * float(y) + b y_top = 0.0 y_bot = float(roi_h - 1) xL_top = x_at(aL, bL, y_top) xL_bot = x_at(aL, bL, y_bot) xR_top = x_at(aR, bR, y_top) xR_bot = x_at(aR, bR, y_bot) if xL_bot > xR_bot: xL_top, xR_top = xR_top, xL_top xL_bot, xR_bot = xR_bot, xL_bot poly = np.array([ [xL_top, y_top + y0], [xR_top, y_top + y0], [xR_bot, y_bot + y0], [xL_bot, y_bot + y0], ], dtype=np.float32) poly_exp = expand_quad(poly, expand_px=TRACK_POLY_EXPAND_PX, img_w=W, img_h=H) info["poly"] = poly_exp.tolist() info["fit"] = {"aL": aL, "bL": bL, "aR": aR, "bR": bR} return poly_exp.astype(np.float32), info # ========================= # Track points extraction by image mask # ========================= def extract_track_points_by_polygon(points_xyz: np.ndarray, poly_img: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray, img_w: int, img_h: int, max_points: int = 250000) -> np.ndarray: if points_xyz is None or points_xyz.shape[0] == 0 or poly_img is None: return np.zeros((0, 3), dtype=np.float32) pts = points_xyz if pts.shape[0] > max_points: idx = np.random.choice(pts.shape[0], max_points, replace=False) pts = pts[idx] img_pts, valid = F.project_points_lidar_to_img( pts, calib, T_lidar2cam, img_w, img_h, use_distortion=F.USE_DISTORTION ) mask_img = np.zeros((img_h, img_w), dtype=np.uint8) poly_int = np.round(poly_img).astype(np.int32).reshape(-1, 1, 2) cv2.fillPoly(mask_img, [poly_int], 255) uv = np.round(img_pts).astype(np.int32) u = uv[:, 0] v = uv[:, 1] in_img = (u >= 0) & (u < img_w) & (v >= 0) & (v < img_h) keep = valid & in_img if keep.sum() == 0: return np.zeros((0, 3), dtype=np.float32) u2 = u[keep] v2 = v[keep] inside = mask_img[v2, u2] > 0 pts_keep = pts[keep][inside] return pts_keep.astype(np.float32) def extract_track_points_fallback(points_xyz: np.ndarray) -> np.ndarray: if points_xyz is None or points_xyz.shape[0] == 0: return np.zeros((0, 3), dtype=np.float32) x = points_xyz[:, 0] 
y = points_xyz[:, 1] m = (x >= FALLBACK_X_RANGE[0]) & (x <= FALLBACK_X_RANGE[1]) & (np.abs(y) <= FALLBACK_Y_ABS) pts = points_xyz[m] if pts.shape[0] < 200: return pts.astype(np.float32) zq = float(np.quantile(pts[:, 2], FALLBACK_Z_TOP_Q)) m2 = np.abs(pts[:, 2] - zq) <= FALLBACK_Z_BAND return pts[m2].astype(np.float32) # ========================= # Track frame (PCA on XY) # ========================= @dataclass class TrackFrame: origin_xy: np.ndarray dir_xy: np.ndarray n_xy: np.ndarray z_top: float s_range: Tuple[float, float] def fit_track_frame(track_pts_xyz: np.ndarray) -> Optional[TrackFrame]: if track_pts_xyz is None or track_pts_xyz.shape[0] < 80: return None xy = track_pts_xyz[:, :2].astype(np.float64) z = track_pts_xyz[:, 2].astype(np.float64) origin = xy.mean(axis=0) X = xy - origin C = np.cov(X.T) eigvals, eigvecs = np.linalg.eigh(C) v = eigvecs[:, np.argmax(eigvals)] v = v / (np.linalg.norm(v) + 1e-9) if v[0] < 0: v = -v dir_xy = v.astype(np.float32) n_xy = np.array([-dir_xy[1], dir_xy[0]], dtype=np.float32) z_top = float(np.quantile(z, TRACK_Z_TOP_Q)) s = (X @ dir_xy.reshape(2, 1)).reshape(-1) s0 = float(np.quantile(s, TRACK_S_QMIN) - TRACK_S_MARGIN) s1 = float(np.quantile(s, TRACK_S_QMAX) + TRACK_S_MARGIN) return TrackFrame(origin_xy=origin.astype(np.float32), dir_xy=dir_xy, n_xy=n_xy, z_top=z_top, s_range=(s0, s1)) def world_to_track(pts_xyz: np.ndarray, tf: TrackFrame) -> np.ndarray: xy = pts_xyz[:, :2] - tf.origin_xy.reshape(1, 2) s = xy @ tf.dir_xy.reshape(2, 1) t = xy @ tf.n_xy.reshape(2, 1) v = (pts_xyz[:, 2] - tf.z_top).reshape(-1, 1) return np.concatenate([s, t, v], axis=1).astype(np.float32) def track_to_world(stv: np.ndarray, tf: TrackFrame) -> np.ndarray: s = stv[:, 0].reshape(-1, 1) t = stv[:, 1].reshape(-1, 1) v = stv[:, 2].reshape(-1, 1) xy = tf.origin_xy.reshape(1, 2) + s * tf.dir_xy.reshape(1, 2) + t * tf.n_xy.reshape(1, 2) z = tf.z_top + v.reshape(-1) return np.concatenate([xy, z.reshape(-1, 1)], axis=1).astype(np.float32) # 
========================= # Clearance envelope construction # ========================= @dataclass class ClearanceEnvelope: profile_name: str mode: str poly_tv: np.ndarray s0: float s1: float t_max: float v_min: float contour_tv: np.ndarray vertices_world: np.ndarray edges: List[Tuple[int, int]] def build_profile_tv(profile_mm: List[Tuple[float, float]], scale_xy: float = 1.0, extra_y: float = 0.0, extra_z: float = 0.0) -> np.ndarray: yz = np.array(profile_mm, dtype=np.float32) y_mm = yz[:, 0] z_mm = yz[:, 1] z0 = float(np.min(z_mm)) z_shift = z_mm - z0 y_m = (y_mm / 1000.0) * scale_xy + extra_y z_m = (z_shift / 1000.0) * scale_xy + extra_z right = np.stack([y_m, -z_m], axis=1).astype(np.float32) interior = right[1:-1] left = interior[::-1].copy() left[:, 0] *= -1.0 return np.concatenate([right, left], axis=0).astype(np.float32) def estimate_track_half_width(track_pts_xyz: np.ndarray, tf: TrackFrame) -> float: stv = world_to_track(track_pts_xyz, tf) t = stv[:, 1] v = stv[:, 2] band = TRACK_TOP_BAND m = np.abs(v) <= band if m.sum() < 50: hw = float(np.quantile(np.abs(t), 0.90)) else: hw = float(np.quantile(np.abs(t[m]), 0.90)) return float(max(0.2, hw)) def build_envelope(tf: TrackFrame, profile_name: str = "C_equipment", mode: str = "equipment", track_pts_xyz: Optional[np.ndarray] = None) -> ClearanceEnvelope: if profile_name not in PROFILES_MM: raise ValueError(f"Unknown profile_name={profile_name}. 
Available: {list(PROFILES_MM.keys())}") profile_mm = PROFILES_MM[profile_name] scale = 1.0 if PROFILE_SCALE_TO_TRACK and track_pts_xyz is not None and track_pts_xyz.shape[0] > 80: hw_track = estimate_track_half_width(track_pts_xyz, tf) yz = np.array(profile_mm, dtype=np.float32) z0 = float(np.min(yz[:, 1])) top_mask = np.abs(yz[:, 1] - z0) < 1e-3 hw_profile = float(np.max(yz[top_mask, 0]) / 1000.0) if top_mask.sum() >= 1 else float(np.max(yz[:, 0]) / 1000.0) if hw_profile > 1e-3: scale = hw_track / hw_profile scale = float(np.clip(scale, PROFILE_SCALE_CLAMP[0], PROFILE_SCALE_CLAMP[1])) extra_y = BUILDING_EXTRA_Y if mode.lower() == "building" else 0.0 extra_z = BUILDING_EXTRA_Z if mode.lower() == "building" else 0.0 poly_tv = build_profile_tv(profile_mm, scale_xy=scale, extra_y=extra_y, extra_z=extra_z) s0, s1 = tf.s_range t_max = float(np.max(np.abs(poly_tv[:, 0]))) v_min = float(np.min(poly_tv[:, 1])) contour = poly_tv.reshape(-1, 1, 2).astype(np.float32) K = poly_tv.shape[0] stv0 = np.concatenate([np.full((K, 1), s0, dtype=np.float32), poly_tv], axis=1) stv1 = np.concatenate([np.full((K, 1), s1, dtype=np.float32), poly_tv], axis=1) V0 = track_to_world(stv0, tf) V1 = track_to_world(stv1, tf) vertices = np.concatenate([V0, V1], axis=0) edges: List[Tuple[int, int]] = [] for i in range(K): edges.append((i, (i + 1) % K)) for i in range(K): edges.append((K + i, K + ((i + 1) % K))) for i in range(K): edges.append((i, K + i)) return ClearanceEnvelope(profile_name=profile_name, mode=mode.lower(), poly_tv=poly_tv, s0=s0, s1=s1, t_max=t_max, v_min=v_min, contour_tv=contour, vertices_world=vertices, edges=edges) # ========================= # Inside test & scoring # ========================= def point_inside_envelope(stv: np.ndarray, env: ClearanceEnvelope) -> bool: s, t, v = float(stv[0]), float(stv[1]), float(stv[2]) if s < env.s0 or s > env.s1: return False d = cv2.pointPolygonTest(env.contour_tv, (t, v), measureDist=False) return d >= 0.0 def signed_dist_to_envelope(tv: 
Tuple[float, float], env: ClearanceEnvelope) -> float: t, v = float(tv[0]), float(tv[1]) return float(cv2.pointPolygonTest(env.contour_tv, (t, v), measureDist=True)) def sample_points_from_box(box7: np.ndarray) -> np.ndarray: corners = F.boxes3d_to_corners(box7.reshape(1, 7).astype(np.float32))[0] pts = [corners] cx, cy, cz = box7[0], box7[1], box7[2] pts.append(np.array([[cx, cy, cz]], dtype=np.float32)) if SAMPLE_FACE_CENTERS: mn = corners.min(axis=0) mx = corners.max(axis=0) c = np.array([cx, cy, cz], dtype=np.float32) face = np.array([ [mn[0], c[1], c[2]], [mx[0], c[1], c[2]], [c[0], mn[1], c[2]], [c[0], mx[1], c[2]], [c[0], c[1], mn[2]], [c[0], c[1], mx[2]], ], dtype=np.float32) pts.append(face) return np.concatenate(pts, axis=0) def eval_det_vs_envelope(det: F.Det3D, tf: TrackFrame, env: ClearanceEnvelope) -> Dict: pts_world = sample_points_from_box(det.box7) stv = world_to_track(pts_world, tf) inside_flags = np.array([point_inside_envelope(stv[i], env) for i in range(stv.shape[0])], dtype=bool) inside_ratio = float(inside_flags.mean()) center_xyz = det.box7[:3].reshape(1, 3).astype(np.float32) center_stv = world_to_track(center_xyz, tf)[0] center_in = point_inside_envelope(center_stv, env) dist_signed = signed_dist_to_envelope((center_stv[1], center_stv[2]), env) dist_outside = float(max(0.0, -dist_signed)) corners_world = F.boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0] stv_c = world_to_track(corners_world, tf) s_min, t_min, v_min = stv_c.min(axis=0).tolist() s_max, t_max, v_max = stv_c.max(axis=0).tolist() es0, es1 = env.s0, env.s1 et0, et1 = -env.t_max, env.t_max ev0, ev1 = env.v_min, 0.0 ds = max(0.0, min(s_max, es1) - max(s_min, es0)) dt = max(0.0, min(t_max, et1) - max(t_min, et0)) dv = max(0.0, min(v_max, ev1) - max(v_min, ev0)) inter = ds * dt * dv vol_box = max(1e-6, (s_max - s_min) * (t_max - t_min) * (v_max - v_min)) overlap_aabb = float(np.clip(inter / vol_box, 0.0, 1.0)) dist_term = math.exp(-dist_outside / max(1e-6, 
NEAR_DIST_THR)) score = 0.45 * inside_ratio + 0.35 * overlap_aabb + 0.20 * (1.0 if center_in else 0.0) score = float(np.clip(score * 0.85 + 0.15 * dist_term, 0.0, 1.0)) if center_in or inside_ratio >= INTRUDE_CORNER_RATIO_THR or overlap_aabb >= 0.15: label = "INTRUDE" elif dist_outside <= NEAR_DIST_THR: label = "NEAR" else: label = "OUTSIDE" return { "label": label, "score_comp": score, "inside_ratio": inside_ratio, "overlap_aabb": overlap_aabb, "center_in": bool(center_in), "dist_outside": dist_outside, "center_stv": center_stv.tolist(), "box_stv_aabb": [float(s_min), float(s_max), float(t_min), float(t_max), float(v_min), float(v_max)], } # ========================= # Visualization helpers # ========================= def rasterize_density(rr: np.ndarray, cc: np.ndarray, H: int, W: int, gamma: float = 0.55, dilate_ksize: int = 3) -> np.ndarray: rr = rr.astype(np.int64) cc = cc.astype(np.int64) idx = rr * W + cc counts = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32) if counts.max() > 0: dens = np.log1p(counts) dens = dens / (dens.max() + 1e-6) dens = np.power(dens, gamma) gray = (dens * 255.0).astype(np.uint8) else: gray = np.zeros((H, W), dtype=np.uint8) if dilate_ksize is not None and dilate_ksize >= 2: k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (dilate_ksize, dilate_ksize)) gray = cv2.dilate(gray, k, iterations=1) return cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR) def draw_polyline(img: np.ndarray, pts: np.ndarray, color, thickness=2, closed=True): pts_i = np.round(pts).astype(np.int32).reshape(-1, 1, 2) cv2.polylines(img, [pts_i], closed, color, thickness, cv2.LINE_AA) def render_camera_overlay(img_bgr: np.ndarray, track_poly: Optional[np.ndarray], yolo: List[F.Det2D], pcdet: List[F.Det3D], fused: List[F.Det3D]) -> np.ndarray: vis = img_bgr.copy() H, W = vis.shape[:2] if track_poly is not None: draw_polyline(vis, track_poly, COL_TRACK, thickness=3, closed=True) cv2.putText(vis, "TRACK ROI", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 1.0, 
COL_TRACK, 2, cv2.LINE_AA) for d in yolo: b = F.clip_box_xyxy(d.xyxy, W, H) x1, y1, x2, y2 = [int(round(v)) for v in b] cv2.rectangle(vis, (x1, y1), (x2, y2), F.COL_YOLO, 3, cv2.LINE_AA) cv2.putText(vis, f"YOLO {d.cls_name} {d.score:.2f}", (x1, max(0, y1 - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.8, F.COL_YOLO, 2, cv2.LINE_AA) for d in pcdet: if d.proj_xyxy is None: continue b = F.clip_box_xyxy(d.proj_xyxy, W, H) F.draw_dashed_rect(vis, b, F.COL_PCDET, thickness=3, dash_len=18, gap_len=12) cv2.putText(vis, f"PCD {d.cls_name} {d.score:.2f}", (int(b[0]), int(b[3] + 20)), cv2.FONT_HERSHEY_SIMPLEX, 0.75, F.COL_PCDET, 2, cv2.LINE_AA) for d in fused: if d.proj_xyxy is None: continue b = F.clip_box_xyxy(d.proj_xyxy, W, H) x1, y1, x2, y2 = [int(round(v)) for v in b] cv2.rectangle(vis, (x1, y1), (x2, y2), F.COL_FUSED, 3, cv2.LINE_AA) cv2.putText(vis, f"FUS {d.cls_name} {d.score:.2f}", (x2 - 220, max(0, y1 - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.75, F.COL_FUSED, 2, cv2.LINE_AA) return vis def render_bev(points_xyz: np.ndarray, tf: TrackFrame, env: ClearanceEnvelope, dets_fused: List[F.Det3D], dets_pcd: List[F.Det3D]) -> np.ndarray: s0, s1 = env.s0, env.s1 hw = env.t_max rect_st = np.array([ [s0, -hw, 0.0], [s1, -hw, 0.0], [s1, hw, 0.0], [s0, hw, 0.0], ], dtype=np.float32) rect_xy_world = track_to_world(rect_st, tf)[:, :2] xs = points_xyz[:, 0] ys = points_xyz[:, 1] x_min = float(min(rect_xy_world[:, 0].min(), np.quantile(xs, 0.02))) x_max = float(max(rect_xy_world[:, 0].max(), np.quantile(xs, 0.98))) y_min = float(min(rect_xy_world[:, 1].min(), np.quantile(ys, 0.02))) y_max = float(max(rect_xy_world[:, 1].max(), np.quantile(ys, 0.98))) margin = 3.0 x_min -= margin; x_max += margin y_min -= margin; y_max += margin res = BEV_RES W = int(max(420, math.ceil((y_max - y_min) / res))) H = int(max(420, math.ceil((x_max - x_min) / res))) m = (xs >= x_min) & (xs <= x_max) & (ys >= y_min) & (ys <= y_max) pts = points_xyz[m] if pts.shape[0] > 250000: idx = np.random.choice(pts.shape[0], 250000, 
replace=False) pts = pts[idx] if pts.shape[0] > 0: rr = ((x_max - pts[:, 0]) / res).astype(np.int32) cc = ((pts[:, 1] - y_min) / res).astype(np.int32) rr = np.clip(rr, 0, H - 1) cc = np.clip(cc, 0, W - 1) img = rasterize_density(rr, cc, H, W, gamma=BEV_GAMMA, dilate_ksize=BEV_DILATE) else: img = np.zeros((H, W, 3), dtype=np.uint8) def xy_to_pix(xy): x, y = float(xy[0]), float(xy[1]) r = (x_max - x) / res c = (y - y_min) / res return np.array([c, r], dtype=np.float32) rect_pix = np.stack([xy_to_pix(p) for p in rect_xy_world], axis=0) draw_polyline(img, rect_pix, COL_ENVELOPE, thickness=3, closed=True) cv2.putText(img, f"Envelope footprint ({env.profile_name},{env.mode})", (12, 36), cv2.FONT_HERSHEY_SIMPLEX, 0.9, COL_ENVELOPE, 2, cv2.LINE_AA) c0_world = track_to_world(np.array([[s0, 0.0, 0.0]], dtype=np.float32), tf)[0, :2] c1_world = track_to_world(np.array([[s1, 0.0, 0.0]], dtype=np.float32), tf)[0, :2] p0 = xy_to_pix(c0_world) p1 = xy_to_pix(c1_world) cv2.line(img, tuple(np.round(p0).astype(int)), tuple(np.round(p1).astype(int)), (200, 200, 200), 2, cv2.LINE_AA) def draw_det_foot(det: F.Det3D, color, thick=2): corners = F.boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0] xy = corners[:4, :2] pix = np.stack([xy_to_pix(p) for p in xy], axis=0) draw_polyline(img, pix, color, thickness=thick, closed=True) for d in dets_pcd: if d.score < 0.1: continue draw_det_foot(d, F.COL_PCDET, thick=2) for d in dets_fused: if d.score < 0.1: continue draw_det_foot(d, F.COL_FUSED, thick=3) return img def render_tv(points_xyz: np.ndarray, tf: TrackFrame, env: ClearanceEnvelope, det: Optional[F.Det3D]) -> np.ndarray: if det is None: s_center = 0.5 * (env.s0 + env.s1) else: s_center = float(world_to_track(det.box7[:3].reshape(1, 3).astype(np.float32), tf)[0, 0]) stv = world_to_track(points_xyz, tf) s = stv[:, 0] m = (s >= s_center - TV_SLAB_HALF) & (s <= s_center + TV_SLAB_HALF) pts = stv[m] if pts.shape[0] > 250000: idx = np.random.choice(pts.shape[0], 250000, 
replace=False) pts = pts[idx] t_lim = env.t_max + 0.8 v_top = 0.8 v_bot = env.v_min - 0.5 res = TV_RES W = int(max(420, math.ceil((2 * t_lim) / res))) H = int(max(420, math.ceil((v_top - v_bot) / res))) if pts.shape[0] > 0: tt = pts[:, 1] vv = pts[:, 2] mm = (tt >= -t_lim) & (tt <= t_lim) & (vv >= v_bot) & (vv <= v_top) sub = pts[mm] if sub.shape[0] > 0: cc = ((sub[:, 1] - (-t_lim)) / res).astype(np.int32) rr = ((v_top - sub[:, 2]) / res).astype(np.int32) cc = np.clip(cc, 0, W - 1) rr = np.clip(rr, 0, H - 1) img = rasterize_density(rr, cc, H, W, gamma=TV_GAMMA, dilate_ksize=TV_DILATE) else: img = np.zeros((H, W, 3), dtype=np.uint8) else: img = np.zeros((H, W, 3), dtype=np.uint8) def tv_to_pix(tv): t0, v0 = float(tv[0]), float(tv[1]) c = (t0 - (-t_lim)) / res r = (v_top - v0) / res return np.array([c, r], dtype=np.float32) poly_pix = np.stack([tv_to_pix(p) for p in env.poly_tv], axis=0) draw_polyline(img, poly_pix, COL_ENVELOPE, thickness=3, closed=True) cv2.putText(img, f"t-v section @ s={s_center:.2f}m (slab +- {TV_SLAB_HALF:.2f}m)", (12, 36), cv2.FONT_HERSHEY_SIMPLEX, 0.8, COL_TEXT, 2, cv2.LINE_AA) if det is not None: corners_world = F.boxes3d_to_corners(det.box7.reshape(1, 7).astype(np.float32))[0] stv_c = world_to_track(corners_world, tf) tv_c = stv_c[:, 1:3].astype(np.float32) hull = cv2.convexHull(tv_c.reshape(-1, 1, 2)).reshape(-1, 2) hull_pix = np.stack([tv_to_pix(p) for p in hull], axis=0) draw_polyline(img, hull_pix, F.COL_FUSED, thickness=3, closed=True) cv2.putText(img, f"FUSED {det.cls_name} {det.score:.2f}", (12, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.85, F.COL_FUSED, 2, cv2.LINE_AA) return img def stack_panels(cam: np.ndarray, bev: np.ndarray, tv: np.ndarray) -> np.ndarray: H = max(cam.shape[0], bev.shape[0], tv.shape[0]) def pad_to(img): h, w = img.shape[:2] if h == H: return img out = np.zeros((H, w, 3), dtype=np.uint8) out[:] = (0, 0, 0) y0 = (H - h) // 2 out[y0:y0 + h, :] = img return out return cv2.hconcat([pad_to(cam), pad_to(bev), pad_to(tv)]) # 
# =========================
# Main
# =========================
def main():
    """Single-frame clearance pipeline: run YOLO + OpenPCDet, fuse detections,
    extract the track, build the clearance envelope, print intrusion results,
    and save a stacked visualization canvas.

    NOTE(review): relies on module-level names not visible here
    (argparse, PROFILES_MM, USE_IMAGESETS, SPLIT_NAME, load_imageset_ids,
    detect_track_polygon_hough, fit_track_frame, build_envelope,
    eval_det_vs_envelope, render_* helpers, OUT_DIR, and the fusion module F)
    — confirm they are imported/defined at the top of this script.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--frame_id", type=str, default=None)
    parser.add_argument("--profile", type=str, default="C_equipment", choices=list(PROFILES_MM.keys()))
    parser.add_argument("--mode", type=str, default="equipment", choices=["equipment", "building"])
    parser.add_argument("--topk", type=int, default=20)
    args = parser.parse_args()

    # Load both detectors once; idx_map translates frame-id stems to dataset indices.
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models()
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}

    frame_id = args.frame_id
    if frame_id is None:
        # Prefer the ImageSets split file when enabled; otherwise scan *.bin files.
        if USE_IMAGESETS:
            dataset_root = str(Path(F.PCDET_POINTS_DIR).parent)
            imagesets_dir = os.path.join(dataset_root, "ImageSets")
            ids = load_imageset_ids(os.path.join(imagesets_dir, f"{SPLIT_NAME}.txt"))
            # Only keep frames that exist in the dataset AND have a matching image.
            ids = [i for i in ids if i in idx_map and F.find_image_for_frame(i) is not None]
            if ids:
                frame_id = ids[0]
        if frame_id is None:
            bin_files = sorted(glob.glob(os.path.join(F.PCDET_POINTS_DIR, "*.bin")))
            ids = [Path(p).stem for p in bin_files]
            ids = [i for i in ids if i in idx_map and F.find_image_for_frame(i) is not None]
            if not ids:
                print("[ERROR] No valid frames found.")
                return
            frame_id = ids[0]

    if frame_id not in idx_map:
        print(f"[ERROR] frame_id {frame_id} not in dataset index.")
        return

    img_path = F.find_image_for_frame(frame_id)
    if img_path is None:
        print(f"[ERROR] image not found for {frame_id}")
        return
    img = cv2.imread(img_path)
    if img is None:
        print(f"[ERROR] cv2.imread failed: {img_path}")
        return
    H, W = img.shape[:2]

    # Run both detectors on the chosen frame.
    yolo_dets = F.infer_yolo(yolo_model, img)
    _, raw_points, pcdet_dets = F.infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[frame_id], device)
    if raw_points is None or raw_points.shape[0] == 0:
        print("[ERROR] raw_points empty")
        return

    # Decision-level fusion, then keep only the top-k by score for both lists.
    T = F.get_extrinsic_matrix(F.CALIB, raw_points[:, :3], W, H)
    fused_dets = F.fuse_frame(yolo_dets, pcdet_dets, W, H, F.CALIB, T, raw_points)
    fused_dets = sorted(fused_dets, key=lambda d: d.score, reverse=True)[:max(1, args.topk)]
    pcdet_dets = sorted(pcdet_dets, key=lambda d: d.score, reverse=True)[:max(1, args.topk)]

    # Track extraction: Hough polygon first, point-cloud-only fallback otherwise
    # (also falls back when the polygon yields too few 3D points).
    track_poly, _ = detect_track_polygon_hough(img)
    if track_poly is None:
        print("[TRACK] Hough failed, fallback to pointcloud-only track extraction.")
        track_pts = extract_track_points_fallback(raw_points[:, :3])
    else:
        track_pts = extract_track_points_by_polygon(raw_points[:, :3], track_poly, F.CALIB, T, W, H)
        if track_pts.shape[0] < 80:
            print(f"[TRACK] polygon->points too few ({track_pts.shape[0]}), fallback.")
            track_pts = extract_track_points_fallback(raw_points[:, :3])
    print(f"[TRACK] track_pts: {track_pts.shape[0]} | track_poly={'OK' if track_poly is not None else 'None'}")

    # Fit the local track frame; abort if too few supporting points.
    tf = fit_track_frame(track_pts)
    if tf is None:
        print("[ERROR] TrackFrame fit failed (too few points).")
        return

    env = build_envelope(tf, profile_name=args.profile, mode=args.mode, track_pts_xyz=track_pts)

    print("\n================= Clearance Envelope =================")
    print(f"frame_id: {frame_id}")
    print(f"profile: {env.profile_name} | mode: {env.mode} | scale_to_track: {PROFILE_SCALE_TO_TRACK}")
    print(f"track: z_top={tf.z_top:.3f}m | s_range=[{env.s0:.2f}, {env.s1:.2f}]m | t_max={env.t_max:.2f}m | height={-env.v_min:.2f}m")
    if track_poly is not None:
        print(f"track_poly(img): {np.round(track_poly).astype(int).tolist()}")

    print("\n================= Intrusion Results (FUSED) =================")
    if len(fused_dets) == 0:
        print("No fused detections.")
    else:
        # Per-detection evaluation against the envelope in track (s,t,v) coords.
        for i, d in enumerate(fused_dets):
            r = eval_det_vs_envelope(d, tf, env)
            s, t, v = r["center_stv"]
            print(f"[{r['label']:7s}] #{i:02d} cls={d.cls_name:14s} detScore={d.score:.3f} compScore={r['score_comp']:.3f}")
            print(f" center(stv)=({s:.2f},{t:.2f},{v:.2f}) center_in={r['center_in']} dist_out={r['dist_outside']:.3f}m")
            print(f" inside_ratio={r['inside_ratio']:.3f} overlap_aabb={r['overlap_aabb']:.3f} box_stv_aabb={['%.2f'%x for x in r['box_stv_aabb']]}")

    # Compose the output canvas: camera overlay + BEV + track-transverse view.
    cam_vis = render_camera_overlay(img, track_poly, yolo_dets, pcdet_dets[:5], fused_dets[:5])
    bev_vis = render_bev(raw_points[:, :3], tf, env, fused_dets[:10], pcdet_dets[:10])
    det_top = fused_dets[0] if fused_dets else None
    tv_vis = render_tv(raw_points[:, :3], tf, env, det_top)
    canvas = stack_panels(cam_vis, bev_vis, tv_vis)
    out_path = os.path.join(OUT_DIR, f"{frame_id}_clearance.jpg")
    cv2.imwrite(out_path, canvas)
    print(f"\n[OK] saved: {out_path}")


if __name__ == "__main__":
    main()
如果你希望我把这套“限界可视化 + 入侵结果打印”直接并入你原来的 fusion.py 的 debug_vis 大图里(例如 BEV 里叠加限界 footprint、再加一个 t-v 面板、并把 INTRUDE/NEAR/OUTSIDE 写在图上),我也可以按你现在的布局继续改成“同一张大图输出”。
我的意思是这个限界是立体的,而且我看不到你提取的二维轨道区域,暂时来看你的限界是错误的方向。我想要的是利用某种方法从图像提取轨道边缘,然后将这个边缘投影到对应的轨道点云。限界沿轨道方向的截面是一个八边形,沿轨道中心居中;轨道中心是黑色的,黑色区域将轨道分为两侧,在点云中这部分表现为左右两簇点云,中间是空白,空白上方是一簇点云。另外,你如何判断侵界?重叠度等指标要打印出来——你判断是否侵界的评判指标是什么,也要打印出来。你要做的是对分割轨道等的每一步都保存图片,这样我才能知道你的中间过程哪一步有问题。我要知道你提取的轨道线是否正确:轨道是几乎一条很长的直线、或者带有一些曲率的连通区域。请给出代码。
根据你提供的《单轨设计.pdf》中车辆基本参数(例如最大宽度约 2.3–2.5m、车体最下部距轨面高度约 3.4–3.7m),我在代码里把“八边形限界”的默认宽/高做成可参数化的(你可以随时改成车辆限界/建筑限界的精确值)。(单轨设计.pdf)
下面给你一份**“单帧侵界检测 + 全流程中间结果落盘”**的完整脚本,它会做到你要求的几件事:
轨道边缘用 cv2.fitLine 拟合(同时保存 HoughLinesP 可视化)。打印的侵界指标包括:s_overlap / s_ratio(检测框沿轨道方向与限界的重叠长度/比例)、area_ratio(检测框在 (l,z) 截面与八边形的交叠面积比)、volume_ratio = s_ratio * area_ratio(综合“体积侵入比例”)、center_inside(中心点是否落入限界)、signed_dist_center(中心点到限界边界的带符号距离,inside 为负)。你原来的大脚本请保存为
fusion.py(就是你贴的那份融合+可视化代码)。
把下面脚本保存为intrusion_clearance.py,与fusion.py放同一目录。
运行(单帧):
bashpython intrusion_clearance.py --frame_id 000123
不指定帧(随机抽一帧能匹配到图像的):
bashpython intrusion_clearance.py
输出目录:
textdebug_intrusion/<frame_id>/ 00_raw.jpg 原图 01_roi.jpg ROI 可视化 02_dark_mask.png 黑色区域分割结果(轨道中心黑色区域) 03_dark_boundary.png 黑色区域边界(用于拟合轨道边缘) 04_edges_canny.png Canny 边缘 05_hough_lines.jpg HoughLinesP 结果(辅助你判断是否长直线/轻微曲率) 06_fit_lines.jpg 拟合出的左右轨道边缘线 + 轨道区域(绿色填充) 07_track_mask.png “左右边缘之间”的轨道区域 mask 08_proj_all_points.jpg 投影点云(灰点)+ 轨道区域叠加 09_proj_edge_points.jpg 匹配到轨道边缘的点云(黄点)+ 边缘线 10_clearance_bev.jpg BEV 上叠加 3D 限界线框 11_clearance_front.jpg X-Z 上叠加 3D 限界线框 12_clearance_3d.png 3D 截图:点云+限界线框+检测框(若 matplotlib 可用) intrusion_metrics.json/txt 侵界指标与结论(你要的重叠度、中心点等都在)
intrusion_clearance.py直接复制保存即可
python#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ intrusion_clearance.py 用法: 1) 确保你上一份融合代码保存为 fusion.py(就是你发的那份大脚本) 2) 把本文件保存为 intrusion_clearance.py,与 fusion.py 放在同一目录 3) 运行(指定一帧): python intrusion_clearance.py --frame_id 000123 或不指定帧(随机挑一帧能匹配到图像的): python intrusion_clearance.py 输出: debug_intrusion/<frame_id>/ 00_raw.jpg 01_roi.jpg 02_dark_mask.png 03_dark_boundary.png 04_edges_canny.png 05_hough_lines.jpg 06_fit_lines.jpg 07_track_mask.png 08_proj_all_points.jpg 09_proj_edge_points.jpg 10_clearance_bev.jpg 11_clearance_front.jpg 12_clearance_3d.png (若 matplotlib 可用) intrusion_metrics.json / intrusion_metrics.txt 核心思路(满足你这次的要求): - 从图像提取轨道黑色区域及其左右边缘(并保存所有中间过程) - 将边缘像素与投影后的点云匹配,得到轨道左右边缘点云 - 对轨道边缘点云做 PCA 得到轨道方向(保证限界沿轨道方向) - 构建沿轨道方向延伸的 3D 八边形限界(立体线框) - 用融合后的 3D 检测框做侵界判定(打印重叠度、中心点、体积比等指标) """ from __future__ import annotations import os import math import json import random import argparse from dataclasses import dataclass from pathlib import Path from typing import Dict, List, Optional, Tuple import numpy as np import cv2 # ========================= # 可调参数(先给一套默认) # ========================= # 轨道 ROI:典型“车道线”检测用的梯形 ROI(可按你的画面调整) ROI_Y_TOP_RATIO = 0.35 ROI_Y_BOTTOM_RATIO = 0.98 ROI_X_TOP_L_RATIO = 0.35 ROI_X_TOP_R_RATIO = 0.65 ROI_X_BOT_L_RATIO = 0.05 ROI_X_BOT_R_RATIO = 0.95 # 黑色区域阈值(自适应 + 限幅) BLACK_V_QUANTILE = 0.15 BLACK_V_BIAS = 12 BLACK_V_MIN = 25 BLACK_V_MAX = 110 # 边界/边缘提取 CANNY1 = 40 CANNY2 = 140 # Hough 仅用于辅助可视化(真正拟合用 fitLine) HOUGH_MIN_LEN_RATIO = 0.20 # 相对 H HOUGH_MAX_GAP = 25 HOUGH_THRESH = 80 # 将投影点匹配到边缘的像素距离阈值 EDGE_MATCH_PX = 4.0 # 生成中心线/方向的点云数量最小要求 MIN_EDGE_PTS_3D = 200 # 限界参数(单位:米) # 依据你提供的“单轨设计.pdf”表3:最大宽度 2300~2500mm,车体底部距轨面 3400~3700mm(这里取上限更安全) TRAIN_MAX_WIDTH_M = 2.5 TRAIN_BOTTOM_TO_RAIL_M = 3.7 # 安全裕度 CLEAR_SIDE_MARGIN_M = 0.35 CLEAR_TOP_MARGIN_M = 0.10 CLEAR_HEIGHT_EXTRA_M = 0.30 # 八边形上下倒角高度 OCT_CHAMFER_TOP_H = 0.55 OCT_CHAMFER_BOTTOM_H = 0.55 # 侵界阈值(可以按你经验调) INTRUDE_VOL_RATIO_THR = 0.02 # 体积比超过就判侵界 INTRUDE_AREA_RATIO_THR = 0.05 # 截面交叠面积比 
INTRUDE_CENTER_INSIDE_FORCE = True  # box center inside the envelope -> force intrusion verdict


# =========================
# Data structures
# =========================
@dataclass
class Line2D:
    """2D line parameterized by point (x0, y0) + direction (vx, vy)."""
    vx: float
    vy: float
    x0: float
    y0: float

    def x_at(self, y: float) -> float:
        """Return x on the line at image row y (x0 for near-horizontal lines)."""
        if abs(self.vy) < 1e-6:
            return float(self.x0)
        t = (y - self.y0) / self.vy
        return float(self.x0 + self.vx * t)

    def dist_to_point(self, x: np.ndarray, y: np.ndarray) -> np.ndarray:
        # Point-to-line distance: |(p - p0) x v| / |v|
        vx, vy = self.vx, self.vy
        denom = math.hypot(vx, vy) + 1e-9
        return np.abs(vx * (self.y0 - y) - vy * (self.x0 - x)) / denom


@dataclass
class Track2DInfo:
    """All 2D (image-space) track extraction artifacts for one frame."""
    ok: bool
    roi_mask: np.ndarray        # trapezoidal region of interest
    dark_mask: np.ndarray       # dark (track-center) region
    boundary_mask: np.ndarray   # morphological gradient of dark_mask
    edges_canny: np.ndarray     # Canny edges inside the ROI
    left_line: Optional[Line2D]
    right_line: Optional[Line2D]
    track_mask: np.ndarray      # filled region between left/right edge lines
    edge_line_mask: np.ndarray  # rasterized edge lines, used for distanceTransform matching
    dbg: Dict


@dataclass
class Track3DModel:
    """3D track frame + octagonal clearance cross-section."""
    ok: bool
    origin: np.ndarray   # (3,) reference point on the track centerline (z = rail surface)
    t: np.ndarray        # (3,) tangent (along-track direction)
    n: np.ndarray        # (3,) lateral direction (left)
    up: np.ndarray       # (3,) vertical (up)
    s_min: float         # along-track extent covered by edge points
    s_max: float
    rail_half_w: float
    z_ref: float
    poly_lz: np.ndarray  # (8,2) octagon cross-section in (l, z_rel); z_rel is relative to z_ref (0 = rail surface)
    dbg: Dict


@dataclass
class IntrusionResult:
    """Per-detection intrusion metrics (all the printable indicators)."""
    det_name: str
    det_score: float
    s_overlap: float             # overlap length along the track direction [m]
    s_ratio: float               # s_overlap / box length along track
    area_ratio: float            # cross-section intersection area / box cross-section area
    volume_ratio: float          # area_ratio * s_ratio ("volume intrusion ratio")
    center_inside: bool
    min_signed_dist_center: float  # signed distance of the box center to the octagon (inside < 0)
    intrude: bool
    reason: str


# =========================
# Basic geometry (convex hull / convex polygon intersection)
# =========================
def _cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
    # 2D cross product of (a - o) x (b - o); >0 means left turn.
    return float((a[0]-o[0])*(b[1]-o[1]) - (a[1]-o[1])*(b[0]-o[0]))


def convex_hull(points: np.ndarray) -> np.ndarray:
    """
    Monotone-chain convex hull; returns points in CCW order, shape (M, 2).
    """
    if points is None or len(points) == 0:
        return np.zeros((0, 2), dtype=np.float32)
    pts = np.unique(points.astype(np.float64), axis=0)
    if pts.shape[0] < 3:
        return pts.astype(np.float32)
    pts = pts[np.lexsort((pts[:, 1], pts[:, 0]))]
    lower = []
    for p in pts:
        while len(lower) >= 2 and _cross(lower[-2], lower[-1], p) <= 0:
            lower.pop()
        lower.append(p)
    upper = []
    for p in pts[::-1]:
        while len(upper) >= 2 and _cross(upper[-2], upper[-1], p) <= 0:
            upper.pop()
        upper.append(p)
    # Drop each chain's last point (duplicated endpoints).
    hull = np.array(lower[:-1] + upper[:-1], dtype=np.float64)
    return hull.astype(np.float32)


def polygon_area(poly: np.ndarray) -> float:
    """Shoelace area (absolute value) of a simple polygon."""
    if poly is None or poly.shape[0] < 3:
        return 0.0
    x = poly[:, 0]
    y = poly[:, 1]
    return float(0.5 * np.abs(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1))))


def ensure_ccw(poly: np.ndarray) -> np.ndarray:
    """Return the polygon with counter-clockwise winding (reversed if needed)."""
    if poly is None or poly.shape[0] < 3:
        return poly
    x = poly[:, 0]; y = poly[:, 1]
    signed = float(0.5 * (np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1))))
    if signed < 0:
        return poly[::-1].copy()
    return poly


def inside_half_plane(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> bool:
    # Clip polygon assumed CCW; "inside" is the left side of edge a->b.
    return _cross(a, b, p) >= -1e-9


def line_intersection(p1: np.ndarray, p2: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """
    Intersection of (infinite) line p1-p2 with line a-b.
    Returns p2 if the lines are (near-)parallel.
    """
    x1, y1 = p1
    x2, y2 = p2
    x3, y3 = a
    x4, y4 = b
    den = (x1-x2)*(y3-y4) - (y1-y2)*(x3-x4)
    if abs(den) < 1e-12:
        return p2.copy()
    px = ((x1*y2 - y1*x2)*(x3-x4) - (x1-x2)*(x3*y4 - y3*x4)) / den
    py = ((x1*y2 - y1*x2)*(y3-y4) - (y1-y2)*(x3*y4 - y3*x4)) / den
    return np.array([px, py], dtype=np.float32)


def convex_polygon_intersection(subject: np.ndarray, clip: np.ndarray) -> np.ndarray:
    """
    Sutherland-Hodgman clipping (convex); both polygons must be CCW.
    """
    if subject is None or subject.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    if clip is None or clip.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    subj = ensure_ccw(subject).astype(np.float32)
    clp = ensure_ccw(clip).astype(np.float32)
    out = subj
    # Clip the subject polygon successively against each clip edge.
    for i in range(clp.shape[0]):
        a = clp[i]
        b = clp[(i+1) % clp.shape[0]]
        inp = out
        if inp.shape[0] == 0:
            break
        out_list = []
        for j in range(inp.shape[0]):
            p = inp[j]
            q = inp[(j+1) % inp.shape[0]]
            pin = inside_half_plane(p, a, b)
            qin = inside_half_plane(q, a, b)
            if qin:
                if not pin:
                    out_list.append(line_intersection(p, q, a, b))
                out_list.append(q.copy())
            elif pin:
                out_list.append(line_intersection(p, q, a, b))
        out = np.array(out_list, dtype=np.float32)
    return out


def point_in_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> bool:
    """True if pt lies inside (or on the boundary of) the CCW convex polygon."""
    if poly is None or poly.shape[0] < 3:
        return False
    poly = ensure_ccw(poly)
    for i in range(poly.shape[0]):
        a = poly[i]
        b = poly[(i+1) % poly.shape[0]]
        if _cross(a, b, pt) < -1e-8:
            return False
    return True


def point_to_segment_distance(pt: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
    """Euclidean distance from pt to segment a-b."""
    ax, ay = a
    bx, by = b
    px, py = pt
    vx = bx - ax
    vy = by - ay
    wx = px - ax
    wy = py - ay
    c1 = vx*wx + vy*wy
    if c1 <= 0:
        # Projection falls before a.
        return float(math.hypot(px-ax, py-ay))
    c2 = vx*vx + vy*vy
    if c2 <= c1:
        # Projection falls after b.
        return float(math.hypot(px-bx, py-by))
    t = c1 / (c2 + 1e-12)
    projx = ax + t * vx
    projy = ay + t * vy
    return float(math.hypot(px-projx, py-projy))


def signed_distance_to_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> float:
    """
    Signed distance to the polygon boundary: inside -> negative,
    outside -> positive; magnitude is the min distance to any edge.
    """
    if poly is None or poly.shape[0] < 3:
        return float("inf")
    poly = ensure_ccw(poly)
    dmin = float("inf")
    for i in range(poly.shape[0]):
        a = poly[i]
        b = poly[(i+1) % poly.shape[0]]
        d = point_to_segment_distance(pt, a, b)
        dmin = min(dmin, d)
    inside = point_in_convex_polygon(pt, poly)
    return -dmin if inside else dmin


# =========================
# Image: ROI, dark region, edge fitting
# =========================
def make_roi_mask(h: int, w: int) -> np.ndarray:
    """Build the trapezoidal ROI mask (255 inside) from the module-level ratios."""
    y_top = int(h * ROI_Y_TOP_RATIO)
    y_bot = int(h * ROI_Y_BOTTOM_RATIO)
    x_tl = int(w * ROI_X_TOP_L_RATIO)
    x_tr = int(w * ROI_X_TOP_R_RATIO)
    x_bl = int(w * ROI_X_BOT_L_RATIO)
    x_br = int(w * ROI_X_BOT_R_RATIO)
    poly = np.array([[x_bl, y_bot], [x_br, y_bot], [x_tr, y_top], [x_tl, y_top]], dtype=np.int32)
    mask = np.zeros((h, w), dtype=np.uint8)
    cv2.fillConvexPoly(mask, poly, 255)
    return mask


def adaptive_black_threshold(v_channel: np.ndarray, roi_mask: np.ndarray) -> int:
    """Quantile-based darkness threshold over the ROI, clamped to a safe range."""
    vals = v_channel[roi_mask > 0].reshape(-1).astype(np.float32)
    if vals.size == 0:
        return 60
    thr = float(np.quantile(vals, BLACK_V_QUANTILE) + BLACK_V_BIAS)
    thr = int(np.clip(thr, BLACK_V_MIN, BLACK_V_MAX))
    return thr


def extract_track_edges_from_image(img_bgr: np.ndarray, save_dir: str) -> Track2DInfo:
    """Segment the dark track-center region, fit the left/right edge lines,
    and save every intermediate step (00_raw ... 07_track_mask) to save_dir."""
    os.makedirs(save_dir, exist_ok=True)
    H, W = img_bgr.shape[:2]
    dbg: Dict = {}

    # 00 raw
    cv2.imwrite(os.path.join(save_dir, "00_raw.jpg"), img_bgr)

    # ROI
    roi_mask = make_roi_mask(H, W)
    roi_vis = img_bgr.copy()
    roi_cnts, _ = cv2.findContours(roi_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cv2.drawContours(roi_vis, roi_cnts, -1, (0, 255, 255), 3, cv2.LINE_AA)
    cv2.imwrite(os.path.join(save_dir, "01_roi.jpg"), roi_vis)

    # Threshold the V channel to get the dark (track-center) region inside the ROI.
    hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV)
    v = hsv[:, :, 2]
    thr_v = adaptive_black_threshold(v, roi_mask)
    dbg["thr_v"] = int(thr_v)
    dark = ((v < thr_v) & (roi_mask > 0)).astype(np.uint8) * 255

    # Pick the connected component near the bottom-center anchor
    # (avoids mistaking other dark objects for the track).
    num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(dark, connectivity=8)
    pick = -1
    anchor = (int(W*0.5), int(H*0.85))
    if num_labels > 1:
        a_lbl = labels[anchor[1], anchor[0]]
        if a_lbl != 0:
            pick = int(a_lbl)
        else:
            # Fall back to the largest component whose centroid is in the lower part of the ROI.
            best = -1
            best_area = 0
            for i in range(1, num_labels):
                area = int(stats[i, cv2.CC_STAT_AREA])
                cy = float(centroids[i][1])
                if cy < H * 0.55:
                    continue
                if area > best_area:
                    best_area = area
                    best = i
            pick = best
    if pick > 0:
        dark = (labels == pick).astype(np.uint8) * 255

    # Morphological cleanup.
    k = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (9, 9))
    dark = cv2.morphologyEx(dark, cv2.MORPH_CLOSE, k, iterations=2)
    dark = cv2.morphologyEx(dark, cv2.MORPH_OPEN, k, iterations=1)
    cv2.imwrite(os.path.join(save_dir, "02_dark_mask.png"), dark)

    # Boundary of the dark region -- closest proxy for the track's left/right edges.
    kb = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    boundary = cv2.morphologyEx(dark, cv2.MORPH_GRADIENT, kb)
    boundary = (boundary > 0).astype(np.uint8) * 255
    cv2.imwrite(os.path.join(save_dir, "03_dark_boundary.png"), boundary)

    # Canny edges (for the Hough visualization).
    gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 1.2)
    edges = cv2.Canny(blur, CANNY1, CANNY2)
    edges = cv2.bitwise_and(edges, roi_mask)
    cv2.imwrite(os.path.join(save_dir, "04_edges_canny.png"), edges)

    # Hough lines (helper view: did we pick up long straight lines?).
    min_len = int(H * HOUGH_MIN_LEN_RATIO)
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=HOUGH_THRESH,
                            minLineLength=min_len, maxLineGap=HOUGH_MAX_GAP)
    hough_vis = img_bgr.copy()
    lines_list = []
    if lines is not None:
        for (x1, y1, x2, y2) in lines[:, 0]:
            lines_list.append((int(x1), int(y1), int(x2), int(y2)))
            cv2.line(hough_vis, (x1, y1), (x2, y2), (0, 128, 255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(save_dir, "05_hough_lines.jpg"), hough_vis)
    dbg["hough_n"] = int(0 if lines is None else len(lines))

    # Fit the two edge lines from boundary pixels (fallback: Canny pixels).
    ys, xs = np.where(boundary > 0)
    if ys.size < 200:
        ys, xs = np.where(edges > 0)
    # Keep only the lower part of the ROI.
    keep = ys >= int(H * ROI_Y_TOP_RATIO)
    xs = xs[keep]
    ys = ys[keep]
    # Downsample to keep fitLine fast.
    if xs.size > 25000:
        idx = np.random.choice(xs.size, 25000, replace=False)
        xs = xs[idx]; ys = ys[idx]
    x_mid = W * 0.5
    margin = W * 0.03
    left_mask = xs < (x_mid - margin)
    right_mask = xs > (x_mid + margin)

    def fit_line_from_points(xp: np.ndarray, yp: np.ndarray) -> Optional[Line2D]:
        if xp.size < 50:
            return None
        pts = np.stack([xp, yp], axis=1).astype(np.float32).reshape(-1, 1, 2)
        # cv2.fitLine returns (vx, vy, x0, y0)
        vx, vy, x0, y0 = cv2.fitLine(pts, cv2.DIST_L2, 0, 0.01, 0.01).reshape(-1).tolist()
        return Line2D(float(vx), float(vy), float(x0), float(y0))

    left_line = fit_line_from_points(xs[left_mask], ys[left_mask])
    right_line = fit_line_from_points(xs[right_mask], ys[right_mask])

    # If fitting failed, try again using the Hough segment endpoints as point sets.
    if (left_line is None or right_line is None) and (lines is not None):
        pts_left = []
        pts_right = []
        for (x1, y1, x2, y2) in lines_list:
            xm = 0.5 * (x1 + x2)
            if xm < x_mid:
                pts_left.extend([(x1, y1), (x2, y2)])
            else:
                pts_right.extend([(x1, y1), (x2, y2)])
        if left_line is None and len(pts_left) >= 50:
            pl = np.array(pts_left, dtype=np.float32)
            left_line = fit_line_from_points(pl[:, 0], pl[:, 1])
        if right_line is None and len(pts_right) >= 50:
            pr = np.array(pts_right, dtype=np.float32)
            right_line = fit_line_from_points(pr[:, 0], pr[:, 1])

    ok = (left_line is not None) and (right_line is not None)

    # Make sure left/right are not swapped (compare x at the ROI bottom).
    y_bot = int(H * ROI_Y_BOTTOM_RATIO)
    if ok:
        xl = left_line.x_at(y_bot)
        xr = right_line.x_at(y_bot)
        if xl > xr:
            left_line, right_line = right_line, left_line

    # track_mask: filled region between the two lines;
    # edge_line_mask: the two rasterized lines themselves.
    track_mask = np.zeros((H, W), dtype=np.uint8)
    edge_line_mask = np.zeros((H, W), dtype=np.uint8)
    if ok:
        y_top = int(H * ROI_Y_TOP_RATIO)
        ys_samp = np.linspace(y_top, y_bot, 50).astype(np.int32)
        left_pts = []
        right_pts = []
        for yy in ys_samp:
            xl = int(np.clip(left_line.x_at(float(yy)), 0, W-1))
            xr = int(np.clip(right_line.x_at(float(yy)), 0, W-1))
            left_pts.append([xl, int(yy)])
            right_pts.append([xr, int(yy)])
        poly = np.array(left_pts + right_pts[::-1], dtype=np.int32)
        cv2.fillPoly(track_mask, [poly], 255)
        # Left/right edge-line mask (consumed by distanceTransform matching).
        cv2.polylines(edge_line_mask, [np.array(left_pts, dtype=np.int32)], False, 255, 7, cv2.LINE_AA)
        cv2.polylines(edge_line_mask, [np.array(right_pts, dtype=np.int32)], False, 255, 7, cv2.LINE_AA)
    cv2.imwrite(os.path.join(save_dir, "07_track_mask.png"), track_mask)

    # Visualize the fitted lines (sanity check on direction).
    fit_vis = img_bgr.copy()
    if ok:
        y_top = int(H * ROI_Y_TOP_RATIO)
        y_bot = int(H * ROI_Y_BOTTOM_RATIO)

        def draw_line(line: Line2D, color):
            x1 = int(np.clip(line.x_at(y_top), 0, W-1))
            x2 = int(np.clip(line.x_at(y_bot), 0, W-1))
            cv2.line(fit_vis, (x1, y_top), (x2, y_bot), color, 4, cv2.LINE_AA)

        draw_line(left_line, (255, 0, 0))   # left edge: blue
        draw_line(right_line, (0, 0, 255))  # right edge: red
        # Overlay the track mask semi-transparently to verify the region.
        overlay = fit_vis.copy()
        overlay[track_mask > 0] = (0.4*overlay[track_mask > 0] + 0.6*np.array([0, 255, 0])).astype(np.uint8)
        fit_vis = overlay
    cv2.imwrite(os.path.join(save_dir, "06_fit_lines.jpg"), fit_vis)

    return Track2DInfo(
        ok=ok,
        roi_mask=roi_mask,
        dark_mask=dark,
        boundary_mask=boundary,
        edges_canny=edges,
        left_line=left_line,
        right_line=right_line,
        track_mask=track_mask,
        edge_line_mask=edge_line_mask,
        dbg=dbg
    )


# =========================
# Point cloud: project & match track-edge points
# =========================
def project_points(points_xyz: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray,
                   img_w: int, img_h: int, use_distortion: bool):
    """
    Delegate to fusion.py's projection function (avoids duplicating calibration logic).
    """
    import fusion as F
    uv, valid = F.project_points_lidar_to_img(points_xyz, calib, T_lidar2cam, img_w, img_h,
                                              use_distortion=use_distortion)
    return uv, valid


def match_edge_points_by_dist_transform(points_xyz: np.ndarray, uv: np.ndarray,
                                        valid: np.ndarray, edge_mask: np.ndarray) -> np.ndarray:
    """
    edge_mask: 255 = edge pixels.
    Returns a bool mask over points_xyz selecting points whose projection falls
    within EDGE_MATCH_PX pixels of an edge.
    """
    H, W = edge_mask.shape[:2]
    # distanceTransform: 0-valued pixels are "edge", nonzero are background.
    src = np.ones((H, W), dtype=np.uint8) * 255
    src[edge_mask > 0] = 0
    dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)
    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H)
    u2 = u[inside]
    v2 = v[inside]
    d = dist[v2, u2]
    keep_inside = d <= EDGE_MATCH_PX
    sel = np.zeros((points_xyz.shape[0],), dtype=bool)
    idx_inside = np.where(inside)[0]
    sel[idx_inside[keep_inside]] = True
    return sel


def build_track3d_from_edge_points(edge_pts_xyz: np.ndarray, left_edge_xyz: np.ndarray,
                                   right_edge_xyz: np.ndarray) -> Track3DModel:
    """Fit the 3D track frame (PCA in XY) from matched edge points and build
    the octagonal clearance cross-section in (l, z_rel) coordinates.

    NOTE(review): assumes the LiDAR frame has z pointing up (uses a fixed
    up=[0,0,1] and PCA only in XY) -- confirm against your sensor setup.
    """
    dbg: Dict = {}
    if edge_pts_xyz is None or edge_pts_xyz.shape[0] < MIN_EDGE_PTS_3D:
        # Not enough support: return a degenerate, clearly-flagged model.
        return Track3DModel(
            ok=False,
            origin=np.zeros(3, dtype=np.float32),
            t=np.array([1, 0, 0], dtype=np.float32),
            n=np.array([0, 1, 0], dtype=np.float32),
            up=np.array([0, 0, 1], dtype=np.float32),
            s_min=0.0, s_max=1.0,
            rail_half_w=0.5,
            z_ref=0.0,
            poly_lz=np.zeros((0, 2), dtype=np.float32),
            dbg={"err": "not enough edge pts"}
        )
    # PCA in XY (first singular vector = dominant track direction).
    xy = edge_pts_xyz[:, :2].astype(np.float64)
    mu = np.mean(xy, axis=0)
    X = xy - mu
    # SVD
    _, _, vt = np.linalg.svd(X, full_matrices=False)
    v = vt[0]  # principal direction
    t_xy = v / (np.linalg.norm(v) + 1e-12)
    # Orient t roughly toward +X (sign convention only).
    if t_xy[0] < 0:
        t_xy = -t_xy
    t = np.array([t_xy[0], t_xy[1], 0.0], dtype=np.float32)
    t = t / (np.linalg.norm(t) + 1e-12)
    n = np.array([-t[1], t[0], 0.0], dtype=np.float32)
    n = n / (np.linalg.norm(n) + 1e-12)
    up = np.array([0.0, 0.0, 1.0], dtype=np.float32)
    # Rail-surface height from the median edge-point z.
    z_ref = float(np.median(edge_pts_xyz[:, 2]))
    # Origin at mean XY, lifted to z_ref.
    origin = np.array([mu[0], mu[1], z_ref], dtype=np.float32)
    # (s, l) coordinates of all edge points relative to the origin.
    rel = edge_pts_xyz - origin.reshape(1, 3)
    s = (rel @ t.reshape(3, 1)).reshape(-1)
    l = (rel @ n.reshape(3, 1)).reshape(-1)
    # Quantile range to suppress outliers.
    s_min = float(np.quantile(s, 0.02))
    s_max = float(np.quantile(s, 0.98))
    if s_max - s_min < 3.0:
        # Fallback: full range when the quantile span is implausibly short.
        s_min = float(np.min(s))
        s_max = float(np.max(s))
    # Estimate the rail half-width from the left/right edge subsets (lateral medians).
    rail_half_w = 0.55
    if left_edge_xyz is not None and left_edge_xyz.shape[0] >= 50 and right_edge_xyz is not None and right_edge_xyz.shape[0] >= 50:
        l_left = ((left_edge_xyz - origin.reshape(1, 3)) @ n.reshape(3, 1)).reshape(-1)
        l_right = ((right_edge_xyz - origin.reshape(1, 3)) @ n.reshape(3, 1)).reshape(-1)
        # Robust medians.
        med_l = float(np.median(l_left))
        med_r = float(np.median(l_right))
        # Ensure left < right.
        if med_l > med_r:
            med_l, med_r = med_r, med_l
        center_offset = 0.5 * (med_l + med_r)
        rail_half_w = 0.5 * (med_r - med_l)
        # Shift the origin so the centerline sits at l = 0.
        origin = origin + n * center_offset
        dbg["med_l"] = med_l
        dbg["med_r"] = med_r
        dbg["center_offset"] = center_offset
    dbg["rail_half_w"] = rail_half_w
    dbg["z_ref"] = z_ref
    dbg["s_min"] = s_min
    dbg["s_max"] = s_max
    dbg["t_xy"] = t_xy.tolist()
    # Build the octagonal cross-section in (l, z_rel).
    # Top at z_rel = 0 (rail surface); top half-width = rail_half_w + margin.
    w0 = float(max(rail_half_w + CLEAR_TOP_MARGIN_M, 0.2))
    # Max half-width based on train width + safety margin.
    w1 = float(max(TRAIN_MAX_WIDTH_M * 0.5 + CLEAR_SIDE_MARGIN_M, w0 + 0.15))
    H_clear = float(TRAIN_BOTTOM_TO_RAIL_M + CLEAR_HEIGHT_EXTRA_M)
    ht = float(min(OCT_CHAMFER_TOP_H, max(0.05, 0.25 * H_clear)))
    hb = float(min(OCT_CHAMFER_BOTTOM_H, max(0.05, 0.25 * H_clear)))
    z0 = 0.0
    z1 = -ht
    z2 = -(H_clear - hb)
    z3 = -H_clear
    poly = np.array([
        [-w0, z0],
        [ w0, z0],
        [ w1, z1],
        [ w1, z2],
        [ w0, z3],
        [-w0, z3],
        [-w1, z2],
        [-w1, z1],
    ], dtype=np.float32)
    return Track3DModel(
        ok=True,
        origin=origin.astype(np.float32),
        t=t, n=n, up=up,
        s_min=s_min, s_max=s_max,
        rail_half_w=float(rail_half_w),
        z_ref=float(z_ref),
        poly_lz=poly,
        dbg=dbg
    )


# =========================
# Clearance wireframe generation & visualization
# =========================
def clearance_vertices_world(track: Track3DModel, s: float) -> np.ndarray:
    """
    Return the 8 octagon vertices (8,3) of the cross-section at along-track
    coordinate s, expressed in the lidar/world frame.
    """
    base = track.origin + track.t * s  # base already sits at z_ref
    verts = []
    for l, z_rel in track.poly_lz:
        p = base + track.n * float(l) + track.up * float(z_rel)
        verts.append(p)
    return np.stack(verts, axis=0).astype(np.float32)


def clearance_wireframe(track: Track3DModel, num_slices: int = 12) -> Tuple[np.ndarray, List[Tuple[int, int]]]:
    """
    Returns:
        V: (num_slices*8, 3) vertices
        E: edge list (vertex-index pairs)
    """
    ss = np.linspace(track.s_min, track.s_max, num_slices).astype(np.float32)
    Vs = []
    for s in ss:
        Vs.append(clearance_vertices_world(track, float(s)))
    V = np.concatenate(Vs, axis=0)
    E = []
    # 8 ring edges inside each slice.
    for k in range(num_slices):
        base = k * 8
        for i in range(8):
            E.append((base + i, base + (i+1) % 8))
    # Longitudinal edges connecting neighboring slices.
    for k in range(num_slices - 1):
        base0 = k * 8
        base1 = (k+1) * 8
        for i in range(8):
            E.append((base0 + i, base1 + i))
    return V, E


def overlay_clearance_on_bev(bev_img: np.ndarray, track: Track3DModel,
                             x_min: float, x_max: float, y_min: float, y_max: float,
                             res: float, header_h: int, color=(0, 255, 255), thick: int = 2):
    """
    Overlay the clearance wireframe (projected to X-Y) on fusion.py's BEV image.
    bev_img is BGR and already includes the header strip.

    NOTE(review): endpoints are clamped to the window independently, so segments
    leaving the window are distorted rather than clipped -- cosmetic only.
    """
    if not track.ok:
        return
    V, E = clearance_wireframe(track, num_slices=14)
    # Mapping world (x,y) -> pixel (row,col):
    #   rr = (x_max - x)/res ; cc = (y - y_min)/res
    H_map = bev_img.shape[0] - header_h
    W_map = bev_img.shape[1]
    for (i, j) in E:
        p = V[i]; q = V[j]
        x1, y1 = float(p[0]), float(p[1])
        x2, y2 = float(q[0]), float(q[1])
        rr1 = int((x_max - x1) / res)
        cc1 = int((y1 - y_min) / res)
        rr2 = int((x_max - x2) / res)
        cc2 = int((y2 - y_min) / res)
        rr1 = int(np.clip(rr1, 0, H_map-1)) + header_h
        rr2 = int(np.clip(rr2, 0, H_map-1)) + header_h
        cc1 = int(np.clip(cc1, 0, W_map-1))
        cc2 = int(np.clip(cc2, 0, W_map-1))
        cv2.line(bev_img, (cc1, rr1), (cc2, rr2), color, thick, cv2.LINE_AA)


def overlay_clearance_on_front_xz(front_img: np.ndarray, track: Track3DModel,
                                  x_min: float, x_max: float, z_min: float, z_max: float,
                                  res: float, header_h: int, color=(0, 255, 255), thick: int = 2):
    """
    Overlay the clearance wireframe (projected to X-Z) on fusion.py's FRONT view.
    Note: if the track direction is not exactly along X, this view is slightly
    misleading, but the height/width extents remain visible.
    """
    if not track.ok:
        return
    V, E = clearance_wireframe(track, num_slices=14)
    H_map = front_img.shape[0] - header_h
    W_map = front_img.shape[1]
    for (i, j) in E:
        p = V[i]; q = V[j]
        x1, z1 = float(p[0]), float(p[2])
        x2, z2 = float(q[0]), float(q[2])
        cc1 = int((x1 - x_min) / res)
        rr1 = int((z_max - z1) / res)
        cc2 = int((x2 - x_min) / res)
        rr2 = int((z_max - z2) / res)
        cc1 = int(np.clip(cc1, 0, W_map-1))
        cc2 = int(np.clip(cc2, 0, W_map-1))
        rr1 = int(np.clip(rr1, 0, H_map-1)) + header_h
        rr2 = int(np.clip(rr2, 0, H_map-1)) + header_h
        cv2.line(front_img, (cc1, rr1), (cc2, rr2), color, thick, cv2.LINE_AA)


def render_3d_scene(points_xyz: np.ndarray, track: Track3DModel,
                    det_boxes7: List[np.ndarray], out_png: str):
    """
    Save a matplotlib 3D snapshot: point cloud + clearance wireframe + detection
    box wireframes. Silently skipped if matplotlib is unavailable.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    except Exception as e:
        print(f"[WARN] matplotlib not available, skip 3d render: {e}")
        return
    # Subsample the point cloud to keep rendering fast.
    pts = points_xyz
    if pts.shape[0] > 20000:
        idx = np.random.choice(pts.shape[0], 20000, replace=False)
        pts = pts[idx]
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], s=0.4)
    # Clearance wireframe.
    if track.ok:
        V, E = clearance_wireframe(track, num_slices=18)
        for (i, j) in E:
            p = V[i]; q = V[j]
            ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])
    # Detection box wireframes (edges only).
    if det_boxes7:
        import fusion as F
        boxes = np.stack(det_boxes7, axis=0).astype(np.float32)
        corners = F.boxes3d_to_corners(boxes)  # (N,8,3)
        # Box edge index pairs.
        edges = [(0, 1), (1, 2), (2, 3), (3, 0),
                 (4, 5), (5, 6), (6, 7), (7, 4),
                 (0, 4), (1, 5), (2, 6), (3, 7)]
        for c in corners:
            for (i, j) in edges:
                p = c[i]; q = c[j]
                ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])

    # Approximate equal-aspect 3D axes (matplotlib has no native equal aspect in 3D).
    def set_axes_equal(ax):
        x_limits = ax.get_xlim3d()
        y_limits = ax.get_ylim3d()
        z_limits = ax.get_zlim3d()
        x_range = abs(x_limits[1] - x_limits[0])
        x_middle = np.mean(x_limits)
        y_range = abs(y_limits[1] - y_limits[0])
        y_middle = np.mean(y_limits)
        z_range = abs(z_limits[1] - z_limits[0])
        z_middle = np.mean(z_limits)
        plot_radius = 0.5 * max([x_range, y_range, z_range])
        ax.set_xlim3d([x_middle - plot_radius, x_middle + plot_radius])
        ax.set_ylim3d([y_middle - plot_radius, y_middle + plot_radius])
        ax.set_zlim3d([z_middle - plot_radius, z_middle + plot_radius])

    set_axes_equal(ax)
    ax.set_xlabel("X")
    ax.set_ylabel("Y")
    ax.set_zlabel("Z")
    plt.tight_layout()
    fig.savefig(out_png, dpi=160)
    plt.close(fig)


# =========================
# Intrusion judgment
# =========================
def transform_world_to_track(track: Track3DModel, pts_xyz: np.ndarray) -> np.ndarray:
    """
    world(x,y,z) -> track coords (s, l, z_rel)
    """
    rel = pts_xyz - track.origin.reshape(1, 3)
    s = rel @ track.t.reshape(3, 1)
    l = rel @ track.n.reshape(3, 1)
    z = rel @ track.up.reshape(3, 1)
    return np.concatenate([s, l, z], axis=1).astype(np.float32)


def intrusion_check_for_box(track: Track3DModel, box7: np.ndarray,
                            det_name: str, det_score: float) -> IntrusionResult:
    """Compute all intrusion metrics for one 7-DoF box (x,y,z,dx,dy,dz,yaw)
    against the clearance envelope and apply the decision rules."""
    import fusion as F
    corners = F.boxes3d_to_corners(box7.reshape(1, 7).astype(np.float32))[0]  # (8,3)
    corners_slz = transform_world_to_track(track, corners)  # (8,3) = (s,l,z)
    s_vals = corners_slz[:, 0]
    lz = corners_slz[:, 1:3]  # (8,2)
    det_s_min = float(np.min(s_vals))
    det_s_max = float(np.max(s_vals))
    det_s_len = max(1e-6, det_s_max - det_s_min)
    # Overlap along the track direction.
    s0 = max(det_s_min, track.s_min)
    s1 = min(det_s_max, track.s_max)
    s_overlap = max(0.0, s1 - s0)
    s_ratio = float(s_overlap / det_s_len)
    # Cross-section intersection in (l, z): hull of the 8 projected corners
    # clipped against the octagon.
    det_poly = convex_hull(lz)
    det_area = polygon_area(det_poly)
    clear_poly = ensure_ccw(track.poly_lz.copy())
    inter_poly = convex_polygon_intersection(det_poly, clear_poly) if det_area > 0 else np.zeros((0, 2), dtype=np.float32)
    inter_area = polygon_area(inter_poly)
    area_ratio = float(inter_area / (det_area + 1e-9)) if det_area > 0 else 0.0
    volume_ratio = float(area_ratio * s_ratio)
    # Center-point check.
    center_xyz = box7[:3].reshape(1, 3).astype(np.float32)
    center_slz = transform_world_to_track(track, center_xyz)[0]
    center_lz = center_slz[1:3]
    center_inside = (track.s_min <= float(center_slz[0]) <= track.s_max) and point_in_convex_polygon(center_lz, clear_poly)
    min_sd_center = float(signed_distance_to_convex_polygon(center_lz, clear_poly))
    # Combined decision logic + human-readable reason string.
    intrude = False
    reason = ""
    if s_overlap <= 1e-4:
        intrude = False
        reason = "no_s_overlap"
    else:
        if INTRUDE_CENTER_INSIDE_FORCE and center_inside:
            intrude = True
            reason = "center_inside"
        elif volume_ratio >= INTRUDE_VOL_RATIO_THR:
            intrude = True
            reason = f"volume_ratio>={INTRUDE_VOL_RATIO_THR:.2f}"
        elif area_ratio >= INTRUDE_AREA_RATIO_THR and s_ratio >= 0.25:
            intrude = True
            reason = f"area_ratio>={INTRUDE_AREA_RATIO_THR:.2f}&s_ratio>=0.25"
        else:
            intrude = False
            reason = "below_thresholds"
    return IntrusionResult(
        det_name=det_name,
        det_score=float(det_score),
        s_overlap=float(s_overlap),
        s_ratio=float(s_ratio),
        area_ratio=float(area_ratio),
        volume_ratio=float(volume_ratio),
        center_inside=bool(center_inside),
        min_signed_dist_center=float(min_sd_center),
        intrude=bool(intrude),
        reason=reason
    )


# =========================
# Main flow: process one frame + save all intermediate images
# =========================
def parse_args():
    ap = argparse.ArgumentParser()
    ap.add_argument("--frame_id", type=str, default="", help="指定帧号(不含扩展名),例如 000123")
    ap.add_argument("--out_dir", type=str, default="debug_intrusion", help="输出目录")
    ap.add_argument("--seed", type=int, default=42)
    return ap.parse_args()


def main():
    """End-to-end single-frame run: track extraction, edge-point matching,
    3D clearance construction, intrusion metrics, and debug outputs."""
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    import fusion as F
    os.makedirs(args.out_dir, exist_ok=True)

    # 1) Load models (YOLO + PCDet).
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models()

    # 2) Choose one frame (must have a matching image).
    bin_files = sorted(list(Path(F.PCDET_POINTS_DIR).glob("*.bin")))
    frame_ids_all = [p.stem for p in bin_files]
    valid_ids = [fid for fid in frame_ids_all if F.find_image_for_frame(fid) is not None]
    if not valid_ids:
        print("[ERROR] No matching (pointcloud, image) pairs found.")
        return
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}
    if args.frame_id:
        fid = args.frame_id
        if fid not in valid_ids:
            print(f"[ERROR] frame_id={fid} not found or no image match.")
            print(f" valid examples: {valid_ids[:5]} ... (total {len(valid_ids)})")
            return
    else:
        fid = random.choice(valid_ids)
    if fid not in idx_map:
        print(f"[ERROR] frame_id={fid} not in dataset index.")
        return
    frame_dir = os.path.join(args.out_dir, fid)
    os.makedirs(frame_dir, exist_ok=True)
    print(f"[FRAME] {fid} -> {frame_dir}")

    # 3) Load image & run inference.
    img_path = F.find_image_for_frame(fid)
    img = cv2.imread(img_path)
    if img is None:
        print(f"[ERROR] cannot read image: {img_path}")
        return
    H, W = img.shape[:2]

    # Track extraction (saves intermediate images 00..07).
    track2d = extract_track_edges_from_image(img, frame_dir)
    if not track2d.ok:
        print("[WARN] track2d extraction failed (left/right line not found). You can inspect debug images.")
    else:
        print(f"[TRACK2D] ok | thr_v={track2d.dbg.get('thr_v')} | hough_n={track2d.dbg.get('hough_n')}")

    # YOLO & PCDet.
    yolo_dets = F.infer_yolo(yolo_model, img)
    frame_id, raw_points, pcdet_dets = F.infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
    if raw_points is None or raw_points.shape[0] == 0:
        print("[ERROR] raw_points missing.")
        return

    # Extrinsic selection.
    chosen_T = F.get_extrinsic_matrix(F.CALIB, raw_points[:, :3], W, H)

    # Fuse, then keep only confident fused detections.
    fused_dets = F.fuse_frame(yolo_dets, pcdet_dets, W, H, F.CALIB, chosen_T, raw_points)
    fused_keep = [d for d in fused_dets if d.score >= 0.10]
    print(f"[DETS] yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused_dets)} keep={len(fused_keep)}")

    # 4) Project the point cloud into the image.
    pts_xyz = raw_points[:, :3].astype(np.float32)
    uv, valid = project_points(pts_xyz, F.CALIB, chosen_T, W, H, use_distortion=F.USE_DISTORTION)
    # Debug overlay: all projected points (gray) + track region.
    proj_vis = img.copy()
    uu = np.round(uv[:, 0]).astype(np.int32)
    vv = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (uu >= 0) & (uu < W) & (vv >= 0) & (vv < H)
    uu2 = uu[inside]; vv2 = vv[inside]
    for x, y in zip(uu2[::10], vv2[::10]):  # downsample drawing
        cv2.circle(proj_vis, (int(x), int(y)), 1, (200, 200, 200), -1, cv2.LINE_AA)
    # Overlay the track mask.
    if track2d.track_mask is not None and track2d.track_mask.sum() > 0:
        m = track2d.track_mask > 0
        proj_vis[m] = (0.45*proj_vis[m] + 0.55*np.array([0, 255, 0])).astype(np.uint8)
    cv2.imwrite(os.path.join(frame_dir, "08_proj_all_points.jpg"), proj_vis)

    # 5) Match edge points.
    edge_mask = track2d.edge_line_mask.copy()
    # With no line mask, degrade to the dark-region boundary.
    if edge_mask.sum() == 0:
        edge_mask = track2d.boundary_mask.copy()
    sel_edge = match_edge_points_by_dist_transform(pts_xyz, uv, valid, edge_mask)
    edge_pts_xyz = pts_xyz[sel_edge]

    # Split left/right using the 2D midline between the fitted edge lines.
    left_xyz = np.zeros((0, 3), dtype=np.float32)
    right_xyz = np.zeros((0, 3), dtype=np.float32)
    if track2d.ok:
        left_line = track2d.left_line
        right_line = track2d.right_line
        u_edge = uu[sel_edge]
        v_edge = vv[sel_edge]
        # midline x = (xl(y) + xr(y)) / 2
        xl = np.array([left_line.x_at(float(y)) for y in v_edge], dtype=np.float32)
        xr = np.array([right_line.x_at(float(y)) for y in v_edge], dtype=np.float32)
        xm = 0.5 * (xl + xr)
        is_left = u_edge.astype(np.float32) < xm
        left_xyz = edge_pts_xyz[is_left]
        right_xyz = edge_pts_xyz[~is_left]
    print(f"[EDGE3D] total={edge_pts_xyz.shape[0]} left={left_xyz.shape[0]} right={right_xyz.shape[0]}")

    # Debug overlay: matched edge points (yellow) + fitted lines.
    proj_edge = img.copy()
    if track2d.ok:
        y_top = int(H * ROI_Y_TOP_RATIO)
        y_bot = int(H * ROI_Y_BOTTOM_RATIO)
        for ln, col in [(track2d.left_line, (255, 0, 0)), (track2d.right_line, (0, 0, 255))]:
            x1 = int(np.clip(ln.x_at(y_top), 0, W-1))
            x2 = int(np.clip(ln.x_at(y_bot), 0, W-1))
            cv2.line(proj_edge, (x1, y_top), (x2, y_bot), col, 4, cv2.LINE_AA)
    for x, y in zip(uu[sel_edge][::2], vv[sel_edge][::2]):
        cv2.circle(proj_edge, (int(x), int(y)), 1, (0, 255, 255), -1, cv2.LINE_AA)
    cv2.imwrite(os.path.join(frame_dir, "09_proj_edge_points.jpg"), proj_edge)

    # 6) Build the 3D track + clearance envelope.
    track3d = build_track3d_from_edge_points(edge_pts_xyz, left_xyz, right_xyz)
    if not track3d.ok:
        print("[ERROR] track3d build failed: not enough matched edge pts.")
        with open(os.path.join(frame_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
            f.write("track3d build failed\n")
        return
    print("[TRACK3D]")
    print(f" origin={track3d.origin.tolist()}")
    print(f" t={track3d.t.tolist()} n={track3d.n.tolist()} up={track3d.up.tolist()}")
    print(f" s_range=[{track3d.s_min:.2f},{track3d.s_max:.2f}] rail_half_w={track3d.rail_half_w:.2f} z_ref={track3d.z_ref:.2f}")
    print(f" octagon(l,z_rel)=\n{track3d.poly_lz}")

    # 7) Intrusion check on the fused detections.
    results: List[IntrusionResult] = []
    for d in fused_keep:
        r = intrusion_check_for_box(track3d, d.box7.copy(), det_name=d.cls_name, det_score=d.score)
        results.append(r)

    # Print all metrics.
    print("\n========== Intrusion Metrics ==========")
    if not results:
        print("No fused detections (score>=0.10).")
    for i, r in enumerate(results):
        print(f"[{i}] {r.det_name} score={r.det_score:.2f} intrude={r.intrude} ({r.reason})")
        print(f" s_overlap={r.s_overlap:.2f} s_ratio={r.s_ratio:.3f}")
        print(f" area_ratio={r.area_ratio:.3f} volume_ratio={r.volume_ratio:.3f}")
        print(f" center_inside={r.center_inside} signed_dist_center={r.min_signed_dist_center:.3f} m")

    # Persist metrics (JSON + plain text).
    metrics = {
        "frame_id": fid,
        "track2d_ok": bool(track2d.ok),
        "track2d_dbg": track2d.dbg,
        "track3d_dbg": track3d.dbg,
        "intrusion_rules": {
            "INTRUDE_VOL_RATIO_THR": INTRUDE_VOL_RATIO_THR,
            "INTRUDE_AREA_RATIO_THR": INTRUDE_AREA_RATIO_THR,
            "INTRUDE_CENTER_INSIDE_FORCE": INTRUDE_CENTER_INSIDE_FORCE,
        },
        "detections": [
            {
                "cls": r.det_name,
                "score": r.det_score,
                "intrude": r.intrude,
                "reason": r.reason,
                "s_overlap": r.s_overlap,
                "s_ratio": r.s_ratio,
                "area_ratio": r.area_ratio,
                "volume_ratio": r.volume_ratio,
                "center_inside": r.center_inside,
                "signed_dist_center": r.min_signed_dist_center,
            }
            for r in results
        ]
    }
    with open(os.path.join(frame_dir, "intrusion_metrics.json"), "w", encoding="utf-8") as f:
        json.dump(metrics, f, ensure_ascii=False, indent=2)
    with open(os.path.join(frame_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
        for i, r in enumerate(results):
            f.write(f"[{i}] {r.det_name} score={r.det_score:.2f} intrude={r.intrude} ({r.reason})\n")
            f.write(f" s_overlap={r.s_overlap:.2f} s_ratio={r.s_ratio:.3f}\n")
            f.write(f" area_ratio={r.area_ratio:.3f} volume_ratio={r.volume_ratio:.3f}\n")
            f.write(f" center_inside={r.center_inside} signed_dist_center={r.min_signed_dist_center:.3f} m\n")

    # 8) Save BEV/FRONT views with the clearance overlay (reuses fusion.py rendering).
    p_top = max(pcdet_dets, key=lambda x: x.score) if pcdet_dets else None
    f_top = max(fused_keep, key=lambda x: x.score) if fused_keep else None
    bev = F.make_bev_image(raw_points, p_top, f_top)
    # Fetch this BEV's window parameters to place the wireframe correctly.
    x_min, x_max, y_min, y_max = F.compute_bev_window(pts_xyz, p_top, f_top)
    overlay_clearance_on_bev(bev, track3d, x_min, x_max, y_min, y_max,
                             res=F.BEV_RESOLUTION, header_h=F.BEV_HEADER_H,
                             color=(0, 255, 255), thick=2)
    cv2.imwrite(os.path.join(frame_dir, "10_clearance_bev.jpg"), bev)
    front = F.make_front_xz_crop(raw_points, p_top, f_top)
    # Same for the FRONT window parameters.
    fxmin, fxmax, fzmin, fzmax, _, _ = F.compute_front_window(pts_xyz, p_top, f_top)
    overlay_clearance_on_front_xz(front, track3d, fxmin, fxmax, fzmin, fzmax,
                                  res=F.FRONT_RESOLUTION, header_h=F.FRONT_HEADER_H,
                                  color=(0, 255, 255), thick=2)
    cv2.imwrite(os.path.join(frame_dir, "11_clearance_front.jpg"), front)

    # 9) 3D snapshot (points + clearance + detection boxes).
    det_boxes7 = [d.box7.copy() for d in fused_keep]
    render_3d_scene(pts_xyz, track3d, det_boxes7, os.path.join(frame_dir, "12_clearance_3d.png"))

    print(f"\n[INFO] Done. All debug images saved in: {frame_dir}")


if __name__ == "__main__":
    main()
你提到“我看不到你提取的二维轨道区域”“限界方向错”,现在你可以按这个顺序检查:
1. 06_fit_lines.jpg;2. 09_proj_edge_points.jpg(必要时调整 EDGE_MATCH_PX / ROI / 黑色阈值参数);3. 12_clearance_3d.png 以及 10/11 两张图。如果你跑完后把某一帧的 06_fit_lines.jpg 和 09_proj_edge_points.jpg 的效果描述一下(比如“左边缘线明显偏到轨枕上/轨外”等),我可以直接告诉你该调哪些参数(ROI、黑色阈值、边界提取方式,或者用“曲率情况下的分段中心线拟合”替代直线 PCA)。
我现在用红色线标记的就是轨道区域,你要去总结他的特征去重新找寻这个区域,我现在通过看你的中间过程,这个操目前是错误的。目前代码报错如下:[INFO] Loading PCDet...
[PCDet][COMPAT] Registered map_to_bev_module.proj/bn: Conv2d(128->256, k=1x1)
[PCDet] load_state_dict done. missing=0 unexpected=0
[PCDet] CLASS_NAMES = ['Drone', 'Plastic_sheet', 'Kite', 'Balloon', 'Bird']
[FRAME] 2025042116142702 -> debug_intrusion/2025042116142702
[WARN] track2d extraction failed (left/right line not found). You can inspect debug images.
[CALIB] auto | lidar2cam_inlier=1.000 cam2lidar_inlier=0.224 -> choose lidar2cam
[DETS] yolo=1 pcdet=1 fused=1 keep=1
[EDGE3D] total=132 left=0 right=0
[ERROR] track3d build failed: not enough matched edge pts.
现在这个轨道区域分割我使用语义分割算法进行分割轨道,我的轨道区域语义分割训练好的数据包括两个类别,| Class | IoU | Acc |
+--------------+-------+-------+
| background | 99.7 | 99.85 |
| steel rail | 93.55 | 96.58 |,我现在也有使用pidnet这个训练好的权重文件,/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth,/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/last_checkpoint以及配置文件/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py,通过这个代码可以准确分割轨道区域,然后你可以继续参考我之前写过的一个识别轨道点云构建限界的代码:#!/usr/bin/env python
"""
xj.py — 悬挂式轨道板限界生成(弱监督分割 + 原有限界流程)(最终版)
运行 python xxx.py <annotated_dir> --learn 完成模板学习与分割器训练;然后 python xj.py <unlabeled_dir> 进行限界生成。
"""
import os, json, argparse, warnings
import numpy as np
import open3d as o3d
from scipy.interpolate import splprep, splev
from scipy.spatial import KDTree as SciPyKDTree
from sklearn.neighbors import KDTree
from sklearn.decomposition import PCA
from sklearn.exceptions import ConvergenceWarning
from sklearn.ensemble import RandomForestClassifier
from joblib import dump, load
FEATURE_FILE = "track_model_features.json"
CLF_FILE = "track_point_clf.pkl"
N_SLICES_PER_FILE = 100
MIN_SLICE_PTS = 40
DBSCAN_EPS_CLUST = 0.25
DBSCAN_MIN_PTS_CLUST = 10
CENTERLINE_PTS_CNT = 160
CENTERLINE_WIN = 120
DUP_PT_TH = 0.06
CENTERLINE_SMOOTH = 300
ENVELOPE_SLICES = 2000
MESH_SMOOTH_ITER = 300
MAIN_CLU_EPS = 1.0
MAIN_CLU_MIN_PTS = 80
AUTO_SLICE_N = 100
W_WIDTH, W_HEIGHT, W_DENS, W_NORM = 0.4, 0.3, 0.2, 0.1
CONV_THR = 0.35
FALLBACK_WINDOW = 1
ADAPT_EPS_SCALE = 1.5
ADAPT_MINPTS_DIV = 2
OUTLIER_RADIUS, OUTLIER_NB = 0.25, 6
NOISE_RADIUS, SPARSE_PTS_TH = 0.8, 5
UP_VEC = np.array([0,0,1.0])
TRACK_NAMES = {"track", "轨道", "轨道板", "rail", "railway", "轨"}
def find_track_id(ann):
    """Resolve the integer class id of the "track" category from an annotation.

    Scans the annotation's index->category-name mapping (several key spellings
    are tolerated) for any name listed in ``TRACK_NAMES``.

    :param ann: annotation dict loaded from the JSON side file.
    :returns: int class id; defaults to 1, which matches this dataset's
        two-class convention.
    """
    mapping = ann.get("index_category_dict") or ann.get("index_to_category") or {}
    # Keys may be numeric strings; values are category names.
    for key, name in mapping.items():
        if isinstance(name, str) and name.strip().lower() in TRACK_NAMES:
            try:
                return int(key)
            except (TypeError, ValueError):
                # Non-numeric key — keep scanning other entries instead of
                # swallowing everything with a bare ``except``.
                continue
    # Fallback: the project convention is that the track class is id 1.
    # (The original had a redundant `if "1" in mapping: return 1` directly
    # followed by the same `return 1`.)
    return 1
def get_point_labels(ann, n_points):
    """Robustly read the per-point label array from an annotation dict.

    Tries several known key spellings in order; a candidate is accepted only
    when its length matches the point count, otherwise the next key is tried.

    :param ann: annotation dict loaded from JSON.
    :param n_points: expected number of points in the cloud.
    :returns: int ndarray of shape (n_points,), or None when no candidate fits.
    """
    for key in ("categorys", "categories", "labels",
                "point_categories", "points_category"):
        if key not in ann:
            continue
        arr = ann[key]
        try:
            a = np.asarray(arr, dtype=int)
        except (TypeError, ValueError):
            # Mixed/stringy payload: coerce element-wise instead of hiding
            # every failure behind a bare ``except``.
            a = np.asarray([int(x) for x in arr])
        if a.shape[0] == n_points:
            return a
    return None
def load_data(pcd_path, jpath=None):
    """Load a point cloud and, when present, its JSON annotation.

    :param pcd_path: path to a .ply/.pcd file readable by Open3D.
    :param jpath: optional path to the matching annotation JSON.
    :returns: (pcd, ann) where ann is None when jpath is missing or absent.
    """
    pcd = o3d.io.read_point_cloud(pcd_path)
    ann = None
    if jpath and os.path.isfile(jpath):
        # Context manager closes the handle promptly; the original leaked it
        # via json.load(open(...)).
        with open(jpath, 'r', encoding='utf-8') as fh:
            ann = json.load(fh)
    return pcd, ann
def extract_track_pcd(pcd, ann):
    """Select the annotated track points from a cloud, or None.

    Returns None when there is no annotation, when per-point labels are
    missing or length-mismatched, or when no point carries the track class
    id — callers then fall back to automatic detection.
    """
    if ann is None:
        return None
    point_labels = get_point_labels(ann, len(pcd.points))
    if point_labels is None:
        return None
    hit = np.where(point_labels == find_track_id(ann))[0]
    if hit.size == 0:
        return None
    return pcd.select_by_index(hit.tolist())
def pca_axis(pts):
    """Return the dominant (roughly horizontal) direction of a point set.

    Uses the first PCA component, falling back to the second when the first
    is nearly vertical (|dot with UP_VEC| > 0.9); any PCA failure (e.g. too
    few points) yields the +Y axis.

    :param pts: (N, 3) array of points.
    :returns: unit-length direction vector of shape (3,).
    """
    try:
        comp = PCA(3).fit(pts).components_
        ax = comp[0]
        # A near-vertical principal axis means the cloud is dominated by
        # height variation; take the next component as the track direction.
        if abs(ax.dot(UP_VEC)) > 0.9:
            ax = comp[1]
        return ax / np.linalg.norm(ax)
    except Exception:
        # Narrowed from a bare ``except``: still best-effort, but no longer
        # swallows KeyboardInterrupt / SystemExit.
        return np.array([0, 1, 0])
def cluster_dbscan(pcd, eps, min_pts):
    """Run Open3D DBSCAN on a cloud, silencing sklearn ConvergenceWarning."""
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        labels = pcd.cluster_dbscan(eps=eps, min_points=min_pts)
    return np.array(labels)
def remove_radius_outliers(pcd):
    """Drop isolated points lacking OUTLIER_NB neighbours within OUTLIER_RADIUS."""
    cleaned, _ = pcd.remove_radius_outlier(nb_points=OUTLIER_NB, radius=OUTLIER_RADIUS)
    return cleaned
def purge_sparse_noise(trk_pcd):
    """Remove tiny, far-away DBSCAN clusters that are likely sensor noise.

    Clouds with fewer than 300 points are returned untouched, as are clouds
    where clustering finds no core (non-noise) points at all. A cluster is
    dropped when it has at most SPARSE_PTS_TH members AND its closest member
    is farther than NOISE_RADIUS from the core centroid.
    """
    xyz = np.asarray(trk_pcd.points)
    if len(xyz) < 300:
        return trk_pcd
    labels = cluster_dbscan(trk_pcd, MAIN_CLU_EPS, MAIN_CLU_MIN_PTS)
    core_idx = np.where(labels != -1)[0]
    if core_idx.size == 0:
        return trk_pcd
    core_center = xyz[core_idx].mean(0)
    kept = []
    for lbl in np.unique(labels):
        members = np.where(labels == lbl)[0]
        closest = np.linalg.norm(xyz[members] - core_center, axis=1).min()
        if len(members) <= SPARSE_PTS_TH and closest > NOISE_RADIUS:
            continue  # sparse AND remote -> treat as noise
        kept += members.tolist()
    return trk_pcd.select_by_index(kept)
def compute_point_features(pts):
    """Compute per-point features for the track/background classifier.

    Features per point (4 columns): relative height (z - median z), normal
    z-component, curvature proxy (smallest eigenvalue ratio of the local
    covariance over 20 neighbours), and local density (neighbour count in a
    0.1 m radius).

    NOTE: the original source of this function was mangled (several
    statements collapsed onto one line with a stray ``text`` prefix); this
    body is the faithful reconstruction.

    :param pts: (N, 3) array of points.
    :returns: (N, 4) float feature matrix.
    """
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(pts))
    pcd.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(0.1, 30))
    normals = np.asarray(pcd.normals)
    z_rel = pts[:, 2] - np.median(pts[:, 2])
    tree = KDTree(pts)
    dists, idxs = tree.query(pts, k=20)
    curv = np.zeros(len(pts), dtype=float)
    for i in range(len(pts)):
        nb = pts[idxs[i]]
        cov = np.cov(nb.T)
        eig, _ = np.linalg.eigh(cov)
        lam = np.sort(eig)
        # Surface-variation curvature proxy: lambda_min / sum(lambda).
        curv[i] = lam[0] / (lam.sum() + 1e-9)
    neighbors = tree.query_radius(pts, r=0.1)
    dens = np.array([len(nb) for nb in neighbors], dtype=float)
    return np.vstack([z_rel, normals[:, 2], curv, dens]).T
def train_point_classifier(root):
    """Train the RandomForest track-vs-background point classifier.

    Walks ``root`` for .ply/.pcd files with per-point JSON labels, samples up
    to 2000 positives and 2000 negatives per file, fits a 50-tree forest and
    saves it to ``CLF_FILE``.

    NOTE: the tail of the original function was collapsed onto one mangled
    line (``textfeats = ...``); this body is the faithful reconstruction.

    :param root: directory to scan recursively.
    :raises RuntimeError: when no usable labelled file is found.
    """
    Xs, ys = [], []
    print(">>> 训练点云分割分类器")
    for dp, _, fs in os.walk(root):
        for fn in fs:
            if not fn.lower().endswith(('.ply', '.pcd')):
                continue
            path = os.path.join(dp, fn)
            pcd, ann = load_data(path, os.path.splitext(path)[0] + '.json')
            if ann is None:
                continue
            pts = np.asarray(pcd.points)
            labels_all = get_point_labels(ann, len(pts))
            if labels_all is None:
                continue
            track_id = find_track_id(ann)
            feats = compute_point_features(pts)
            labels_bin = (labels_all == track_id).astype(int)
            idx1, idx0 = np.where(labels_bin == 1)[0], np.where(labels_bin == 0)[0]
            if idx1.size == 0 or idx0.size == 0:
                # Need both classes from a file to learn anything useful.
                continue
            np.random.shuffle(idx1); np.random.shuffle(idx0)
            sel = np.concatenate([idx1[:min(2000, idx1.size)],
                                  idx0[:min(2000, idx0.size)]])
            Xs.append(feats[sel]); ys.append(labels_bin[sel])
    if not Xs:
        raise RuntimeError("未找到可用于训练的带逐点标签的标注文件,无法训练分割器。")
    X = np.vstack(Xs); y = np.concatenate(ys)
    clf = RandomForestClassifier(n_estimators=50, random_state=0)
    clf.fit(X, y)
    dump(clf, CLF_FILE)
    print(f">>> 分类器保存到 {CLF_FILE}")
def classify_points(pcd):
    """Predict the indices of track points using the saved classifier.

    Returns an empty index array when no classifier file exists, which
    signals callers to fall back to template-based detection.
    """
    if not os.path.isfile(CLF_FILE):
        return np.array([], dtype=int)
    xyz = np.asarray(pcd.points)
    features = compute_point_features(xyz)
    model = load(CLF_FILE)
    track_prob = model.predict_proba(features)[:, 1]
    return np.where(track_prob > 0.5)[0]
def slice_features(points, axis, z0, z1, voxel_size=0.05):
    """Describe one slice of the cloud along ``axis`` with 4 scalar features.

    The slice is all points whose projection onto ``axis`` lies in [z0, z1).
    Returns (width, height, density, flatness); each may be NaN when it
    cannot be computed (too few points, fewer than two clusters, empty
    voxel downsample).
    """
    seg = points[(points@axis>=z0)&(points@axis<z1)]
    if len(seg)<MIN_SLICE_PTS:
        return np.nan,np.nan,np.nan,np.nan
    p = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(seg))
    lab=cluster_dbscan(p, DBSCAN_EPS_CLUST, DBSCAN_MIN_PTS_CLUST)
    # One cluster per rail is expected; noise label -1 is excluded.
    cls=[p.select_by_index(np.where(lab==l)[0]) for l in np.unique(lab) if l!=-1]
    if len(cls)<2:
        width=np.nan
    else:
        # Width = span across the two largest clusters (rail pair): twice the
        # wider rail plus the gap between their bounding boxes along x.
        cls.sort(key=lambda c:len(c.points), reverse=True)
        a1,a2=cls[0].get_axis_aligned_bounding_box(),cls[1].get_axis_aligned_bounding_box()
        w1,w2=a1.get_extent()[0],a2.get_extent()[0]
        gap=abs(a1.get_center()[0]-a2.get_center()[0])-w1/2-w2/2
        width=2*max(w1,w2)+gap if gap>0 else np.nan
    height=np.median(seg[:,2])
    # Density proxy: raw point count per occupied voxel after downsampling.
    down=p.voxel_down_sample(voxel_size)
    density=len(seg)/len(down.points) if len(down.points) else np.nan
    p.estimate_normals(search_param=o3d.geometry.KDTreeSearchParamHybrid(0.1,30))
    # Flatness: 0 when all normals point straight up, ~1 for vertical faces.
    flatness=1.0-np.abs(np.asarray(p.normals)[:,2]).mean()
    return width,height,density,flatness
def learn_features(root):
    """Phase 1: learn track template profiles from annotated clouds.

    For every labelled .ply/.pcd under ``root``, the annotated track points
    are sliced along their principal axis into N_SLICES_PER_FILE bins and
    per-slice (width, height, density, flatness) profiles are collected.
    The per-file profiles are reduced with nan-median into template curves,
    written to FEATURE_FILE, after which the point classifier is trained.

    :raises RuntimeError: when no valid annotated file is found.
    """
    Ws,Hs,Ds,Ns=[],[],[],[]
    print("=== 阶段1:学习轨道模板 ===")
    for dp,_,fs in os.walk(root):
        for fn in fs:
            if not fn.lower().endswith(('.ply','.pcd')): continue
            path=os.path.join(dp,fn); jp=os.path.splitext(path)[0]+'.json'
            if not os.path.isfile(jp): continue
            pcd,ann=load_data(path,jp); trk=extract_track_pcd(pcd,ann)
            if trk is None or trk.is_empty(): continue
            pts=np.asarray(trk.points)
            if len(pts)<300: continue
            # Slice the track along its own principal direction.
            ax=pca_axis(pts); proj=pts@ax
            lo,hi=proj.min(),proj.max()
            w,h,d,n=[],[],[],[]
            for z0,z1 in zip(np.linspace(lo,hi,N_SLICES_PER_FILE,endpoint=False),
                             np.linspace(lo,hi,N_SLICES_PER_FILE+1)[1:]):
                wf,hf,df,nf=slice_features(pts,ax,z0,z1)
                w.append(wf); h.append(hf); d.append(df); n.append(nf)
            Ws.append(w); Hs.append(h); Ds.append(d); Ns.append(n)
    if not Ws:
        raise RuntimeError("未找到有效标注文件用于模板学习。")
    # Width rows get their NaNs replaced by each file's own median before the
    # cross-file nan-median; the other profiles rely on nanmedian directly.
    tpl_w=np.nanmedian(np.vstack([np.nan_to_num(s,nan=np.nanmedian(s)) for s in Ws]),axis=0).tolist()
    tpl_h=np.nanmedian(np.vstack(Hs),axis=0).tolist()
    tpl_d=np.nanmedian(np.vstack(Ds),axis=0).tolist()
    tpl_n=np.nanmedian(np.vstack(Ns),axis=0).tolist()
    # Standard top width = median of the width template curve.
    std_top=float(np.nanmedian(tpl_w))
    json.dump({
        "standard_top_width":std_top,
        "tpl_width":tpl_w,
        "tpl_height":tpl_h,
        "tpl_dens":tpl_d,
        "tpl_norm":tpl_n
    }, open(FEATURE_FILE,'w'), indent=2)
    print(f"√ 模板保存到 {FEATURE_FILE}")
    train_point_classifier(root)
def auto_detect_track(pcd, cfg):
    """Locate the track in an unlabeled cloud.

    Strategy: (1) try the trained point classifier; with >= 300 predicted
    track points, trust it. (2) Otherwise slice the cloud along its principal
    axis, correlate per-slice features against the learned template curves,
    keep the high-score span, and extract its largest DBSCAN cluster
    (retrying once with relaxed parameters).

    NOTE: the original body was collapsed onto one mangled line
    (``textstd_top=...``); this is the faithful reconstruction.

    :param pcd: Open3D point cloud.
    :param cfg: template dict loaded from FEATURE_FILE.
    :returns: Open3D point cloud of the detected track, or None.
    """
    idx = classify_points(pcd)
    if len(idx) >= 300:
        return pcd.select_by_index(idx)
    # ---- Template preparation: normalize each profile curve ----
    std_top = cfg["standard_top_width"]
    tpl_w = np.array(cfg["tpl_width"]); tpl_w = tpl_w / (np.linalg.norm(tpl_w) + 1e-9)
    tpl_h0 = np.array(cfg["tpl_height"]); tpl_h = (tpl_h0 - tpl_h0.mean()) / (tpl_h0.std() + 1e-6)
    tpl_d = np.array(cfg["tpl_dens"]); tpl_d = tpl_d / (np.linalg.norm(tpl_d) + 1e-9)
    tpl_n = np.array(cfg["tpl_norm"]); tpl_n = tpl_n / (np.linalg.norm(tpl_n) + 1e-9)
    # ---- Slice the whole cloud along its principal axis ----
    pts = np.asarray(pcd.points)
    ax = pca_axis(pts); proj = pts @ ax
    lo, hi = proj.min(), proj.max()
    W, H, D, N, slices = [], [], [], [], []
    for z0, z1 in zip(np.linspace(lo, hi, AUTO_SLICE_N, endpoint=False),
                      np.linspace(lo, hi, AUTO_SLICE_N + 1)[1:]):
        w, h, d, n = slice_features(pts, ax, z0, z1)
        W.append(np.nan_to_num(w, nan=std_top)); H.append(h); D.append(d); N.append(n)
        slices.append((z0, z1))
    W, H, D, N = map(np.asarray, (W, H, D, N))
    # ---- Normalize observed profiles the same way as the templates ----
    Wn = (W - np.nanmean(W)) / (np.nanstd(W) + 1e-6)
    Hn = (H - np.nanmean(H)) / (np.nanstd(H) + 1e-6)
    Dn = D / (np.linalg.norm(D) + 1e-6)
    Nn = N / (np.linalg.norm(N) + 1e-6)
    # Weighted sum of sliding cross-correlations against the templates.
    score = (W_WIDTH * np.correlate(Wn, tpl_w, "same") +
             W_HEIGHT * np.correlate(Hn, tpl_h, "same") +
             W_DENS * np.correlate(Dn, tpl_d, "same") +
             W_NORM * np.correlate(Nn, tpl_n, "same")) / (W_WIDTH + W_HEIGHT + W_DENS + W_NORM)
    idxs = np.where(score > CONV_THR)[0]
    if idxs.size == 0:
        # Nothing passed the threshold: fall back to a small window around the peak.
        pk = np.argmax(score)
        start = max(0, pk - FALLBACK_WINDOW); end = min(len(score) - 1, pk + FALLBACK_WINDOW)
        idxs = np.arange(start, end + 1)
    zmin, zmax = slices[idxs[0]][0], slices[idxs[-1]][1]
    cand = pts[(proj >= zmin) & (proj <= zmax)]
    cand_pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(cand))
    # ---- Keep the largest cluster; retry once with relaxed DBSCAN params ----
    eps, mp = MAIN_CLU_EPS, MAIN_CLU_MIN_PTS
    for _ in range(2):
        lab = cluster_dbscan(cand_pcd, eps, mp)
        val = [l for l in np.unique(lab) if l != -1]
        if val:
            best = max(val, key=lambda l: (lab == l).sum())
            return cand_pcd.select_by_index(np.where(lab == best)[0])
        eps *= ADAPT_EPS_SCALE; mp = max(5, mp // ADAPT_MINPTS_DIV)
    return None
def build_centerline(trk):
    """Fit a smooth 3D centerline along the track's principal axis.

    Points are sorted by their projection on the PCA axis, averaged with a
    sliding window, de-duplicated, then interpolated with a cubic B-spline
    (linear fallback on spline failure). The returned height profile ``hz``
    is the z of the nearest original track point per centerline sample.

    NOTE: the original source lost its ``*`` operators (``c-axis0.6``) and
    its tail was collapsed onto one mangled line (``textarr=...``); this body
    is the faithful reconstruction.

    :param trk: Open3D point cloud of the track.
    :returns: (cl, hz) — (ENVELOPE_SLICES, 3) centerline and (ENVELOPE_SLICES,)
        per-sample height.
    """
    pts = np.asarray(trk.points)
    axis = pca_axis(pts); proj = pts @ axis
    ord_pts = pts[np.argsort(proj)]
    win = max(CENTERLINE_WIN, len(ord_pts) // CENTERLINE_PTS_CNT)
    step = max(1, win // 4)
    # Sliding-window means along the sorted axis give raw centerline nodes.
    cents = [ord_pts[i:i + win].mean(0) for i in range(0, len(ord_pts) - win, step)]
    uniq = [cents[0]]
    for p in cents[1:]:
        if np.linalg.norm(p - uniq[-1]) > DUP_PT_TH:
            uniq.append(p)
    if len(uniq) < 2:
        # Degenerate cloud: synthesize a short segment through the centroid.
        c = pts.mean(0)
        uniq = [c - axis * 0.6, c + axis * 0.6]
    if len(uniq) < 4:
        # Too few nodes for a cubic spline: linear centerline, flat height.
        cl = np.linspace(uniq[0], uniq[-1], ENVELOPE_SLICES)
        hz = np.full(len(cl), pts[:, 2].min())
        return cl, hz
    arr = np.array(uniq).T
    try:
        tck, u = splprep(arr, s=CENTERLINE_SMOOTH, k=3)
        cl = np.array(splev(np.linspace(u.min(), u.max(), ENVELOPE_SLICES), tck)).T
    except ValueError:
        cl = np.linspace(uniq[0], uniq[-1], ENVELOPE_SLICES)
    # Height at each sample = z of the nearest raw track point.
    tree = SciPyKDTree(pts)
    _, idxs = tree.query(cl)
    hz = pts[idxs, 2]
    return cl, hz
def build_mesh(cl, hz, top_w, env_h):
    """Extrude the clearance envelope mesh along the centerline.

    At every centerline sample a trapezoidal cross-section is built: top edge
    of width ``top_w`` at the track surface height ``hz[i]``, bottom edge 20%
    wider and ``env_h`` lower. Consecutive sections are stitched into a
    closed triangle mesh and Taubin-smoothed.

    NOTE: the original source lost its ``*`` operators (``top_w1.2``,
    ``sidetop_w/2``, ``upenv_h``, ``i4``); this body is the faithful
    reconstruction.

    :param cl: (M, 3) centerline points.
    :param hz: (M,) track-surface heights per centerline point.
    :param top_w: envelope top width (meters).
    :param env_h: envelope height (meters).
    :returns: o3d.geometry.TriangleMesh, or None when no section was valid.
    """
    bot_w = top_w * 1.2
    up = UP_VEC
    verts = []
    for i, p in enumerate(cl):
        # Forward difference for the tangent; backward at the last sample.
        tan = cl[i + 1] - p if i < len(cl) - 1 else p - cl[i - 1]
        if np.linalg.norm(tan) < 1e-6:
            continue
        tan /= np.linalg.norm(tan)
        side = np.cross(tan, up)
        if np.linalg.norm(side) < 1e-6:
            continue  # tangent (nearly) vertical — no lateral direction
        side /= np.linalg.norm(side)
        p0 = np.array([p[0], p[1], hz[i]])
        # 4 vertices per section: top-right, top-left, bottom-left, bottom-right.
        verts += [
            p0 + side * top_w / 2, p0 - side * top_w / 2,
            p0 - side * bot_w / 2 - up * env_h, p0 + side * bot_w / 2 - up * env_h
        ]
    if not verts:
        return None
    V = np.asarray(verts); tri = []; n = len(V) // 4
    for i in range(n - 1):
        a, b = i * 4, (i + 1) * 4
        # Stitch the 4 side faces between consecutive sections (2 tris each).
        tri += [[a, b + 1, a + 1], [a, b, b + 1],
                [a + 2, a + 3, b + 3], [a + 2, b + 3, b + 2],
                [a + 1, b + 2, a + 2], [a + 1, b + 1, b + 2],
                [a + 3, a, b], [a + 3, b, b + 3]]
    mesh = o3d.geometry.TriangleMesh(
        o3d.utility.Vector3dVector(V),
        o3d.utility.Vector3iVector(np.asarray(tri, dtype=int))
    )
    mesh = mesh.filter_smooth_taubin(MESH_SMOOTH_ITER)
    mesh.compute_vertex_normals()
    return mesh
def generate(root):
    """Phase 2: build and display the clearance envelope for each cloud.

    For every .ply/.pcd under ``root``: extract the track (annotation first,
    automatic detection as fallback), denoise it, fit the centerline, build
    the envelope mesh with the learned standard top width, and show the
    scene in an Open3D window with a semi-transparent red envelope.
    """
    cfg=json.load(open(FEATURE_FILE,'r',encoding='utf-8'))
    std_top=cfg["standard_top_width"]
    print(f"=== 阶段2:生成限界(标准顶宽 {std_top:.3f}m)===")
    for dp,_,fs in os.walk(root):
        for fn in fs:
            if not fn.lower().endswith(('.ply','.pcd')): continue
            path=os.path.join(dp,fn); jp=os.path.splitext(path)[0]+'.json'
            print(f"\n→ {fn}")
            pcd, ann = load_data(path,jp)
            trk = extract_track_pcd(pcd,ann)
            if trk is None or trk.is_empty():
                # No (usable) annotation: denoise the full cloud and try
                # template/classifier-based automatic detection.
                pcd = remove_radius_outliers(pcd)
                trk = auto_detect_track(pcd,cfg)
                if trk is None:
                    print(" • 自动检测失败,跳过"); continue
                print(" • 自动检测成功")
            trk = purge_sparse_noise(trk)
            cl,hz = build_centerline(trk)
            # Envelope height heuristic: 1/12 of the scene's z extent.
            env_h=(pcd.get_max_bound()[2]-pcd.get_min_bound()[2])/12.0
            mesh=build_mesh(cl,hz,std_top,env_h)
            if mesh is None:
                print(" • mesh 构建失败"); continue
            trk.paint_uniform_color([0.1,0.8,0.1])
            # Semi-transparent red material for the envelope.
            mat=o3d.visualization.rendering.MaterialRecord()
            mat.shader="defaultLitTransparency"; mat.base_color=[0.95,0.2,0.2,0.55]
            o3d.visualization.draw([
                {"name":"scene","geometry":pcd},
                {"name":"track","geometry":trk},
                {"name":"envelope","geometry":mesh,"material":mat}
            ], show_skybox=False, bg_color=(0.05,0.05,0.05,1))
if name=="main":
ap = argparse.ArgumentParser(description="自适应轨道板限界生成器")
ap.add_argument("root", help="包含 .ply/.pcd 的目录")
ap.add_argument("--learn", action="store_true", help="学习模板+分割器")
args = ap.parse_args()
if args.learn:
learn_features(args.root)
else:
        generate(args.root)

这个代码会学习轨道标注好的json和pkl,我会传输这个两个文件,请根据上述文件,去重新整理限界构建和异物侵界检测的代码。主要的思路还是利用语义分割分割轨道区域,然后利用相机内参和外参去投影到点云区域:因为点云可能因为设备安装角度问题左右点云数量不一样多,所以这个语义分割结果是作为轨道点云的范围约束,也就是说这个分割出的轨道就是限界的上顶面,然后你根据之前的要求构建8面体的立体即可,理解吗?同时利用后续的侵界判断去判别,并在终端打印结果。
理解。你现在的轨道区域不要再用“黑色阈值 + Hough”这种弱特征去找了,因为你已经有 PIDNet(mmseg) 能稳定分割出 steel rail(两条钢轨),这才是最可靠的“轨道证据”。
下面我给你一份重新整理后的单帧脚本,核心完全按你说的做:
steel rail mask(两条轨)T_lidar2cam:
s_overlap、s_ratioarea_ratio(l-v)(检测框截面与八边形交叠面积比)volume_ratio = s_ratio * area_ratiocenter_insidesigned_dist_center(中心点到限界边界的带符号距离:inside 为负)INTRUDE / NOT_INTRUDE + reason并且:每一步都保存图片,你能直接定位是哪一步错(分割错/边缘错/投影匹配错/外参错/限界方向错)。
intrusion_clearance_pidnet.py放在你的
fusion.py同目录运行(继续复用你现有 YOLO + PCDet + 融合)
python#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ intrusion_clearance_pidnet.py --------------------------------------------------------- 用 PIDNet(mmseg) 语义分割 steel rail -> 提取轨道边缘/中心线 -> 投影匹配到点云 -> 构建八边形立体限界 -> FUSED目标侵界判定 并保存全部中间过程图片。 运行示例: python intrusion_clearance_pidnet.py --frame_id 2025042116142702 你给的默认路径(可通过参数改): --seg_config /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py --seg_ckpt /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth 或者 --seg_ckpt .../last_checkpoint (脚本会自动读出真实 .pth) 输出目录: debug_intrusion_pidnet/<frame_id>/ 00_raw.jpg 01_seg_overlay.jpg 02_rail_mask.png 03_corridor_mask.png 04_edge_polylines.jpg 05_edge_masks.png 06_proj_all_points.jpg 07_proj_rail_points.jpg 08_proj_edge_points.jpg 09_bev_envelope.jpg 10_front_envelope.jpg 11_3d_scene.png (若 matplotlib 可用) intrusion_metrics.json intrusion_metrics.txt """ from __future__ import annotations import os import math import json import random import argparse from pathlib import Path from dataclasses import dataclass from typing import Dict, List, Tuple, Optional import numpy as np import cv2 # 复用你原来的融合代码 import fusion as F # ========================= # 可调参数(你可按数据微调) # ========================= # ROI(只用于约束边缘提取,不影响语义分割本身) ROI_ENABLE = True ROI_Y_TOP_RATIO = 0.30 ROI_Y_BOT_RATIO = 0.98 ROI_X_TOP_L_RATIO = 0.30 ROI_X_TOP_R_RATIO = 0.70 ROI_X_BOT_L_RATIO = 0.03 ROI_X_BOT_R_RATIO = 0.97 # segmentation class id:你的两类里 steel rail=1 RAIL_CLASS_ID_DEFAULT = 1 # mask 清理 MASK_CLOSE_K = 9 MASK_OPEN_K = 7 MASK_DILATE_PX = 5 # 给投影点“容错”,外参略偏时很关键 KEEP_TOPK_CC = 6 # 连通域最多保留多少个(避免假阳性) # 边缘匹配到点云(像素距离阈值) EDGE_MATCH_PX = 5.0 EDGE_LINE_THICK = 9 # 画边缘线mask厚度 # 中心线采样行数(越大越平滑) CENTERLINE_N_ROWS = 80 # 轨道点云(rail mask 内)最少点数 MIN_RAIL_PTS = 200 # ========== 八边形限界参数(米)========== # 你可以之后把这些换成你“附录限界坐标”对应的真实尺寸 # 这里给一套“上顶面由轨道宽度决定,往下扩到列车最大宽度”的合理默认 
TRAIN_MAX_WIDTH_M = 2.5 # 车体最大宽度(你文档提到 2.3~2.5m) HANG_HEIGHT_M = 3.7 # 轨面到车体最下部(3.4~3.7m) SIDE_MARGIN_M = 0.35 # 横向安全裕度 TOP_MARGIN_M = 0.05 # 顶面额外裕度 HEIGHT_EXTRA_M = 0.30 # 竖向额外裕度 TOP_CHAMFER_H = 0.55 # 上倒角高度 BOT_CHAMFER_H = 0.55 # 下倒角高度 BOTTOM_WIDTH_RATIO = 0.85 # 底部相对最大宽度比例(保证“八边形”而不是六边形) # ========== 侵界判定阈值 ========== INTRUDE_VOL_RATIO_THR = 0.02 INTRUDE_AREA_RATIO_THR = 0.05 FORCE_CENTER_INSIDE_INTRUDE = True # debug 可视化 OUT_ROOT = "debug_intrusion_pidnet" os.makedirs(OUT_ROOT, exist_ok=True) # ========================= # 小工具:ROI mask # ========================= def make_roi_mask(h: int, w: int) -> np.ndarray: if not ROI_ENABLE: return np.ones((h, w), dtype=np.uint8) * 255 y_top = int(h * ROI_Y_TOP_RATIO) y_bot = int(h * ROI_Y_BOT_RATIO) x_tl = int(w * ROI_X_TOP_L_RATIO) x_tr = int(w * ROI_X_TOP_R_RATIO) x_bl = int(w * ROI_X_BOT_L_RATIO) x_br = int(w * ROI_X_BOT_R_RATIO) poly = np.array([[x_bl, y_bot], [x_br, y_bot], [x_tr, y_top], [x_tl, y_top]], dtype=np.int32) m = np.zeros((h, w), dtype=np.uint8) cv2.fillConvexPoly(m, poly, 255) return m def overlay_mask(img_bgr: np.ndarray, mask01: np.ndarray, color=(0, 0, 255), alpha=0.55) -> np.ndarray: vis = img_bgr.copy() m = mask01.astype(bool) if m.any(): vis[m] = (vis[m] * (1 - alpha) + np.array(color, dtype=np.float32) * alpha).astype(np.uint8) return vis def median_filter_1d(arr: np.ndarray, k: int = 7) -> np.ndarray: if arr.size == 0: return arr k = max(3, int(k) | 1) r = k // 2 out = arr.copy().astype(np.float32) for i in range(arr.size): lo = max(0, i - r) hi = min(arr.size, i + r + 1) out[i] = float(np.median(arr[lo:hi])) return out # ========================= # mmseg 推理(兼容 mmseg 0.x/1.x) # ========================= def resolve_checkpoint(ckpt_path: str) -> str: ckpt = ckpt_path if os.path.basename(ckpt_path) == "last_checkpoint": # 文件里通常是一行 /path/to/xxx.pth with open(ckpt_path, "r", encoding="utf-8") as f: line = f.read().strip() if line and os.path.isfile(line): ckpt = line return ckpt def 
init_mmseg_model(config_path: str, checkpoint_path: str, device: str): ckpt = resolve_checkpoint(checkpoint_path) if not os.path.isfile(config_path): raise FileNotFoundError(f"seg_config not found: {config_path}") if not os.path.isfile(ckpt): raise FileNotFoundError(f"seg_checkpoint not found: {ckpt}") # 优先 mmseg>=1.0 try: from mmseg.apis import init_model # type: ignore model = init_model(config_path, ckpt, device=device) return model except Exception: pass # 兼容 mmseg 0.x try: from mmseg.apis import init_segmentor # type: ignore model = init_segmentor(config_path, ckpt, device=device) return model except Exception as e: raise RuntimeError( "mmseg init failed. 请确认你在安装了 mmsegmentation 的环境里运行,并且 config/ckpt 路径正确。\n" f"raw error: {e}" ) def infer_rail_mask(model, img_bgr: np.ndarray) -> np.ndarray: """ 返回 pred label map: (H,W) int32 """ # mmseg>=1.0 try: from mmseg.apis import inference_model # type: ignore out = inference_model(model, img_bgr) pred = out.pred_sem_seg.data # tensor -> numpy if hasattr(pred, "cpu"): pred = pred.cpu().numpy() pred = np.array(pred) if pred.ndim == 3: pred = pred[0] return pred.astype(np.int32) except Exception: pass # mmseg 0.x try: from mmseg.apis import inference_segmentor # type: ignore out = inference_segmentor(model, img_bgr) pred = out[0] return np.array(pred, dtype=np.int32) except Exception as e: raise RuntimeError(f"mmseg inference failed: {e}") # ========================= # 从 rail mask 构造:走廊mask、左右边缘曲线、中心线曲线 # ========================= @dataclass class RailMaskInfo: rail_mask01: np.ndarray # (H,W) 0/1, steel rail corridor01: np.ndarray # (H,W) 0/1, fill between left&right per row left_poly: np.ndarray # (N,2) int (x,y) right_poly: np.ndarray # (N,2) mid_poly: np.ndarray # (N,2) left_edge_mask: np.ndarray # (H,W) 0/255 right_edge_mask: np.ndarray dbg: Dict def keep_topk_connected_components(mask01: np.ndarray, k: int) -> np.ndarray: """ 保留面积最大的 k 个连通域(rails 可能被切分成多段,所以 k 给大一点) """ m = (mask01 > 0).astype(np.uint8) n, lab, stats, 
_ = cv2.connectedComponentsWithStats(m, connectivity=8) if n <= 1: return m areas = stats[1:, cv2.CC_STAT_AREA] order = np.argsort(-areas) keep_ids = (order[:min(k, order.size)] + 1).tolist() out = np.zeros_like(m) for cid in keep_ids: out[lab == cid] = 1 return out def build_polylines_from_mask(rail01: np.ndarray, roi_mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict]: """ 按行扫描提取 left/right/mid (支持一定曲率) """ H, W = rail01.shape[:2] y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0 y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1) xs_left = [] xs_right = [] ys = [] for y in range(y_top, y_bot + 1): if roi_mask[y, :].max() == 0: continue row = rail01[y, :] & (roi_mask[y, :] > 0) idx = np.where(row > 0)[0] if idx.size < 6: continue xl = int(idx.min()) xr = int(idx.max()) if xr - xl < 10: continue xs_left.append(xl) xs_right.append(xr) ys.append(y) dbg = {"rows_valid": len(ys)} if len(ys) < 30: return np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), dbg xs_left = np.array(xs_left, dtype=np.float32) xs_right = np.array(xs_right, dtype=np.float32) ys = np.array(ys, dtype=np.int32) widths = xs_right - xs_left med_w = float(np.median(widths)) dbg["median_width_px"] = med_w # 宽度异常行剔除(避免偶发假阳性把边缘拉飞) ok = (widths > 0.45 * med_w) & (widths < 1.60 * med_w) xs_left = xs_left[ok] xs_right = xs_right[ok] ys = ys[ok] dbg["rows_kept"] = int(ys.size) if ys.size < 30: return np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), dbg xs_left = median_filter_1d(xs_left, 9) xs_right = median_filter_1d(xs_right, 9) xs_mid = 0.5 * (xs_left + xs_right) left_poly = np.stack([xs_left, ys.astype(np.float32)], axis=1).round().astype(np.int32) right_poly = np.stack([xs_right, ys.astype(np.float32)], axis=1).round().astype(np.int32) mid_poly = np.stack([xs_mid, ys.astype(np.float32)], axis=1).round().astype(np.int32) return left_poly, right_poly, mid_poly, dbg def build_masks_from_rail_pred(pred: 
np.ndarray, rail_id: int, img_h: int, img_w: int, roi_mask: np.ndarray) -> RailMaskInfo: # resize if needed if pred.shape[0] != img_h or pred.shape[1] != img_w: pred_rs = cv2.resize(pred.astype(np.int32), (img_w, img_h), interpolation=cv2.INTER_NEAREST) else: pred_rs = pred rail01 = (pred_rs == int(rail_id)).astype(np.uint8) # ROI 限制 if ROI_ENABLE: rail01 = (rail01 & (roi_mask > 0)).astype(np.uint8) # 形态学清理 kc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_CLOSE_K, MASK_CLOSE_K)) ko = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_OPEN_K, MASK_OPEN_K)) rail01 = cv2.morphologyEx(rail01, cv2.MORPH_CLOSE, kc, iterations=2) rail01 = cv2.morphologyEx(rail01, cv2.MORPH_OPEN, ko, iterations=1) # 保留较大连通域(rails 可能断裂,保留 topK) rail01 = keep_topk_connected_components(rail01, KEEP_TOPK_CC).astype(np.uint8) # corridor:每行用 min/max 填充(把中间黑色空隙也包含进来) H, W = rail01.shape[:2] corridor = np.zeros((H, W), dtype=np.uint8) y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0 y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1) for y in range(y_top, y_bot + 1): if ROI_ENABLE and roi_mask[y, :].max() == 0: continue idx = np.where(rail01[y, :] > 0)[0] if idx.size < 6: continue xl = int(idx.min()) xr = int(idx.max()) if xr - xl < 10: continue corridor[y, xl:xr + 1] = 1 # corridor 再做 close 填孔 corridor = cv2.morphologyEx(corridor, cv2.MORPH_CLOSE, kc, iterations=1) # 提取边缘曲线(left/right/mid) left_poly, right_poly, mid_poly, dbg = build_polylines_from_mask(rail01, roi_mask) # 构造边缘mask(用于 distanceTransform 匹配点云) left_edge_mask = np.zeros((H, W), dtype=np.uint8) right_edge_mask = np.zeros((H, W), dtype=np.uint8) if left_poly.shape[0] >= 10: cv2.polylines(left_edge_mask, [left_poly.reshape(-1, 1, 2)], isClosed=False, color=255, thickness=EDGE_LINE_THICK, lineType=cv2.LINE_AA) if right_poly.shape[0] >= 10: cv2.polylines(right_edge_mask, [right_poly.reshape(-1, 1, 2)], isClosed=False, color=255, thickness=EDGE_LINE_THICK, lineType=cv2.LINE_AA) return RailMaskInfo( rail_mask01=rail01, 
corridor01=corridor, left_poly=left_poly, right_poly=right_poly, mid_poly=mid_poly, left_edge_mask=left_edge_mask, right_edge_mask=right_edge_mask, dbg=dbg )  # tail of a RailMaskInfo(...) constructor whose definition starts above this chunk


# =========================
# Projection: point cloud -> image, filter points by mask
# =========================
def select_points_by_mask(points_xyz: np.ndarray, uv: np.ndarray, valid: np.ndarray, mask01: np.ndarray) -> np.ndarray:
    # Keep only the 3D points whose (rounded) projected pixel lands inside
    # the image AND on a nonzero cell of the binary mask `mask01`.
    H, W = mask01.shape[:2]
    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H)
    idx = np.where(inside)[0]
    if idx.size == 0:
        return np.zeros((0, 3), dtype=np.float32)
    u2 = u[idx]; v2 = v[idx]
    keep = mask01[v2, u2] > 0
    return points_xyz[idx[keep]].astype(np.float32)


def match_points_near_edge(points_xyz: np.ndarray, uv: np.ndarray, valid: np.ndarray, edge_mask255: np.ndarray, max_px: float) -> np.ndarray:
    """
    Via distanceTransform: pick the cloud points whose projection lies
    within max_px pixels of the edge mask.
    """
    H, W = edge_mask255.shape[:2]
    # Invert the edge mask so distanceTransform measures distance TO the edge.
    src = np.ones((H, W), dtype=np.uint8) * 255
    src[edge_mask255 > 0] = 0
    dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)
    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H)
    idx = np.where(inside)[0]
    if idx.size == 0:
        return np.zeros((0, 3), dtype=np.float32)
    u2 = u[idx]; v2 = v[idx]
    d = dist[v2, u2]
    keep = d <= float(max_px)
    return points_xyz[idx[keep]].astype(np.float32)


# =========================
# Geometry: plane fitting + ray intersection
# =========================
def fit_plane_svd(points_xyz: np.ndarray, iters: int = 2, keep_q: float = 0.90) -> Tuple[np.ndarray, np.ndarray]:
    """
    Simple robust plane fit:
    - SVD gives the normal
    - drop the top (1-keep_q) fraction by point-to-plane distance, then iterate
    Returns: plane_point p0, plane_normal n (unit vector, n[2] > 0)
    """
    pts = points_xyz.astype(np.float64)
    if pts.shape[0] < 10:
        # Too few points: fall back to the median point and a +Z normal.
        p0 = np.median(pts, axis=0)
        n = np.array([0, 0, 1.0], dtype=np.float64)
        return p0.astype(np.float32), n.astype(np.float32)
    for _ in range(max(1, iters)):
        p0 = np.median(pts, axis=0)
        X = pts - p0
        _, _, vt = np.linalg.svd(X, full_matrices=False)
        n = vt[-1]  # smallest singular vector = plane normal
        n = n / (np.linalg.norm(n) + 1e-12)
        if n[2] < 0:
            n = -n
        # Filter outliers by distance to the current plane.
        d = np.abs((pts - p0) @ n.reshape(3, 1)).reshape(-1)
        thr = float(np.quantile(d, keep_q))
        pts = pts[d <= thr]
        if pts.shape[0] < 30:
            break
    return p0.astype(np.float32), n.astype(np.float32)


def undistort_pixels_to_normalized(uv: np.ndarray, calib: Dict) -> np.ndarray:
    """
    uv: (N,2) pixel coordinates
    Returns normalized coords (N,2) in the camera frame.
    """
    K = np.array(calib["camera_matrix"], dtype=np.float64)
    dist = np.array(calib.get("dist_coeffs", [0, 0, 0, 0, 0]), dtype=np.float64).reshape(-1)
    pts = uv.reshape(-1, 1, 2).astype(np.float64)
    und = cv2.undistortPoints(pts, K, dist)  # -> normalized
    und = und.reshape(-1, 2)
    return und.astype(np.float32)


def pixels_to_rays_in_lidar(uv: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Convert pixels into rays expressed in the LiDAR frame:
    ray_origin: (3,)
    ray_dirs: (N,3) normalized
    """
    # cam -> lidar (invert the lidar->cam extrinsic)
    T = np.array(T_lidar2cam, dtype=np.float64)
    R = T[:3, :3]
    t = T[:3, 3]
    R_c2l = R.T
    o_lidar = (-R.T @ t).reshape(3)
    # normalized camera coords (with lens undistortion when enabled)
    xy = undistort_pixels_to_normalized(uv, calib) if F.USE_DISTORTION else None
    if xy is None:
        # Pinhole-only back-projection when distortion is disabled.
        K = np.array(calib["camera_matrix"], dtype=np.float64)
        fx, fy = K[0, 0], K[1, 1]
        cx, cy = K[0, 2], K[1, 2]
        x = (uv[:, 0] - cx) / fx
        y = (uv[:, 1] - cy) / fy
        xy = np.stack([x, y], axis=1).astype(np.float32)
    dirs_cam = np.concatenate([xy.astype(np.float64), np.ones((xy.shape[0], 1), dtype=np.float64)], axis=1)
    dirs_lidar = (R_c2l @ dirs_cam.T).T
    nrm = np.linalg.norm(dirs_lidar, axis=1, keepdims=True) + 1e-12
    dirs_lidar = dirs_lidar / nrm
    return o_lidar.astype(np.float32), dirs_lidar.astype(np.float32)


def intersect_rays_with_plane(ray_origin: np.ndarray, ray_dirs: np.ndarray, plane_p0: np.ndarray, plane_n: np.ndarray) -> np.ndarray:
    """
    ray: p = o + t d
    plane: n·(p - p0) = 0
    Returns (N,3); invalid intersections are set to nan.
    """
    o = ray_origin.reshape(1, 3).astype(np.float64)
    d = ray_dirs.astype(np.float64)
    p0 = plane_p0.reshape(1, 3).astype(np.float64)
    n = plane_n.reshape(1, 3).astype(np.float64)
    denom = (d * n).sum(axis=1)  # (N,)
    num = ((p0 - o) * n).sum(axis=1)
    t = np.full_like(num, np.nan, dtype=np.float64)
    ok = np.abs(denom) > 1e-9  # reject rays (near-)parallel to the plane
    t[ok] = num[ok] / denom[ok]
    # Keep forward intersections only.
    t[t <= 0] = np.nan
    P = o + d * t.reshape(-1, 1)
    return P.astype(np.float32)


# =========================
# 3D track model + octagonal clearance envelope
# =========================
@dataclass
class TrackModel:
    ok: bool
    origin: np.ndarray  # (3,)
    t: np.ndarray  # (3,) tangent
    n: np.ndarray  # (3,) lateral
    up: np.ndarray  # (3,) plane normal
    s_min: float
    s_max: float
    half_w_top: float
    poly_lv: np.ndarray  # (8,2) in (l,v) where v=0 is top plane, v<0 downward
    dbg: Dict


def pca_direction(points_xyz: np.ndarray, up: np.ndarray) -> np.ndarray:
    """
    Take the principal direction and project it into the track plane
    (orthogonal to `up`).
    """
    pts = points_xyz.astype(np.float64)
    mu = pts.mean(axis=0)
    X = pts - mu
    _, _, vt = np.linalg.svd(X, full_matrices=False)
    t = vt[0]  # dominant direction
    # Remove the `up` component so t lies in the plane.
    upv = up.astype(np.float64)
    t = t - upv * (t @ upv)
    t = t / (np.linalg.norm(t) + 1e-12)
    if t[0] < 0:
        t = -t  # fix a consistent sign convention
    return t.astype(np.float32)


def build_octagon(half_w_top: float) -> np.ndarray:
    """
    Build the octagonal cross-section in (l,v): v=0 is the top surface,
    v<0 extends downward. Widths/heights come from module-level margins
    (TOP_MARGIN_M, SIDE_MARGIN_M, etc. — defined elsewhere in this file).
    """
    w_top = float(max(0.10, half_w_top + TOP_MARGIN_M))
    w_mid = float(max(w_top + 0.10, 0.5 * TRAIN_MAX_WIDTH_M + SIDE_MARGIN_M))
    w_bot = float(max(w_top, w_mid * BOTTOM_WIDTH_RATIO))
    H = float(HANG_HEIGHT_M + HEIGHT_EXTRA_M)
    ht = float(min(TOP_CHAMFER_H, max(0.10, 0.25 * H)))
    hb = float(min(BOT_CHAMFER_H, max(0.10, 0.25 * H)))
    v0 = 0.0
    v1 = -ht
    v2 = -(H - hb)
    v3 = -H
    poly = np.array([
        [-w_top, v0],
        [ w_top, v0],
        [ w_mid, v1],
        [ w_mid, v2],
        [ w_bot, v3],
        [-w_bot, v3],
        [-w_mid, v2],
        [-w_mid, v1],
    ], dtype=np.float32)
    return poly


def build_track_model(rail_info: RailMaskInfo, calib: Dict, T_lidar2cam: np.ndarray, rail_pts_xyz: np.ndarray, img_h: int, img_w: int) -> TrackModel:
    # Builds the (origin, t, n, up, s-range, octagon) track frame from the
    # segmentation polylines plus rail-surface LiDAR points.
    dbg: Dict = {}
    if rail_info.mid_poly.shape[0] < 20:
        return TrackModel(False, np.zeros(3), np.array([1,0,0],dtype=np.float32), np.array([0,1,0],dtype=np.float32), np.array([0,0,1],dtype=np.float32), 0.0, 1.0, 0.5, np.zeros((0,2),dtype=np.float32), {"err": "mid_poly too short"})
    if rail_pts_xyz.shape[0] < MIN_RAIL_PTS:
        dbg["warn"] = f"rail_pts too few ({rail_pts_xyz.shape[0]}). plane may be unstable."
    # 1) Fit the rail top-surface plane from rail_pts.
    p0, up = fit_plane_svd(rail_pts_xyz, iters=2, keep_q=0.90)
    dbg["plane_p0"] = p0.tolist()
    dbg["plane_up"] = up.tolist()
    # 2) Intersect centerline-pixel rays with the plane to get 3D centerline
    #    points (robust to left/right point-cloud imbalance).
    #    Sample a limited number of rows first.
    mid = rail_info.mid_poly
    # Downsample to CENTERLINE_N_ROWS.
    if mid.shape[0] > CENTERLINE_N_ROWS:
        idx = np.linspace(0, mid.shape[0]-1, CENTERLINE_N_ROWS).astype(np.int32)
        mid_s = mid[idx]
    else:
        mid_s = mid
    uv_mid = mid_s[:, :2].astype(np.float32)
    o_lidar, d_lidar = pixels_to_rays_in_lidar(uv_mid, calib, T_lidar2cam)
    cl_world = intersect_rays_with_plane(o_lidar, d_lidar, p0, up)
    ok = np.isfinite(cl_world).all(axis=1)
    cl_world = cl_world[ok]
    dbg["centerline_pts"] = int(cl_world.shape[0])
    if cl_world.shape[0] < 10:
        return TrackModel(False, np.zeros(3), np.array([1,0,0],dtype=np.float32), np.array([0,1,0],dtype=np.float32), up, 0.0, 1.0, 0.5, np.zeros((0,2),dtype=np.float32), {"err": "centerline ray-plane intersection too few", **dbg})
    # 3) Track direction t: PCA over centerline 3D points, projected in-plane.
    t = pca_direction(cl_world, up)
    # Lateral axis n.
    n = np.cross(up, t)
    n = n / (np.linalg.norm(n) + 1e-12)
    # 4) origin: mean of centerline points (lies on the plane by construction).
    origin = cl_world.mean(axis=0).astype(np.float32)
    # 5) s range (projection extent along t).
    s = (cl_world - origin.reshape(1, 3)) @ t.reshape(3, 1)
    s = s.reshape(-1)
    s_min = float(np.quantile(s, 0.02))
    s_max = float(np.quantile(s, 0.98))
    # Add a small margin.
    margin = max(1.5, 0.10 * (s_max - s_min))
    s_min -= margin
    s_max += margin
    dbg["s_min"] = s_min
    dbg["s_max"] = s_max
    # 6) Half-width from left/right edge-pixel ray-plane intersections
    #    (more stable; does not rely on left/right point-cloud symmetry).
    left = rail_info.left_poly
    right = rail_info.right_poly
    if left.shape[0] > 10 and right.shape[0] > 10:
        # Align both polylines to the same sample count.
        k = min(left.shape[0], right.shape[0], 80)
        idl = np.linspace(0, left.shape[0]-1, k).astype(np.int32)
        idr = np.linspace(0, right.shape[0]-1, k).astype(np.int32)
        uvL = left[idl, :2].astype(np.float32)
        uvR = right[idr, :2].astype(np.float32)
        oL, dL = pixels_to_rays_in_lidar(uvL, calib, T_lidar2cam)
        oR, dR = pixels_to_rays_in_lidar(uvR, calib, T_lidar2cam)
        pL = intersect_rays_with_plane(oL, dL, p0, up)
        pR = intersect_rays_with_plane(oR, dR, p0, up)
        okL = np.isfinite(pL).all(axis=1)
        okR = np.isfinite(pR).all(axis=1)
        ok2 = okL & okR
        pL = pL[ok2]
        pR = pR[ok2]
        if pL.shape[0] >= 10:
            lL = (pL - origin.reshape(1, 3)) @ n.reshape(3, 1)
            lR = (pR - origin.reshape(1, 3)) @ n.reshape(3, 1)
            w = (lR - lL).reshape(-1)
            half_w_top = float(np.median(np.abs(w) * 0.5))
        else:
            half_w_top = 0.55  # fallback half-width in meters
            dbg["warn_half_w"] = "edge ray-plane too few, fallback=0.55"
    else:
        half_w_top = 0.55
        dbg["warn_half_w"] = "edge poly too short, fallback=0.55"
    dbg["half_w_top"] = half_w_top
    poly_lv = build_octagon(half_w_top)
    return TrackModel(True, origin, t.astype(np.float32), n.astype(np.float32), up.astype(np.float32), s_min, s_max, float(half_w_top), poly_lv, dbg)


def world_to_track_coords(track: TrackModel, pts_xyz: np.ndarray) -> np.ndarray:
    """
    world -> (s,l,v)
    """
    rel = pts_xyz - track.origin.reshape(1, 3)
    s = rel @ track.t.reshape(3, 1)
    l = rel @ track.n.reshape(3, 1)
    v = rel @ track.up.reshape(3, 1)
    return np.concatenate([s, l, v], axis=1).astype(np.float32)


# =========================
# Convex polygon tools (l-v cross-section overlap)
# =========================
def _cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
    # 2D cross product of (a-o) x (b-o); sign gives turn direction.
    return float((a[0]-o[0])*(b[1]-o[1]) - (a[1]-o[1])*(b[0]-o[0]))


def convex_hull(points: np.ndarray) -> np.ndarray:
    # Andrew's monotone-chain convex hull over 2D points.
    if points is None or len(points) == 0:
        return np.zeros((0, 2), dtype=np.float32)
    pts = np.unique(points.astype(np.float64), axis=0)
    if pts.shape[0] < 3:
        return pts.astype(np.float32)
    pts = pts[np.lexsort((pts[:, 1], pts[:, 0]))]
    lower = []
    for p in pts:
        while len(lower) >= 2 and _cross(np.array(lower[-2]), np.array(lower[-1]), p) <= 0:
            lower.pop()
        lower.append(p)
    upper = []
    for p in pts[::-1]:
        while len(upper) >= 2 and _cross(np.array(upper[-2]), np.array(upper[-1]), p) <= 0:
            upper.pop()
        upper.append(p)
    hull = np.array(lower[:-1] + upper[:-1], dtype=np.float64)
    return hull.astype(np.float32)


def polygon_area(poly: np.ndarray) -> float:
    # Shoelace formula; degenerate polygons have zero area.
    if poly is None or poly.shape[0] < 3:
        return 0.0
    x = poly[:, 0]; y = poly[:, 1]
    return float(0.5 * abs(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1))))


def ensure_ccw(poly: np.ndarray) -> np.ndarray:
    # Return the polygon with counter-clockwise winding (reverse if needed).
    if poly is None or poly.shape[0] < 3:
        return poly
    x = poly[:, 0]; y = poly[:, 1]
    signed = float(0.5 * (np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1))))
    return poly[::-1].copy() if signed < 0 else poly


def inside_half_plane(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> bool:
    # True when p is on the left of (or on) directed edge a->b (CCW interior).
    return _cross(a, b, p) >= -1e-9


def line_intersection(p1: np.ndarray, p2: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
    # Intersection of infinite lines (p1,p2) and (a,b); returns p2 if parallel.
    x1, y1 = p1; x2, y2 = p2
    x3, y3 = a; x4, y4 = b
    den = (x1-x2)*(y3-y4) - (y1-y2)*(x3-x4)
    if abs(den) < 1e-12:
        return p2.copy()
    px = ((x1*y2 - y1*x2)*(x3-x4) - (x1-x2)*(x3*y4 - y3*x4)) / den
    py = ((x1*y2 - y1*x2)*(y3-y4) - (y1-y2)*(x3*y4 - y3*x4)) / den
    return np.array([px, py], dtype=np.float32)


def convex_polygon_intersection(subject: np.ndarray, clip: np.ndarray) -> np.ndarray:
    # Sutherland–Hodgman clipping of `subject` against convex `clip`.
    if subject is None or subject.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    if clip is None or clip.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    subj = ensure_ccw(subject).astype(np.float32)
    clp = ensure_ccw(clip).astype(np.float32)
    out = subj
    for i in range(clp.shape[0]):
        a = clp[i]
        b = clp[(i+1) % clp.shape[0]]
        inp = out
        if inp.shape[0] == 0:
            break
        out_list = []
        for j in range(inp.shape[0]):
            p = inp[j]
            q = inp[(j+1) % inp.shape[0]]
            pin = inside_half_plane(p, a, b)
            qin = inside_half_plane(q, a, b)
            if qin:
                if not pin:
                    out_list.append(line_intersection(p, q, a, b))
                out_list.append(q.copy())
            elif pin:
                out_list.append(line_intersection(p, q, a, b))
        out = np.array(out_list, dtype=np.float32)
    return out


def point_in_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> bool:
    # pt is inside iff it is on the interior side of every CCW edge.
    poly = ensure_ccw(poly)
    for i in range(poly.shape[0]):
        a = poly[i]; b = poly[(i+1) % poly.shape[0]]
        if _cross(a, b, pt) < -1e-8:
            return False
    return True


def point_to_segment_distance(pt: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
    # Euclidean distance from pt to segment [a,b] (clamped projection).
    ax, ay = a; bx, by = b; px, py = pt
    vx, vy = bx-ax, by-ay
    wx, wy = px-ax, py-ay
    c1 = vx*wx + vy*wy
    if c1 <= 0:
        return float(math.hypot(px-ax, py-ay))
    c2 = vx*vx + vy*vy
    if c2 <= c1:
        return float(math.hypot(px-bx, py-by))
    t = c1 / (c2 + 1e-12)
    projx = ax + t*vx
    projy = ay + t*vy
    return float(math.hypot(px-projx, py-projy))


def signed_distance_to_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> float:
    # Negative inside, positive outside; magnitude = distance to boundary.
    poly = ensure_ccw(poly)
    dmin = float("inf")
    for i in range(poly.shape[0]):
        a = poly[i]; b = poly[(i+1) % poly.shape[0]]
        dmin = min(dmin, point_to_segment_distance(pt, a, b))
    inside = point_in_convex_polygon(pt, poly)
    return -dmin if inside else dmin


# =========================
# Intrusion detection
# =========================
@dataclass
class IntrusionMetrics:
    cls: str
    score: float
    intrude: bool
    reason: str
    s_overlap: float
    s_ratio: float
    area_ratio: float
    volume_ratio: float
    center_inside: bool
    signed_dist_center: float


def intrusion_for_det(track: TrackModel, det: F.Det3D) -> IntrusionMetrics:
    # Compute how much a fused 3D detection box overlaps the octagonal
    # clearance envelope, in track (s,l,v) coordinates.
    box7 = det.box7.astype(np.float32)
    corners = F.boxes3d_to_corners(box7.reshape(1, 7))[0]  # (8,3)
    slv = world_to_track_coords(track, corners)  # (8,3)
    s_vals = slv[:, 0]
    lv = slv[:, 1:3]
    det_s_min = float(np.min(s_vals))
    det_s_max = float(np.max(s_vals))
    det_s_len = max(1e-6, det_s_max - det_s_min)
    s0 = max(det_s_min, track.s_min)
    s1 = min(det_s_max, track.s_max)
    s_overlap = max(0.0, s1 - s0)
    s_ratio = float(s_overlap / det_s_len)
    det_poly = convex_hull(lv)
    det_area = polygon_area(det_poly)
    clear_poly = ensure_ccw(track.poly_lv.copy())
    inter = convex_polygon_intersection(det_poly, clear_poly) if det_area > 1e-9 else np.zeros((0, 2), np.float32)
    inter_area = polygon_area(inter)
    area_ratio = float(inter_area / (det_area + 1e-9)) if det_area > 1e-9 else 0.0
    volume_ratio = float(area_ratio * s_ratio)
    center = box7[:3].reshape(1, 3)
    c_slv = world_to_track_coords(track, center)[0]
    c_lv = c_slv[1:3]
    center_inside = (track.s_min <= float(c_slv[0]) <= track.s_max) and point_in_convex_polygon(c_lv, clear_poly)
    signed_dist = float(signed_distance_to_convex_polygon(c_lv, clear_poly))
    # Decision logic (printed to console / written to disk).
    if s_overlap <= 1e-6:
        intrude = False
        reason = "no_s_overlap"
    else:
        if FORCE_CENTER_INSIDE_INTRUDE and center_inside:
            intrude = True
            reason = "center_inside"
        elif volume_ratio >= INTRUDE_VOL_RATIO_THR:
            intrude = True
            reason = f"volume_ratio>={INTRUDE_VOL_RATIO_THR:.2f}"
        elif (area_ratio >= INTRUDE_AREA_RATIO_THR) and (s_ratio >= 0.25):
            intrude = True
            reason = f"area_ratio>={INTRUDE_AREA_RATIO_THR:.2f}&s_ratio>=0.25"
        else:
            intrude = False
            reason = "below_thresholds"
    return IntrusionMetrics(
        cls=det.cls_name,
        score=float(det.score),
        intrude=bool(intrude),
        reason=reason,
        s_overlap=float(s_overlap),
        s_ratio=float(s_ratio),
        area_ratio=float(area_ratio),
        volume_ratio=float(volume_ratio),
        center_inside=bool(center_inside),
        signed_dist_center=float(signed_dist)
    )


# =========================
# Visualization: save intermediate results
# =========================
def save_edge_debug_images(out_dir: str, img: np.ndarray, roi_mask: np.ndarray, rail: RailMaskInfo):
    # 01 segmentation overlay
    vis = overlay_mask(img, rail.rail_mask01, color=(0, 0, 255), alpha=0.55)
    if ROI_ENABLE:
        cnts, _ = cv2.findContours(roi_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(vis, cnts, -1, (0, 255, 255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "01_seg_overlay.jpg"), vis)
    # masks
    cv2.imwrite(os.path.join(out_dir, "02_rail_mask.png"), (rail.rail_mask01 * 255).astype(np.uint8))
    cv2.imwrite(os.path.join(out_dir, "03_corridor_mask.png"), (rail.corridor01 * 255).astype(np.uint8))
    # polylines overlay
    poly_vis = img.copy()
    if rail.left_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.left_poly.reshape(-1, 1, 2)], False, (255, 0, 0), 3, cv2.LINE_AA)
    if rail.right_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.right_poly.reshape(-1, 1, 2)], False, (0, 0, 255), 3, cv2.LINE_AA)
    if rail.mid_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.mid_poly.reshape(-1, 1, 2)], False, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "04_edge_polylines.jpg"), poly_vis)
    # edge masks
    edge_rgb = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
    edge_rgb[rail.left_edge_mask > 0] = (255, 0, 0)
    edge_rgb[rail.right_edge_mask > 0] = (0, 0, 255)
    cv2.imwrite(os.path.join(out_dir, "05_edge_masks.png"), edge_rgb)


def draw_projected_points(img: np.ndarray, uv: np.ndarray, valid: np.ndarray, color, step: int = 8):
    # Draw every `step`-th valid projected point as a 1px dot (in place).
    H, W = img.shape[:2]
    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H)
    idx = np.where(inside)[0]
    if idx.size == 0:
        return
    idx = idx[::max(1, step)]
    for i in idx:
        cv2.circle(img, (int(u[i]), int(v[i])), 1, color, -1, cv2.LINE_AA)


def render_bev_density(points_xyz: np.ndarray, out_path: str, track: Optional[TrackModel] = None, dets: Optional[List[F.Det3D]] = None, res: float = 0.06):
    # Bird's-eye-view log-density image with optional envelope/det overlays.
    pts = points_xyz
    if pts.shape[0] > 250000:
        idx = np.random.choice(pts.shape[0], 250000, replace=False)
        pts = pts[idx]
    x = pts[:, 0]; y = pts[:, 1]
    x_min = float(np.quantile(x, 0.02)); x_max = float(np.quantile(x, 0.98))
    y_min = float(np.quantile(y, 0.02)); y_max = float(np.quantile(y, 0.98))
    margin = 3.0
    x_min -= margin; x_max += margin
    y_min -= margin; y_max += margin
    H = int(max(480, math.ceil((x_max - x_min) / res)))
    W = int(max(480, math.ceil((y_max - y_min) / res)))
    rr = ((x_max - pts[:, 0]) / res).astype(np.int32)
    cc = ((pts[:, 1] - y_min) / res).astype(np.int32)
    rr = np.clip(rr, 0, H-1)
    cc = np.clip(cc, 0, W-1)
    idx = rr * W + cc
    cnt = np.bincount(idx, minlength=H*W).reshape(H, W).astype(np.float32)
    img = np.log1p(cnt)
    if img.max() > 0:
        img = img / img.max()
    img = (img**0.55 * 255).astype(np.uint8)  # gamma lift for visibility
    bev = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    def xy_to_pix(xy):
        # world (x,y) -> BEV pixel (col,row)
        xx, yy = float(xy[0]), float(xy[1])
        r = (x_max - xx) / res
        c = (yy - y_min) / res
        return int(round(c)), int(round(r))

    # overlay envelope wireframe (XY)
    if track is not None and track.ok:
        # Draw the octagon outline at several s slices.
        ss = np.linspace(track.s_min, track.s_max, 14).astype(np.float32)
        for s in ss:
            base = track.origin + track.t * float(s)
            poly_xy = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                poly_xy.append(p[:2])
            poly_xy = np.array(poly_xy, dtype=np.float32)
            poly_pix = np.array([xy_to_pix(p) for p in poly_xy], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(bev, [poly_pix], True, (0, 255, 255), 1, cv2.LINE_AA)
    # det footprint
    if dets:
        for d in dets:
            corners = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
            xy = corners[:4, :2]
            pix = np.array([xy_to_pix(p) for p in xy], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(bev, [pix], True, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(out_path, bev)


def render_front_xz_density(points_xyz: np.ndarray, out_path: str, track: Optional[TrackModel] = None, dets: Optional[List[F.Det3D]] = None, res: float = 0.06):
    # Front (X-Z) log-density image with optional envelope/det overlays.
    pts = points_xyz
    if pts.shape[0] > 250000:
        idx = np.random.choice(pts.shape[0], 250000, replace=False)
        pts = pts[idx]
    x = pts[:, 0]; z = pts[:, 2]
    x_min = float(np.quantile(x, 0.02)); x_max = float(np.quantile(x, 0.98))
    z_min = float(np.quantile(z, 0.02)); z_max = float(np.quantile(z, 0.98))
    margin = 1.5
    x_min -= margin; x_max += margin
    z_min -= margin; z_max += margin
    H = int(max(480, math.ceil((z_max - z_min) / res)))
    W = int(max(480, math.ceil((x_max - x_min) / res)))
    rr = ((z_max - pts[:, 2]) / res).astype(np.int32)
    cc = ((pts[:, 0] - x_min) / res).astype(np.int32)
    rr = np.clip(rr, 0, H-1)
    cc = np.clip(cc, 0, W-1)
    idx = rr * W + cc
    cnt = np.bincount(idx, minlength=H*W).reshape(H, W).astype(np.float32)
    img = np.log1p(cnt)
    if img.max() > 0:
        img = img / img.max()
    img = (img**0.55 * 255).astype(np.uint8)
    front = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    def xz_to_pix(xz):
        # world (x,z) -> front-view pixel (col,row)
        xx, zz = float(xz[0]), float(xz[1])
        c = (xx - x_min) / res
        r = (z_max - zz) / res
        return int(round(c)), int(round(r))

    # envelope wireframe (X-Z)
    if track is not None and track.ok:
        ss = np.linspace(track.s_min, track.s_max, 14).astype(np.float32)
        for s in ss:
            base = track.origin + track.t * float(s)
            poly_xz = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                poly_xz.append([p[0], p[2]])
            poly_xz = np.array(poly_xz, dtype=np.float32)
            poly_pix = np.array([xz_to_pix(p) for p in poly_xz], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(front, [poly_pix], True, (0, 255, 255), 1, cv2.LINE_AA)
    # det boxes (X-Z approximated by 4 corners)
    if dets:
        for d in dets:
            corners = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
            xz = corners[[0,1,2,3], :][:, [0,2]]
            pix = np.array([xz_to_pix(p) for p in xz], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(front, [pix], True, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(out_path, front)


def render_3d_scene(points_xyz: np.ndarray, track: TrackModel, dets: List[F.Det3D], out_png: str):
    # Optional matplotlib 3D render: downsampled cloud + envelope + det boxes.
    try:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    except Exception as e:
        print(f"[WARN] matplotlib not available, skip 3d render: {e}")
        return
    pts = points_xyz
    if pts.shape[0] > 20000:
        idx = np.random.choice(pts.shape[0], 20000, replace=False)
        pts = pts[idx]
    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(pts[:,0], pts[:,1], pts[:,2], s=0.4)
    # envelope wireframe
    if track.ok:
        ss = np.linspace(track.s_min, track.s_max, 18)
        prev = None
        for s in ss:
            base = track.origin + track.t * float(s)
            V = []
            for l, v in track.poly_lv:
                p = base + track.n*float(l) + track.up*float(v)
                V.append(p)
            V = np.array(V)
            # loop: ring at this s slice
            for i in range(8):
                p = V[i]; q = V[(i+1)%8]
                ax.plot([p[0],q[0]],[p[1],q[1]],[p[2],q[2]])
            # longitudinal connections to the previous ring
            if prev is not None:
                for i in range(8):
                    p = prev[i]; q = V[i]
                    ax.plot([p[0],q[0]],[p[1],q[1]],[p[2],q[2]])
            prev = V
    # det boxes
    edges = [(0,1),(1,2),(2,3),(3,0),(4,5),(5,6),(6,7),(7,4),(0,4),(1,5),(2,6),(3,7)]
    for d in dets:
        c = F.boxes3d_to_corners(d.box7.reshape(1,7).astype(np.float32))[0]
        for i,j in edges:
            p=c[i]; q=c[j]
            ax.plot([p[0],q[0]],[p[1],q[1]],[p[2],q[2]])
    ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlabel("Z")
    plt.tight_layout()
    fig.savefig(out_png, dpi=160)
    plt.close(fig)


# =========================
# Main pipeline
# =========================
def parse_args():
    ap = argparse.ArgumentParser()
    ap.add_argument("--frame_id", type=str, default="", help="指定帧号(不含扩展名),例如 2025042116142702")
    ap.add_argument("--seg_config", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py")
    ap.add_argument("--seg_ckpt", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth")
    ap.add_argument("--seg_device", type=str, default="cuda:0")
    ap.add_argument("--rail_id", type=int, default=RAIL_CLASS_ID_DEFAULT)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--topk", type=int, default=20, help="侵界判定最多取多少个 fused det")
    return ap.parse_args()


def main():
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    # 1) init seg
    print("[INFO] Loading PIDNet(mmseg)...")
    seg_model = init_mmseg_model(args.seg_config, args.seg_ckpt, args.seg_device)
    # 2) load det models
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models()
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}
    # choose frame (only frames that have both an image and a .bin)
    bin_files = sorted(list(Path(F.PCDET_POINTS_DIR).glob("*.bin")))
    frame_ids = [p.stem for p in bin_files if F.find_image_for_frame(p.stem) is not None and p.stem in idx_map]
    if not frame_ids:
        print("[ERROR] no matched (img,bin) frames.")
        return
    fid = args.frame_id if args.frame_id else random.choice(frame_ids)
    if fid not in idx_map:
        print(f"[ERROR] frame_id {fid} not in dataset index.")
        return
    out_dir = os.path.join(OUT_ROOT, fid)
    os.makedirs(out_dir, exist_ok=True)
    # read image
    img_path = F.find_image_for_frame(fid)
    img = cv2.imread(img_path)
    if img is None:
        print(f"[ERROR] cannot read image: {img_path}")
        return
    H, W = img.shape[:2]
    cv2.imwrite(os.path.join(out_dir, "00_raw.jpg"), img)
    # roi mask
    roi_mask = make_roi_mask(H, W)
    # 3) segmentation inference
    pred = infer_rail_mask(seg_model, img)
    rail_info = build_masks_from_rail_pred(pred, args.rail_id, H, W, roi_mask)
    save_edge_debug_images(out_dir, img, roi_mask, rail_info)
    print(f"[SEG] rows_valid={rail_info.dbg.get('rows_valid')} rows_kept={rail_info.dbg.get('rows_kept')} median_width_px={rail_info.dbg.get('median_width_px')}")
    # 4) det inference
    yolo_dets = F.infer_yolo(yolo_model, img)
    _, raw_points, pcdet_dets = F.infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device)
    if raw_points is None or raw_points.shape[0] == 0:
        print("[ERROR] raw_points empty.")
        return
    # extrinsic choose
    T = F.get_extrinsic_matrix(F.CALIB, raw_points[:, :3], W, H)
    # fuse
    fused = F.fuse_frame(yolo_dets, pcdet_dets, W, H, F.CALIB, T, raw_points)
    fused = sorted(fused, key=lambda d: d.score, reverse=True)[:max(1, args.topk)]
    fused_keep = [d for d in fused if d.score >= 0.10]
    print(f"[DETS] yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused)} keep(score>=0.10)={len(fused_keep)}")
    # 5) project points to image
    pts_xyz = raw_points[:, :3].astype(np.float32)
    uv, valid = F.project_points_lidar_to_img(pts_xyz, F.CALIB, T, W, H, use_distortion=F.USE_DISTORTION)
    # debug: all points overlay
    vis_all = img.copy()
    draw_projected_points(vis_all, uv, valid, (200, 200, 200), step=12)
    vis_all = overlay_mask(vis_all, rail_info.rail_mask01, color=(0,0,255), alpha=0.35)
    cv2.imwrite(os.path.join(out_dir, "06_proj_all_points.jpg"), vis_all)
    # 6) select rail top points by rail mask (dilate for tolerance)
    rail_mask255 = (rail_info.rail_mask01 * 255).astype(np.uint8)
    if MASK_DILATE_PX > 0:
        kd = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_DILATE_PX, MASK_DILATE_PX))
        rail_mask255 = cv2.dilate(rail_mask255, kd, iterations=1)
    rail_mask01_dil = (rail_mask255 > 0).astype(np.uint8)
    rail_pts = select_points_by_mask(pts_xyz, uv, valid, rail_mask01_dil)
    print(f"[RAIL_PTS] selected={rail_pts.shape[0]}")
    vis_rail = img.copy()
    draw_projected_points(vis_rail, uv, valid, (120, 120, 120), step=20)
    # rail points overlay (green)
    # for speed, recompute the kept indices from the mask before drawing
    Hh, Ww = rail_mask01_dil.shape
    u = np.round(uv[:,0]).astype(np.int32)
    v = np.round(uv[:,1]).astype(np.int32)
    inside = valid & (u>=0)&(u<Ww)&(v>=0)&(v<Hh)
    idx = np.where(inside)[0]
    keep = rail_mask01_dil[v[idx], u[idx]] > 0
    idx_keep = idx[keep]
    for i in idx_keep[::6]:
        cv2.circle(vis_rail, (int(u[i]), int(v[i])), 1, (0,255,0), -1, cv2.LINE_AA)
    # edges polylines
    if rail_info.left_poly.shape[0] > 0:
        cv2.polylines(vis_rail, [rail_info.left_poly.reshape(-1,1,2)], False, (255,0,0), 2, cv2.LINE_AA)
    if rail_info.right_poly.shape[0] > 0:
        cv2.polylines(vis_rail, [rail_info.right_poly.reshape(-1,1,2)], False, (0,0,255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "07_proj_rail_points.jpg"), vis_rail)
    # 7) match edge points (left/right separately)
    left_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.left_edge_mask, EDGE_MATCH_PX)
    right_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.right_edge_mask, EDGE_MATCH_PX)
    print(f"[EDGE_PTS] left={left_edge_pts.shape[0]} right={right_edge_pts.shape[0]}")
    vis_edge = img.copy()
    draw_projected_points(vis_edge, uv, valid, (120, 120, 120), step=22)
    # draw the edge points (blue/red)
    # for speed, sample indices again via the edge mask
    def draw_edge_points(edge_mask255, color):
        src = np.ones((H, W), dtype=np.uint8) * 255
        src[edge_mask255 > 0] = 0
        dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)
        uu = np.round(uv[:,0]).astype(np.int32)
        vv = np.round(uv[:,1]).astype(np.int32)
        ins = valid & (uu>=0)&(uu<W)&(vv>=0)&(vv<H)
        ii = np.where(ins)[0]
        d = dist[vv[ii], uu[ii]]
        kk = d <= EDGE_MATCH_PX
        jj = ii[kk]
        for j in jj[::4]:
            cv2.circle(vis_edge, (int(uu[j]), int(vv[j])), 1, color, -1, cv2.LINE_AA)
    draw_edge_points(rail_info.left_edge_mask, (255,0,0))
    draw_edge_points(rail_info.right_edge_mask, (0,0,255))
    if rail_info.mid_poly.shape[0] > 0:
        cv2.polylines(vis_edge, [rail_info.mid_poly.reshape(-1,1,2)], False, (0,255,0), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "08_proj_edge_points.jpg"), vis_edge)
    # 8) build track model + envelope
    track = build_track_model(rail_info, F.CALIB, T, rail_pts, H, W)
    if not track.ok:
        print("[ERROR] build_track_model failed:", track.dbg)
        with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
            f.write("build_track_model failed\n")
            f.write(json.dumps(track.dbg, ensure_ascii=False, indent=2))
        return
    print("\n[TRACK_MODEL]")
    print(f" origin={track.origin.tolist()}")
    print(f" t={track.t.tolist()}")
    print(f" n={track.n.tolist()}")
    print(f" up={track.up.tolist()}")
    print(f" s_range=[{track.s_min:.2f}, {track.s_max:.2f}] half_w_top={track.half_w_top:.3f}")
    print(f" octagon(l,v):\n{track.poly_lv}")
    print(f" dbg={track.dbg}")
    # 9) intrusion metrics
    metrics: List[IntrusionMetrics] = []
    for d in fused_keep:
        metrics.append(intrusion_for_det(track, d))
    print("\n========== INTRUSION METRICS ==========")
    if not metrics:
        print("No fused detections (score>=0.10).")
    for i, m in enumerate(metrics):
        print(f"[{i}] cls={m.cls:14s} detScore={m.score:.3f} INTRUDE={m.intrude} ({m.reason})")
        print(f" s_overlap={m.s_overlap:.2f}m s_ratio={m.s_ratio:.3f}")
        print(f" area_ratio(l-v)={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}")
        print(f" center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}m")
    # save metrics
    out_json = {
        "frame_id": fid,
        "seg_dbg": rail_info.dbg,
        "track_dbg": track.dbg,
        "rules": {
            "INTRUDE_VOL_RATIO_THR": INTRUDE_VOL_RATIO_THR,
            "INTRUDE_AREA_RATIO_THR": INTRUDE_AREA_RATIO_THR,
            "FORCE_CENTER_INSIDE_INTRUDE": FORCE_CENTER_INSIDE_INTRUDE,
        },
        "dets": [m.__dict__ for m in metrics]
    }
    with open(os.path.join(out_dir, "intrusion_metrics.json"), "w", encoding="utf-8") as f:
        json.dump(out_json, f, ensure_ascii=False, indent=2)
    with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
        for i, m in enumerate(metrics):
            f.write(f"[{i}] cls={m.cls} score={m.score:.3f} intrude={m.intrude} reason={m.reason}\n")
            f.write(f" s_overlap={m.s_overlap:.3f} s_ratio={m.s_ratio:.3f}\n")
            f.write(f" area_ratio={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}\n")
            f.write(f" center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}\n")
    # 10) bev/front debug with envelope
    render_bev_density(pts_xyz, os.path.join(out_dir, "09_bev_envelope.jpg"), track=track, dets=fused_keep, res=0.06)
    render_front_xz_density(pts_xyz, os.path.join(out_dir, "10_front_envelope.jpg"), track=track, dets=fused_keep, res=0.06)
    # 11) 3d scene (optional)
    render_3d_scene(pts_xyz, track, fused_keep, os.path.join(out_dir, "11_3d_scene.png"))
    print(f"\n[DONE] saved all debug files in: {out_dir}")


if __name__ == "__main__":
    main()
运行后看目录:debug_intrusion_pidnet/<frame_id>/
MASK_DILATE_PX 需要增大容错。对每个 FUSED 目标会打印类似:
- s_overlap:检测框在轨道方向上与限界的重叠长度(米)
- s_ratio:s_overlap / det_len_s
- area_ratio(l-v):检测框在“横向-竖向截面(l-v)”与八边形的交叠面积占比
- volume_ratio = s_ratio * area_ratio:综合侵入比例
- center_inside:中心点是否落进立体限界
- signed_dist_center:中心点到八边形边界的带符号距离(inside 为负)

判定逻辑(写死在脚本里,也落盘在 json):
- no_s_overlap → 不侵界
- center_inside=True 且 FORCE_CENTER_INSIDE_INTRUDE=True → 直接侵界
- volume_ratio >= INTRUDE_VOL_RATIO_THR → 侵界
- area_ratio >= INTRUDE_AREA_RATIO_THR 且 s_ratio>=0.25 → 侵界

你可以把阈值改成更保守/更宽松。
你之前失败是因为“黑色阈值找轨道”导致 left/right line 都没找到,所以后面 edge 点云很少、左右都为 0。
这版用 PIDNet 的 rail mask,left/right/mid 来自 mask 本身,正常情况下会直接解决:
track2d extraction failed 不会再出现EDGE3D total=132 left=0 right=0 这种极端会大幅改善如果你跑完后 把 04_edge_polylines.jpg 和 08_proj_edge_points.jpg 的效果(是否贴合/是否偏移)描述一下,我可以直接告诉你应该调哪几个参数(最常见是 MASK_DILATE_PX、EDGE_MATCH_PX、ROI 梯形)。但你不需要再把 fusion.py 发我了,这份脚本已经把“分割→边缘→投影→八边形立体限界→侵界打印”串起来了。
代码报错如下:Traceback (most recent call last):
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1350, in <module>
main()
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1157, in main
seg_model = init_mmseg_model(args.seg_config, args.seg_ckpt, args.seg_device)
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 185, in init_mmseg_model
raise RuntimeError(
RuntimeError: mmseg init failed. 请确认你在安装了 mmsegmentation 的环境里运行,并且 config/ckpt 路径正确。
raw error: No module named 'mmseg'
是否需要像之前导入YOLO一样导入:def load_yolo_model(weights_path: str):
try:
from ultralytics import YOLO
except Exception:
sys.path.insert(0, ULTRALYTICS_REPO)
from ultralytics import YOLO
return YOLO(weights_path)
我现在给你提供mmseg的库和虚拟环境(mmseg3.0) jd@jd-x11dai-n:/media/jd/4997BB1603CFE2C4/lw/mmsegmentation$,请给出修改后的代码
你这个报错的根因很明确:当前运行 qinjiepanding.py 的 Python 解释器里没有安装 mmseg(所以 import mmseg 直接失败)。
像你 YOLO 那样简单 sys.path.insert() 只有在“当前解释器里 mmengine/mmcv/torch 等依赖也都齐全”时才可能跑通;但你已经把 mmseg 放在另一个虚拟环境 mmseg3.0 里了——最稳的做法是:
✅ 主程序(PCDet+YOLO+融合+侵界)继续在你现在环境跑
✅ 分割(mmseg/PIDNet)改为用 mmseg3.0 的 python 子进程跑,输出 mask(.npy),主程序再读回来用
这样不要求你把 PCDet 也装到 mmseg3.0 环境里,也不会互相污染依赖。
下面给你“直接可用”的修改版:你只需要把这几段代码粘进 qinjiepanding.py(或你现在的脚本)即可。
pythonimport sys import subprocess import textwrap
把下面整段代码加到你的脚本里(建议放在你原来的 init_mmseg_model 附近,并且以后不要再直接 import mmseg):
pythondef resolve_checkpoint(ckpt_path: str) -> str: """兼容 mmseg work_dir 的 last_checkpoint 文件(里面存的是实际 pth 路径)""" if os.path.basename(ckpt_path) == "last_checkpoint": with open(ckpt_path, "r", encoding="utf-8") as f: real = f.read().strip() if real and os.path.isfile(real): return real return ckpt_path def _write_mmseg_infer_helper(helper_path: str): """写一个独立的小脚本,专门在 mmseg3.0 虚拟环境里跑推理并输出 npy""" code = r''' import os, sys, argparse import numpy as np import cv2 def resolve_checkpoint(ckpt_path: str) -> str: if os.path.basename(ckpt_path) == "last_checkpoint": with open(ckpt_path, "r", encoding="utf-8") as f: real = f.read().strip() if real and os.path.isfile(real): return real return ckpt_path def main(): ap = argparse.ArgumentParser() ap.add_argument("--img", required=True) ap.add_argument("--config", required=True) ap.add_argument("--ckpt", required=True) ap.add_argument("--out", required=True) ap.add_argument("--device", default="cuda:0") ap.add_argument("--mmseg_repo", default="") args = ap.parse_args() if args.mmseg_repo and os.path.isdir(args.mmseg_repo): sys.path.insert(0, args.mmseg_repo) ckpt = resolve_checkpoint(args.ckpt) # ====== init model (兼容 mmseg 0.x / 1.x) ====== model = None try: from mmseg.apis import init_model model = init_model(args.config, ckpt, device=args.device) from mmseg.apis import inference_model use_new = True except Exception: use_new = False if model is None: try: from mmseg.apis import init_segmentor model = init_segmentor(args.config, ckpt, device=args.device) except Exception as e: raise RuntimeError(f"mmseg init failed in helper: {e}") img = cv2.imread(args.img) if img is None: raise RuntimeError(f"cannot read image: {args.img}") # ====== inference ====== if use_new: out = inference_model(model, img) pred = out.pred_sem_seg.data if hasattr(pred, "cpu"): pred = pred.cpu().numpy() pred = np.array(pred) if pred.ndim == 3: pred = pred[0] else: from mmseg.apis import inference_segmentor out = inference_segmentor(model, img) pred = 
np.array(out[0], dtype=np.int32) pred = pred.astype(np.int32) np.save(args.out, pred) print(f"[mmseg_helper] saved pred to {args.out} shape={pred.shape} dtype={pred.dtype}") if __name__ == "__main__": main() ''' with open(helper_path, "w", encoding="utf-8") as f: f.write(code) def infer_rail_mask_by_subprocess( img_path: str, out_npy: str, seg_config: str, seg_ckpt: str, seg_device: str, mmseg_python: str, mmseg_repo: str = "" ) -> np.ndarray: """ 用 mmseg3.0 的 python 子进程跑分割,返回 pred label map (H,W) int32 """ seg_ckpt = resolve_checkpoint(seg_ckpt) if not os.path.isfile(mmseg_python): raise FileNotFoundError(f"--mmseg_python not found: {mmseg_python}") if not os.path.isfile(seg_config): raise FileNotFoundError(f"--seg_config not found: {seg_config}") if not os.path.isfile(seg_ckpt): raise FileNotFoundError(f"--seg_ckpt not found: {seg_ckpt}") if not os.path.isfile(img_path): raise FileNotFoundError(f"image not found: {img_path}") helper_path = os.path.join(os.path.dirname(out_npy), "_mmseg_infer_helper.py") if not os.path.isfile(helper_path): _write_mmseg_infer_helper(helper_path) cmd = [ mmseg_python, helper_path, "--img", img_path, "--config", seg_config, "--ckpt", seg_ckpt, "--out", out_npy, "--device", seg_device, "--mmseg_repo", mmseg_repo ] print("[INFO] Run mmseg subprocess:", " ".join(cmd)) r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) if r.returncode != 0: print("====== mmseg subprocess STDOUT ======") print(r.stdout) print("====== mmseg subprocess STDERR ======") print(r.stderr) raise RuntimeError("mmseg subprocess failed (see logs above).") pred = np.load(out_npy).astype(np.int32) return pred
在你的 parse_args() 里加:
pythonap.add_argument("--mmseg_python", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmseg3.0/bin/python", help="mmseg3.0 虚拟环境 python 路径(必须是可执行文件)") ap.add_argument("--mmseg_repo", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation", help="mmsegmentation repo 根目录(用于 sys.path 兜底,可空)")
⚠️ 你需要把
--mmseg_python默认值改成你机器上真实的 venv python 路径
通常是:/path/to/mmseg3.0/bin/python
init_mmseg_model(...) 调用替换成“子进程推理”你现在报错的位置是:
pythonseg_model = init_mmseg_model(args.seg_config, args.seg_ckpt, args.seg_device)
把它替换成:
python# 子进程跑 mmseg,返回 pred (H,W) label map pred = infer_rail_mask_by_subprocess( img_path=img_path, # 你的当前帧图片路径 out_npy=os.path.join(out_dir, "seg_pred.npy"), seg_config=args.seg_config, seg_ckpt=args.seg_ckpt, seg_device=args.seg_device, mmseg_python=args.mmseg_python, mmseg_repo=args.mmseg_repo )
然后你后面原来用 infer_rail_mask(seg_model, img) 的地方,直接用 pred 即可。
例如你之前写的是:
pythonpred = infer_rail_mask(seg_model, img) rail_info = build_masks_from_rail_pred(pred, ...)
现在保持后半句不变:
pythonrail_info = build_masks_from_rail_pred(pred, args.rail_id, H, W, roi_mask)
bashpython qinjiepanding.py --frame_id 2025042116142702 \ --seg_config /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py \ --seg_ckpt /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth \ --mmseg_python /media/jd/4997BB1603CFE2C4/lw/mmseg3.0/bin/python \ --mmseg_repo /media/jd/4997BB1603CFE2C4/lw/mmsegmentation
因为你的现状是:mmseg 安装在另一个虚拟环境。
你现在这个解释器里缺的是 mmseg,通常还会连带缺 mmengine / mmcv / mmrazor / mmdeploy 等依赖。
单纯把 repo 加进 sys.path 很容易继续报一串依赖缺失。
用我上面这个方案,mmseg 的依赖全部留在 mmseg3.0 环境里,主环境只负责读 mask,最干净、最稳。
如果你愿意,我也可以把你现有 qinjiepanding.py 里“分割那段 + 后续用 pred 生成轨道mask/边缘/投影点云/八边形/侵界打印”的接口名,按你脚本里实际函数名再帮你对齐一次(避免你手动改漏)。你只要把你当前脚本里这两段贴出来即可:
1)parse_args() 2)分割推理到 build_masks_from_rail_pred() 之间那一段。
按照你说的修改代码后,代码报错如下:#!/usr/bin/env python3
用 PIDNet(mmseg) 语义分割 steel rail -> 提取轨道边缘/中心线 -> 投影匹配到点云 -> 构建八边形立体限界 -> FUSED目标侵界判定
并保存全部中间过程图片。
运行示例:
python intrusion_clearance_pidnet.py --frame_id 2025042116142702
你给的默认路径(可通过参数改):
--seg_config /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py
--seg_ckpt /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth
或者 --seg_ckpt .../last_checkpoint (脚本会自动读出真实 .pth)
输出目录:
debug_intrusion_pidnet/<frame_id>/
00_raw.jpg
01_seg_overlay.jpg
02_rail_mask.png
03_corridor_mask.png
04_edge_polylines.jpg
05_edge_masks.png
06_proj_all_points.jpg
07_proj_rail_points.jpg
08_proj_edge_points.jpg
09_bev_envelope.jpg
10_front_envelope.jpg
11_3d_scene.png (若 matplotlib 可用)
intrusion_metrics.json
intrusion_metrics.txt
"""
# Stdlib imports first, then third-party, then project-local (PEP 8 grouping).
from __future__ import annotations  # fixed: was "from future import annotations" (copy/paste damage)

import argparse
import json
import math
import os
import random
import subprocess
import sys
import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, List, Tuple, Optional

import cv2
import numpy as np

import fusion as F  # project-local fusion module (Det3D, boxes3d_to_corners, USE_DISTORTION, ...)
# ---- Tunable parameters for rail extraction / clearance-gauge construction ----

# Trapezoidal image-space ROI that restricts where rail pixels are accepted.
ROI_ENABLE = True
ROI_Y_TOP_RATIO = 0.30    # ROI top edge as a fraction of image height
ROI_Y_BOT_RATIO = 0.98    # ROI bottom edge as a fraction of image height
ROI_X_TOP_L_RATIO = 0.30  # left x of ROI top edge (fraction of width)
ROI_X_TOP_R_RATIO = 0.70  # right x of ROI top edge
ROI_X_BOT_L_RATIO = 0.03  # left x of ROI bottom edge
ROI_X_BOT_R_RATIO = 0.97  # right x of ROI bottom edge

RAIL_CLASS_ID_DEFAULT = 1  # segmentation label id treated as "rail"
MASK_CLOSE_K = 9           # morphological close kernel size (px)
MASK_OPEN_K = 7            # morphological open kernel size (px)
MASK_DILATE_PX = 5  # projection tolerance; critical when extrinsics are slightly off
KEEP_TOPK_CC = 6  # keep at most this many connected components (suppress false positives)
EDGE_MATCH_PX = 5.0
EDGE_LINE_THICK = 9  # thickness (px) when rasterizing edge polylines into masks
CENTERLINE_N_ROWS = 80
MIN_RAIL_PTS = 200
TRAIN_MAX_WIDTH_M = 2.5  # max train body width, meters (docs mention 2.3-2.5 m)
HANG_HEIGHT_M = 3.7  # rail top to lowest part of the hanging body, meters (3.4-3.7 m)
SIDE_MARGIN_M = 0.35  # lateral safety margin (m)
TOP_MARGIN_M = 0.05  # extra margin on the top face (m)
HEIGHT_EXTRA_M = 0.30  # extra vertical margin (m)
TOP_CHAMFER_H = 0.55  # upper chamfer height (m)
BOT_CHAMFER_H = 0.55  # lower chamfer height (m)
BOTTOM_WIDTH_RATIO = 0.85  # bottom width relative to max width (keeps an octagon, not a hexagon)

# Intrusion decision thresholds (also dumped into the json output).
INTRUDE_VOL_RATIO_THR = 0.02
INTRUDE_AREA_RATIO_THR = 0.05
FORCE_CENTER_INSIDE_INTRUDE = True

# Root directory for all per-frame debug artifacts.
OUT_ROOT = "debug_intrusion_pidnet"
os.makedirs(OUT_ROOT, exist_ok=True)
def make_roi_mask(h: int, w: int) -> np.ndarray:
    """Build a uint8 (h, w) ROI mask: 255 inside the trapezoid, 0 outside.

    When ROI_ENABLE is False the whole image counts as ROI.
    """
    if not ROI_ENABLE:
        return np.full((h, w), 255, dtype=np.uint8)
    top_y = int(h * ROI_Y_TOP_RATIO)
    bot_y = int(h * ROI_Y_BOT_RATIO)
    trapezoid = np.array(
        [
            [int(w * ROI_X_BOT_L_RATIO), bot_y],
            [int(w * ROI_X_BOT_R_RATIO), bot_y],
            [int(w * ROI_X_TOP_R_RATIO), top_y],
            [int(w * ROI_X_TOP_L_RATIO), top_y],
        ],
        dtype=np.int32,
    )
    mask = np.zeros((h, w), dtype=np.uint8)
    cv2.fillConvexPoly(mask, trapezoid, 255)
    return mask
def overlay_mask(img_bgr: np.ndarray, mask01: np.ndarray, color=(0, 0, 255), alpha=0.55) -> np.ndarray:
    """Alpha-blend `color` over `img_bgr` wherever `mask01` is nonzero; returns a new image."""
    out = img_bgr.copy()
    sel = mask01.astype(bool)
    if sel.any():
        tint = np.array(color, dtype=np.float32)
        out[sel] = (out[sel] * (1 - alpha) + tint * alpha).astype(np.uint8)
    return out
def median_filter_1d(arr: np.ndarray, k: int = 7) -> np.ndarray:
    """Sliding-window median (window k forced odd, >= 3); edges use a truncated window.

    Returns a float32 copy; the input is left untouched.  Empty input is
    returned as-is.
    """
    if arr.size == 0:
        return arr
    win = max(3, int(k) | 1)
    half = win // 2
    n = arr.size
    smoothed = arr.astype(np.float32).copy()
    for i in range(n):
        lo = max(0, i - half)
        hi = min(n, i + half + 1)
        smoothed[i] = float(np.median(arr[lo:hi]))
    return smoothed
def resolve_checkpoint(ckpt_path: str) -> str:
    """Resolve an mmseg work_dir ``last_checkpoint`` file to the real .pth path.

    mmseg writes a text file named ``last_checkpoint`` whose single line is the
    absolute path of the most recent checkpoint.  If ``ckpt_path`` points at
    such a file and the referenced .pth exists, return the referenced path;
    otherwise return ``ckpt_path`` unchanged.

    Note: the module previously defined this function twice with identical
    behavior; the duplicate has been merged into this single definition.
    """
    if os.path.basename(ckpt_path) == "last_checkpoint":
        with open(ckpt_path, "r", encoding="utf-8") as f:
            real = f.read().strip()
        if real and os.path.isfile(real):
            return real
    return ckpt_path
def _write_mmseg_infer_helper(helper_path: str):
"""写一个独立的小脚本,专门在 mmseg3.0 虚拟环境里跑推理并输出 npy"""
code = r'''
import os, sys, argparse
import numpy as np
import cv2
def resolve_checkpoint(ckpt_path: str) -> str:
if os.path.basename(ckpt_path) == "last_checkpoint":
with open(ckpt_path, "r", encoding="utf-8") as f:
real = f.read().strip()
if real and os.path.isfile(real):
return real
return ckpt_path
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--img", required=True)
ap.add_argument("--config", required=True)
ap.add_argument("--ckpt", required=True)
ap.add_argument("--out", required=True)
ap.add_argument("--device", default="cuda:0")
ap.add_argument("--mmseg_repo", default="")
args = ap.parse_args()
textif args.mmseg_repo and os.path.isdir(args.mmseg_repo): sys.path.insert(0, args.mmseg_repo) ckpt = resolve_checkpoint(args.ckpt) # ====== init model (兼容 mmseg 0.x / 1.x) ====== model = None try: from mmseg.apis import init_model model = init_model(args.config, ckpt, device=args.device) from mmseg.apis import inference_model use_new = True except Exception: use_new = False if model is None: try: from mmseg.apis import init_segmentor model = init_segmentor(args.config, ckpt, device=args.device) except Exception as e: raise RuntimeError(f"mmseg init failed in helper: {e}") img = cv2.imread(args.img) if img is None: raise RuntimeError(f"cannot read image: {args.img}") # ====== inference ====== if use_new: out = inference_model(model, img) pred = out.pred_sem_seg.data if hasattr(pred, "cpu"): pred = pred.cpu().numpy() pred = np.array(pred) if pred.ndim == 3: pred = pred[0] else: from mmseg.apis import inference_segmentor out = inference_segmentor(model, img) pred = np.array(out[0], dtype=np.int32) pred = pred.astype(np.int32) np.save(args.out, pred) print(f"[mmseg_helper] saved pred to {args.out} shape={pred.shape} dtype={pred.dtype}")
if name == "main":
main()
'''
with open(helper_path, "w", encoding="utf-8") as f:
f.write(code)
def infer_rail_mask_by_subprocess(
        img_path: str,
        out_npy: str,
        seg_config: str,
        seg_ckpt: str,
        seg_device: str,
        mmseg_python: str,
        mmseg_repo: str = "") -> np.ndarray:
    """Run mmseg segmentation in the mmseg3.0 venv as a subprocess.

    The heavy mmseg dependencies stay in their own virtualenv; this process
    only reads back the saved label map.

    Returns:
        (H, W) int32 prediction label map loaded from ``out_npy``.

    Raises:
        FileNotFoundError: for any missing input path.
        RuntimeError: when the subprocess exits nonzero (its stdout/stderr are
            printed first for debugging).
    """
    seg_ckpt = resolve_checkpoint(seg_ckpt)

    if not os.path.isfile(mmseg_python):
        raise FileNotFoundError(f"--mmseg_python not found: {mmseg_python}")
    if not os.path.isfile(seg_config):
        raise FileNotFoundError(f"--seg_config not found: {seg_config}")
    if not os.path.isfile(seg_ckpt):
        raise FileNotFoundError(f"--seg_ckpt not found: {seg_ckpt}")
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"image not found: {img_path}")

    # Create the helper script next to the output file on first use.
    helper_path = os.path.join(os.path.dirname(out_npy), "_mmseg_infer_helper.py")
    if not os.path.isfile(helper_path):
        _write_mmseg_infer_helper(helper_path)

    cmd = [
        mmseg_python, helper_path,
        "--img", img_path,
        "--config", seg_config,
        "--ckpt", seg_ckpt,
        "--out", out_npy,
        "--device", seg_device,
        "--mmseg_repo", mmseg_repo,
    ]
    print("[INFO] Run mmseg subprocess:", " ".join(cmd))
    r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if r.returncode != 0:
        print("====== mmseg subprocess STDOUT ======")
        print(r.stdout)
        print("====== mmseg subprocess STDERR ======")
        print(r.stderr)
        raise RuntimeError("mmseg subprocess failed (see logs above).")

    pred = np.load(out_npy).astype(np.int32)
    return pred
def init_mmseg_model(config_path: str, checkpoint_path: str, device: str):
    """In-process mmseg model init (requires mmseg installed in THIS interpreter).

    Tries the mmseg >= 1.0 API (``init_model``) first, then falls back to the
    0.x API (``init_segmentor``).  Prefer ``infer_rail_mask_by_subprocess``
    when mmseg lives in a separate virtualenv.
    """
    ckpt = resolve_checkpoint(checkpoint_path)
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f"seg_config not found: {config_path}")
    if not os.path.isfile(ckpt):
        raise FileNotFoundError(f"seg_checkpoint not found: {ckpt}")

    # mmseg >= 1.0
    try:
        from mmseg.apis import init_model  # type: ignore
        return init_model(config_path, ckpt, device=device)
    except Exception:
        pass
    # mmseg 0.x fallback
    try:
        from mmseg.apis import init_segmentor  # type: ignore
        return init_segmentor(config_path, ckpt, device=device)
    except Exception as e:
        raise RuntimeError(
            "mmseg init failed. 请确认你在安装了 mmsegmentation 的环境里运行,并且 config/ckpt 路径正确。\n"
            f"raw error: {e}"
        )
def infer_rail_mask(model, img_bgr: np.ndarray) -> np.ndarray:
    """Run in-process mmseg inference; return the (H, W) int32 label map.

    Tries the mmseg >= 1.0 API first, then the 0.x API; raises RuntimeError if
    both fail.
    """
    # mmseg >= 1.0
    try:
        from mmseg.apis import inference_model  # type: ignore
        out = inference_model(model, img_bgr)
        pred = out.pred_sem_seg.data
        # tensor -> numpy
        if hasattr(pred, "cpu"):
            pred = pred.cpu().numpy()
        pred = np.array(pred)
        if pred.ndim == 3:
            pred = pred[0]
        return pred.astype(np.int32)
    except Exception:
        pass

    # mmseg 0.x fallback
    try:
        from mmseg.apis import inference_segmentor  # type: ignore
        out = inference_segmentor(model, img_bgr)
        return np.array(out[0], dtype=np.int32)
    except Exception as e:
        raise RuntimeError(f"mmseg inference failed: {e}")
@dataclass
class RailMaskInfo:
    """Image-space products of rail-segmentation post-processing."""
    rail_mask01: np.ndarray   # (H,W) 0/1, steel-rail pixels
    corridor01: np.ndarray    # (H,W) 0/1, per-row fill between left & right rail edges
    left_poly: np.ndarray     # (N,2) int (x,y) left edge polyline
    right_poly: np.ndarray    # (N,2) right edge polyline
    mid_poly: np.ndarray      # (N,2) centerline polyline
    left_edge_mask: np.ndarray   # (H,W) 0/255 rasterized left edge
    right_edge_mask: np.ndarray  # (H,W) 0/255 rasterized right edge
    dbg: Dict                 # misc debug values from extraction
def keep_topk_connected_components(mask01: np.ndarray, k: int) -> np.ndarray:
    """Keep only the k largest 8-connected components of a 0/1 mask.

    Rails may be split into several segments, so k should be generous.
    """
    binary = (mask01 > 0).astype(np.uint8)
    n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)
    if n_labels <= 1:
        return binary
    component_areas = stats[1:, cv2.CC_STAT_AREA]
    biggest_first = np.argsort(-component_areas)
    selected = (biggest_first[:min(k, biggest_first.size)] + 1).tolist()
    kept = np.zeros_like(binary)
    for label_id in selected:
        kept[labels == label_id] = 1
    return kept
def build_polylines_from_mask(rail01: np.ndarray, roi_mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict]:
    """Row-scan the rail mask to extract left/right/mid polylines (tolerates mild curvature).

    Returns (left_poly, right_poly, mid_poly, dbg); polylines are (N,2) int32
    (x, y) arrays, empty when fewer than 30 valid rows survive.

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage) and was not valid Python; reconstructed here.
    """
    H, W = rail01.shape[:2]
    y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0
    y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1)

    xs_left = []
    xs_right = []
    ys = []
    for y in range(y_top, y_bot + 1):
        if roi_mask[y, :].max() == 0:
            continue
        row = rail01[y, :] & (roi_mask[y, :] > 0)
        idx = np.where(row > 0)[0]
        if idx.size < 6:
            continue
        xl = int(idx.min())
        xr = int(idx.max())
        if xr - xl < 10:  # too narrow to be a credible rail row
            continue
        xs_left.append(xl)
        xs_right.append(xr)
        ys.append(y)

    dbg = {"rows_valid": len(ys)}
    if len(ys) < 30:
        return (np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32),
                np.zeros((0, 2), np.int32), dbg)

    xs_left = np.array(xs_left, dtype=np.float32)
    xs_right = np.array(xs_right, dtype=np.float32)
    ys = np.array(ys, dtype=np.int32)

    widths = xs_right - xs_left
    med_w = float(np.median(widths))
    dbg["median_width_px"] = med_w

    # Drop rows with abnormal width (occasional false positives would fling the edges).
    ok = (widths > 0.45 * med_w) & (widths < 1.60 * med_w)
    xs_left = xs_left[ok]
    xs_right = xs_right[ok]
    ys = ys[ok]
    dbg["rows_kept"] = int(ys.size)
    if ys.size < 30:
        return (np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32),
                np.zeros((0, 2), np.int32), dbg)

    # Median smoothing suppresses single-row jitter before taking the midline.
    xs_left = median_filter_1d(xs_left, 9)
    xs_right = median_filter_1d(xs_right, 9)
    xs_mid = 0.5 * (xs_left + xs_right)

    left_poly = np.stack([xs_left, ys.astype(np.float32)], axis=1).round().astype(np.int32)
    right_poly = np.stack([xs_right, ys.astype(np.float32)], axis=1).round().astype(np.int32)
    mid_poly = np.stack([xs_mid, ys.astype(np.float32)], axis=1).round().astype(np.int32)
    return left_poly, right_poly, mid_poly, dbg
def build_masks_from_rail_pred(pred: np.ndarray, rail_id: int, img_h: int, img_w: int, roi_mask: np.ndarray) -> RailMaskInfo:
    """Turn a segmentation label map into cleaned rail/corridor masks and edge polylines.

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage); reconstructed here.
    """
    # Resize prediction to the image size if needed (nearest keeps labels intact).
    if pred.shape[0] != img_h or pred.shape[1] != img_w:
        pred_rs = cv2.resize(pred.astype(np.int32), (img_w, img_h), interpolation=cv2.INTER_NEAREST)
    else:
        pred_rs = pred

    rail01 = (pred_rs == int(rail_id)).astype(np.uint8)

    # Restrict to the trapezoidal ROI.
    if ROI_ENABLE:
        rail01 = (rail01 & (roi_mask > 0)).astype(np.uint8)

    # Morphological cleanup: close small gaps, then open to drop specks.
    kc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_CLOSE_K, MASK_CLOSE_K))
    ko = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_OPEN_K, MASK_OPEN_K))
    rail01 = cv2.morphologyEx(rail01, cv2.MORPH_CLOSE, kc, iterations=2)
    rail01 = cv2.morphologyEx(rail01, cv2.MORPH_OPEN, ko, iterations=1)

    # Keep the largest components only (rails may fragment, hence top-K).
    rail01 = keep_topk_connected_components(rail01, KEEP_TOPK_CC).astype(np.uint8)

    # Corridor: per-row fill between min/max rail columns (covers the dark gap between rails).
    H, W = rail01.shape[:2]
    corridor = np.zeros((H, W), dtype=np.uint8)
    y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0
    y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1)
    for y in range(y_top, y_bot + 1):
        if ROI_ENABLE and roi_mask[y, :].max() == 0:
            continue
        idx = np.where(rail01[y, :] > 0)[0]
        if idx.size < 6:
            continue
        xl = int(idx.min())
        xr = int(idx.max())
        if xr - xl < 10:
            continue
        corridor[y, xl:xr + 1] = 1
    # Close once more to fill holes inside the corridor.
    corridor = cv2.morphologyEx(corridor, cv2.MORPH_CLOSE, kc, iterations=1)

    # Edge/center polylines.
    left_poly, right_poly, mid_poly, dbg = build_polylines_from_mask(rail01, roi_mask)

    # Rasterize edge polylines into masks (used with distanceTransform to match point clouds).
    left_edge_mask = np.zeros((H, W), dtype=np.uint8)
    right_edge_mask = np.zeros((H, W), dtype=np.uint8)
    if left_poly.shape[0] >= 10:
        cv2.polylines(left_edge_mask, [left_poly.reshape(-1, 1, 2)], isClosed=False,
                      color=255, thickness=EDGE_LINE_THICK, lineType=cv2.LINE_AA)
    if right_poly.shape[0] >= 10:
        cv2.polylines(right_edge_mask, [right_poly.reshape(-1, 1, 2)], isClosed=False,
                      color=255, thickness=EDGE_LINE_THICK, lineType=cv2.LINE_AA)

    return RailMaskInfo(
        rail_mask01=rail01,
        corridor01=corridor,
        left_poly=left_poly,
        right_poly=right_poly,
        mid_poly=mid_poly,
        left_edge_mask=left_edge_mask,
        right_edge_mask=right_edge_mask,
        dbg=dbg,
    )
def select_points_by_mask(points_xyz: np.ndarray,
                          uv: np.ndarray,
                          valid: np.ndarray,
                          mask01: np.ndarray) -> np.ndarray:
    """Return the 3D points whose (rounded) projected pixel lands on a nonzero mask cell."""
    H, W = mask01.shape[:2]
    cols = np.round(uv[:, 0]).astype(np.int32)
    rows = np.round(uv[:, 1]).astype(np.int32)
    in_image = valid & (cols >= 0) & (cols < W) & (rows >= 0) & (rows < H)
    candidates = np.where(in_image)[0]
    if candidates.size == 0:
        return np.zeros((0, 3), dtype=np.float32)
    hit = mask01[rows[candidates], cols[candidates]] > 0
    return points_xyz[candidates[hit]].astype(np.float32)
def match_points_near_edge(points_xyz: np.ndarray,
                           uv: np.ndarray,
                           valid: np.ndarray,
                           edge_mask255: np.ndarray,
                           max_px: float) -> np.ndarray:
    """Select the 3D points whose projection lies within ``max_px`` pixels of the edge.

    A distance transform of the inverted edge mask makes the per-point lookup O(1).

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage); reconstructed here.
    """
    H, W = edge_mask255.shape[:2]
    src = np.ones((H, W), dtype=np.uint8) * 255
    src[edge_mask255 > 0] = 0
    dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)

    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H)
    idx = np.where(inside)[0]
    if idx.size == 0:
        return np.zeros((0, 3), dtype=np.float32)
    u2 = u[idx]
    v2 = v[idx]
    d = dist[v2, u2]
    keep = d <= float(max_px)
    return points_xyz[idx[keep]].astype(np.float32)
def fit_plane_svd(points_xyz: np.ndarray, iters: int = 2, keep_q: float = 0.90) -> Tuple[np.ndarray, np.ndarray]:
    """Simple robust plane fit.

    Each iteration: SVD of the median-centered points gives the normal, then
    points beyond the ``keep_q`` distance quantile are dropped before refitting.

    Returns:
        (p0, n): a point on the plane and a unit normal with n[2] > 0.

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage); reconstructed here.
    """
    pts = points_xyz.astype(np.float64)
    if pts.shape[0] < 10:
        # Too few points for a meaningful fit: fall back to a horizontal plane.
        p0 = np.median(pts, axis=0)
        n = np.array([0, 0, 1.0], dtype=np.float64)
        return p0.astype(np.float32), n.astype(np.float32)

    for _ in range(max(1, iters)):
        p0 = np.median(pts, axis=0)
        X = pts - p0
        _, _, vt = np.linalg.svd(X, full_matrices=False)
        n = vt[-1]
        n = n / (np.linalg.norm(n) + 1e-12)
        if n[2] < 0:
            n = -n
        # Drop outliers beyond the keep_q distance quantile.
        d = np.abs((pts - p0) @ n.reshape(3, 1)).reshape(-1)
        thr = float(np.quantile(d, keep_q))
        pts = pts[d <= thr]
        if pts.shape[0] < 30:
            break
    return p0.astype(np.float32), n.astype(np.float32)
def undistort_pixels_to_normalized(uv: np.ndarray, calib: Dict) -> np.ndarray:
    """Map (N,2) pixel coordinates to normalized camera coordinates, removing lens distortion.

    Missing ``dist_coeffs`` in `calib` are treated as zero distortion.
    """
    K = np.array(calib["camera_matrix"], dtype=np.float64)
    dist = np.array(calib.get("dist_coeffs", [0, 0, 0, 0, 0]), dtype=np.float64).reshape(-1)
    pixel_pts = uv.reshape(-1, 1, 2).astype(np.float64)
    normalized = cv2.undistortPoints(pixel_pts, K, dist).reshape(-1, 2)
    return normalized.astype(np.float32)
def pixels_to_rays_in_lidar(uv: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Convert pixels to viewing rays expressed in the LiDAR frame.

    Returns:
        ray_origin: (3,) camera center in LiDAR coordinates.
        ray_dirs:   (N,3) unit direction vectors in LiDAR coordinates.

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage); reconstructed here.
    """
    # Invert lidar->cam to get the cam->lidar rotation and the camera center.
    T = np.array(T_lidar2cam, dtype=np.float64)
    R = T[:3, :3]
    t = T[:3, 3]
    R_c2l = R.T
    o_lidar = (-R.T @ t).reshape(3)

    # Normalized camera coordinates (with or without distortion correction).
    xy = undistort_pixels_to_normalized(uv, calib) if F.USE_DISTORTION else None
    if xy is None:
        K = np.array(calib["camera_matrix"], dtype=np.float64)
        fx, fy = K[0, 0], K[1, 1]
        cx, cy = K[0, 2], K[1, 2]
        x = (uv[:, 0] - cx) / fx
        y = (uv[:, 1] - cy) / fy
        xy = np.stack([x, y], axis=1).astype(np.float32)

    dirs_cam = np.concatenate([xy.astype(np.float64),
                               np.ones((xy.shape[0], 1), dtype=np.float64)], axis=1)
    dirs_lidar = (R_c2l @ dirs_cam.T).T
    nrm = np.linalg.norm(dirs_lidar, axis=1, keepdims=True) + 1e-12
    dirs_lidar = dirs_lidar / nrm
    return o_lidar.astype(np.float32), dirs_lidar.astype(np.float32)
def intersect_rays_with_plane(ray_origin: np.ndarray,
ray_dirs: np.ndarray,
plane_p0: np.ndarray,
plane_n: np.ndarray) -> np.ndarray:
"""
ray: p = o + t d
plane: n·(p - p0) = 0
返回 (N,3),无效的设为 nan
"""
o = ray_origin.reshape(1, 3).astype(np.float64)
d = ray_dirs.astype(np.float64)
p0 = plane_p0.reshape(1, 3).astype(np.float64)
n = plane_n.reshape(1, 3).astype(np.float64)
textdenom = (d * n).sum(axis=1) # (N,) num = ((p0 - o) * n).sum(axis=1) t = np.full_like(num, np.nan, dtype=np.float64) ok = np.abs(denom) > 1e-9 t[ok] = num[ok] / denom[ok] # 只保留前向 t[t <= 0] = np.nan P = o + d * t.reshape(-1, 1) return P.astype(np.float32)
@dataclass
class TrackModel:
    """Track-aligned coordinate frame plus the clearance-gauge cross-section."""
    ok: bool                 # False when construction failed (see dbg["err"])
    origin: np.ndarray       # (3,) point on the rail-top plane (centerline mean)
    t: np.ndarray            # (3,) tangent (along-track direction)
    n: np.ndarray            # (3,) lateral direction
    up: np.ndarray           # (3,) rail-top plane normal
    s_min: float             # along-track range start (with margin)
    s_max: float             # along-track range end (with margin)
    half_w_top: float        # half gauge width at rail-top level (m)
    poly_lv: np.ndarray      # (8,2) octagon in (l,v) where v=0 is top plane, v<0 downward
    dbg: Dict                # debug values from construction
def pca_direction(points_xyz: np.ndarray, up: np.ndarray) -> np.ndarray:
    """Principal direction of a point set, projected into the plane orthogonal to ``up``.

    The result is unit length and flipped so its x component is non-negative.
    """
    pts = points_xyz.astype(np.float64)
    centered = pts - pts.mean(axis=0)
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    direction = vt[0]
    # Remove the component along `up` so the tangent lies in the track plane.
    up_vec = up.astype(np.float64)
    direction = direction - up_vec * (direction @ up_vec)
    direction = direction / (np.linalg.norm(direction) + 1e-12)
    if direction[0] < 0:
        direction = -direction
    return direction.astype(np.float32)
def build_octagon(half_w_top: float) -> np.ndarray:
    """Build the clearance-gauge octagon cross-section in (l, v) coordinates.

    v = 0 is the rail-top plane and v < 0 goes downward.  Widths derive from
    the measured rail half width plus the configured train width and safety
    margins.

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage); reconstructed here.
    """
    w_top = float(max(0.10, half_w_top + TOP_MARGIN_M))
    w_mid = float(max(w_top + 0.10, 0.5 * TRAIN_MAX_WIDTH_M + SIDE_MARGIN_M))
    w_bot = float(max(w_top, w_mid * BOTTOM_WIDTH_RATIO))

    H = float(HANG_HEIGHT_M + HEIGHT_EXTRA_M)
    ht = float(min(TOP_CHAMFER_H, max(0.10, 0.25 * H)))  # upper chamfer height
    hb = float(min(BOT_CHAMFER_H, max(0.10, 0.25 * H)))  # lower chamfer height

    v0 = 0.0
    v1 = -ht
    v2 = -(H - hb)
    v3 = -H
    poly = np.array([
        [-w_top, v0],
        [ w_top, v0],
        [ w_mid, v1],
        [ w_mid, v2],
        [ w_bot, v3],
        [-w_bot, v3],
        [-w_mid, v2],
        [-w_mid, v1],
    ], dtype=np.float32)
    return poly
def build_track_model(rail_info: RailMaskInfo,
                      calib: Dict,
                      T_lidar2cam: np.ndarray,
                      rail_pts_xyz: np.ndarray,
                      img_h: int, img_w: int) -> TrackModel:
    """Build the track-aligned frame and clearance octagon from rail mask + rail points.

    Pipeline: fit the rail-top plane from rail points; intersect centerline
    pixel rays with that plane to get a 3D centerline; PCA of the centerline
    gives the tangent; left/right edge rays give the top half width; finally
    assemble the clearance octagon.

    Fix: the original body had its indentation collapsed across two lines
    ("text..." paste damage) and was not valid Python; reconstructed here.
    """
    dbg: Dict = {}
    if rail_info.mid_poly.shape[0] < 20:
        return TrackModel(False, np.zeros(3), np.array([1, 0, 0], dtype=np.float32),
                          np.array([0, 1, 0], dtype=np.float32), np.array([0, 0, 1], dtype=np.float32),
                          0.0, 1.0, 0.5, np.zeros((0, 2), dtype=np.float32),
                          {"err": "mid_poly too short"})

    if rail_pts_xyz.shape[0] < MIN_RAIL_PTS:
        dbg["warn"] = f"rail_pts too few ({rail_pts_xyz.shape[0]}). plane may be unstable."

    # 1) Fit the rail-top plane from the rail point cloud.
    p0, up = fit_plane_svd(rail_pts_xyz, iters=2, keep_q=0.90)
    dbg["plane_p0"] = p0.tolist()
    dbg["plane_up"] = up.tolist()

    # 2) Intersect centerline pixel rays with the plane to get 3D centerline
    #    points (robust to uneven left/right point-cloud coverage).
    mid = rail_info.mid_poly
    if mid.shape[0] > CENTERLINE_N_ROWS:
        idx = np.linspace(0, mid.shape[0] - 1, CENTERLINE_N_ROWS).astype(np.int32)
        mid_s = mid[idx]
    else:
        mid_s = mid
    uv_mid = mid_s[:, :2].astype(np.float32)
    o_lidar, d_lidar = pixels_to_rays_in_lidar(uv_mid, calib, T_lidar2cam)
    cl_world = intersect_rays_with_plane(o_lidar, d_lidar, p0, up)
    ok = np.isfinite(cl_world).all(axis=1)
    cl_world = cl_world[ok]
    dbg["centerline_pts"] = int(cl_world.shape[0])
    if cl_world.shape[0] < 10:
        return TrackModel(False, np.zeros(3), np.array([1, 0, 0], dtype=np.float32),
                          np.array([0, 1, 0], dtype=np.float32), up,
                          0.0, 1.0, 0.5, np.zeros((0, 2), dtype=np.float32),
                          {"err": "centerline ray-plane intersection too few", **dbg})

    # 3) Tangent t via PCA of the centerline, lateral axis n = up x t.
    t = pca_direction(cl_world, up)
    n = np.cross(up, t)
    n = n / (np.linalg.norm(n) + 1e-12)

    # 4) Origin: centerline mean (lies on the plane by construction).
    origin = cl_world.mean(axis=0).astype(np.float32)

    # 5) Along-track range from the centerline projection, plus a margin.
    s = (cl_world - origin.reshape(1, 3)) @ t.reshape(3, 1)
    s = s.reshape(-1)
    s_min = float(np.quantile(s, 0.02))
    s_max = float(np.quantile(s, 0.98))
    margin = max(1.5, 0.10 * (s_max - s_min))
    s_min -= margin
    s_max += margin
    dbg["s_min"] = s_min
    dbg["s_max"] = s_max

    # 6) Top half width from left/right edge rays (more stable than relying on
    #    symmetric point-cloud coverage).
    left = rail_info.left_poly
    right = rail_info.right_poly
    if left.shape[0] > 10 and right.shape[0] > 10:
        k = min(left.shape[0], right.shape[0], 80)
        idl = np.linspace(0, left.shape[0] - 1, k).astype(np.int32)
        idr = np.linspace(0, right.shape[0] - 1, k).astype(np.int32)
        uvL = left[idl, :2].astype(np.float32)
        uvR = right[idr, :2].astype(np.float32)
        oL, dL = pixels_to_rays_in_lidar(uvL, calib, T_lidar2cam)
        oR, dR = pixels_to_rays_in_lidar(uvR, calib, T_lidar2cam)
        pL = intersect_rays_with_plane(oL, dL, p0, up)
        pR = intersect_rays_with_plane(oR, dR, p0, up)
        okL = np.isfinite(pL).all(axis=1)
        okR = np.isfinite(pR).all(axis=1)
        ok2 = okL & okR
        pL = pL[ok2]
        pR = pR[ok2]
        if pL.shape[0] >= 10:
            lL = (pL - origin.reshape(1, 3)) @ n.reshape(3, 1)
            lR = (pR - origin.reshape(1, 3)) @ n.reshape(3, 1)
            w = (lR - lL).reshape(-1)
            half_w_top = float(np.median(np.abs(w) * 0.5))
        else:
            half_w_top = 0.55
            dbg["warn_half_w"] = "edge ray-plane too few, fallback=0.55"
    else:
        half_w_top = 0.55
        dbg["warn_half_w"] = "edge poly too short, fallback=0.55"
    dbg["half_w_top"] = half_w_top

    poly_lv = build_octagon(half_w_top)
    return TrackModel(True, origin, t.astype(np.float32), n.astype(np.float32),
                      up.astype(np.float32), s_min, s_max, float(half_w_top),
                      poly_lv, dbg)
def world_to_track_coords(track: TrackModel, pts_xyz: np.ndarray) -> np.ndarray:
    """Project world points into the track frame; returns (N,3) columns (s, l, v)."""
    rel = pts_xyz - track.origin.reshape(1, 3)
    # Rows of `basis` are the tangent, lateral and up axes; rel @ basis.T gives
    # the per-axis projections in one shot.
    basis = np.stack([track.t, track.n, track.up], axis=0)
    return (rel @ basis.T).astype(np.float32)
def _cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
return float((a[0]-o[0])(b[1]-o[1]) - (a[1]-o[1])(b[0]-o[0]))
def convex_hull(points: np.ndarray) -> np.ndarray:
    """Convex hull of 2D points via Andrew's monotone chain.

    Returns (M,2) float32 hull vertices; fewer than 3 unique points are
    returned as-is, and empty/None input yields an empty (0,2) array.
    """
    if points is None or len(points) == 0:
        return np.zeros((0, 2), dtype=np.float32)
    uniq = np.unique(points.astype(np.float64), axis=0)
    if uniq.shape[0] < 3:
        return uniq.astype(np.float32)
    uniq = uniq[np.lexsort((uniq[:, 1], uniq[:, 0]))]

    def half_chain(seq):
        # Drop the middle point whenever the turn is clockwise or collinear.
        chain = []
        for p in seq:
            while len(chain) >= 2 and _cross(np.array(chain[-2]), np.array(chain[-1]), p) <= 0:
                chain.pop()
            chain.append(p)
        return chain

    lower = half_chain(uniq)
    upper = half_chain(uniq[::-1])
    hull = np.array(lower[:-1] + upper[:-1], dtype=np.float64)
    return hull.astype(np.float32)
def polygon_area(poly: np.ndarray) -> float:
    """Unsigned polygon area via the shoelace formula; 0.0 for fewer than 3 vertices."""
    if poly is None or poly.shape[0] < 3:
        return 0.0
    xs, ys = poly[:, 0], poly[:, 1]
    shoelace = np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1))
    return float(0.5 * abs(shoelace))
def ensure_ccw(poly: np.ndarray) -> np.ndarray:
    """Return the polygon with counter-clockwise winding (signed shoelace area >= 0)."""
    if poly is None or poly.shape[0] < 3:
        return poly
    xs, ys = poly[:, 0], poly[:, 1]
    signed_area = float(0.5 * (np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1))))
    if signed_area < 0:
        return poly[::-1].copy()
    return poly
def inside_half_plane(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> bool:
    """True when p lies on or to the left of the directed edge a->b (inside test for a CCW polygon)."""
    return _cross(a, b, p) >= -1e-9
def line_intersection(p1: np.ndarray, p2: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Intersection point of the infinite lines (p1, p2) and (a, b).

    Returns a copy of p2 when the lines are (nearly) parallel.

    Fix: the original lost its ``*`` operators (``(x1-x2)(y3-y4)``, ``x1y2``,
    copy/paste damage) and could not run; restored.
    """
    x1, y1 = p1
    x2, y2 = p2
    x3, y3 = a
    x4, y4 = b
    den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
    if abs(den) < 1e-12:
        return p2.copy()
    px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / den
    py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / den
    return np.array([px, py], dtype=np.float32)
def convex_polygon_intersection(subject: np.ndarray, clip: np.ndarray) -> np.ndarray:
    """Sutherland-Hodgman clipping of convex polygon ``subject`` by convex polygon ``clip``.

    Both inputs are (N,2); windings are normalized to CCW before clipping.
    The result may be empty (0,2).

    Fix: the original body had its indentation collapsed onto a single line
    ("text..." paste damage); reconstructed here.
    """
    if subject is None or subject.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    if clip is None or clip.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    subj = ensure_ccw(subject).astype(np.float32)
    clp = ensure_ccw(clip).astype(np.float32)

    out = subj
    for i in range(clp.shape[0]):
        a = clp[i]
        b = clp[(i + 1) % clp.shape[0]]
        inp = out
        if inp.shape[0] == 0:
            break
        out_list = []
        for j in range(inp.shape[0]):
            p = inp[j]
            q = inp[(j + 1) % inp.shape[0]]
            pin = inside_half_plane(p, a, b)
            qin = inside_half_plane(q, a, b)
            if qin:
                if not pin:
                    out_list.append(line_intersection(p, q, a, b))
                out_list.append(q.copy())
            elif pin:
                out_list.append(line_intersection(p, q, a, b))
        out = np.array(out_list, dtype=np.float32)
    return out
def point_in_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> bool:
    """True when pt is inside or on the boundary of the convex polygon (any input winding)."""
    ccw = ensure_ccw(poly)
    count = ccw.shape[0]
    return all(
        _cross(ccw[i], ccw[(i + 1) % count], pt) >= -1e-8
        for i in range(count)
    )
def point_to_segment_distance(pt: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
    """Euclidean distance from point ``pt`` to the segment [a, b].

    Fix: the original lost its ``*`` operators (``vxwx``, ``tvx`` would raise
    NameError; copy/paste damage); restored.
    """
    ax, ay = a
    bx, by = b
    px, py = pt
    vx, vy = bx - ax, by - ay
    wx, wy = px - ax, py - ay
    c1 = vx * wx + vy * wy
    if c1 <= 0:
        # Projection falls before a.
        return float(math.hypot(px - ax, py - ay))
    c2 = vx * vx + vy * vy
    if c2 <= c1:
        # Projection falls past b.
        return float(math.hypot(px - bx, py - by))
    t = c1 / (c2 + 1e-12)
    projx = ax + t * vx
    projy = ay + t * vy
    return float(math.hypot(px - projx, py - projy))
def signed_distance_to_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> float:
    """Signed distance from pt to the polygon boundary: negative inside, positive outside."""
    ccw = ensure_ccw(poly)
    count = ccw.shape[0]
    boundary = min(
        point_to_segment_distance(pt, ccw[i], ccw[(i + 1) % count])
        for i in range(count)
    )
    return -boundary if point_in_convex_polygon(pt, ccw) else boundary
@dataclass
class IntrusionMetrics:
    """Per-detection intrusion measurements against the clearance gauge."""
    cls: str                  # detection class name
    score: float              # detection confidence
    intrude: bool             # final intrusion verdict
    reason: str               # which decision rule fired (or why not)
    s_overlap: float          # along-track overlap with the gauge range (m)
    s_ratio: float            # s_overlap / detection length along the track
    area_ratio: float         # (l,v) cross-section overlap fraction with the octagon
    volume_ratio: float       # s_ratio * area_ratio, combined intrusion fraction
    center_inside: bool       # whether the box center lies inside the 3D gauge
    signed_dist_center: float # signed distance of the center to the octagon (inside is negative)
def intrusion_for_det(track: TrackModel, det: F.Det3D) -> IntrusionMetrics:
    """Measure how much a fused 3-D detection overlaps the track clearance envelope.

    The detection box is expressed in track coordinates (s = along track,
    l = lateral, v = vertical); overlap is estimated as the product of the
    longitudinal overlap ratio and the (l,v) footprint overlap ratio.
    """
    box7 = det.box7.astype(np.float32)
    corners = F.boxes3d_to_corners(box7.reshape(1, 7))[0]  # (8,3)
    slv = world_to_track_coords(track, corners)            # (8,3) in (s,l,v)
    s_vals = slv[:, 0]
    lv = slv[:, 1:3]

    # Longitudinal (along-track) overlap with the envelope's s-range.
    det_s_min = float(np.min(s_vals))
    det_s_max = float(np.max(s_vals))
    det_s_len = max(1e-6, det_s_max - det_s_min)
    s0 = max(det_s_min, track.s_min)
    s1 = min(det_s_max, track.s_max)
    s_overlap = max(0.0, s1 - s0)
    s_ratio = float(s_overlap / det_s_len)

    # Cross-section overlap: detection footprint hull vs clearance octagon.
    det_poly = convex_hull(lv)
    det_area = polygon_area(det_poly)
    clear_poly = ensure_ccw(track.poly_lv.copy())
    inter = (convex_polygon_intersection(det_poly, clear_poly)
             if det_area > 1e-9 else np.zeros((0, 2), np.float32))
    inter_area = polygon_area(inter)
    area_ratio = float(inter_area / (det_area + 1e-9)) if det_area > 1e-9 else 0.0
    volume_ratio = float(area_ratio * s_ratio)

    # Box-center checks.
    center = box7[:3].reshape(1, 3)
    c_slv = world_to_track_coords(track, center)[0]
    c_lv = c_slv[1:3]
    center_inside = (track.s_min <= float(c_slv[0]) <= track.s_max) \
        and point_in_convex_polygon(c_lv, clear_poly)
    signed_dist = float(signed_distance_to_convex_polygon(c_lv, clear_poly))

    # Decision rules (results are printed / persisted by the caller).
    if s_overlap <= 1e-6:
        intrude = False
        reason = "no_s_overlap"
    else:
        if FORCE_CENTER_INSIDE_INTRUDE and center_inside:
            intrude = True
            reason = "center_inside"
        elif volume_ratio >= INTRUDE_VOL_RATIO_THR:
            intrude = True
            reason = f"volume_ratio>={INTRUDE_VOL_RATIO_THR:.2f}"
        elif (area_ratio >= INTRUDE_AREA_RATIO_THR) and (s_ratio >= 0.25):
            intrude = True
            reason = f"area_ratio>={INTRUDE_AREA_RATIO_THR:.2f}&s_ratio>=0.25"
        else:
            intrude = False
            reason = "below_thresholds"

    return IntrusionMetrics(
        cls=det.cls_name,
        score=float(det.score),
        intrude=bool(intrude),
        reason=reason,
        s_overlap=float(s_overlap),
        s_ratio=float(s_ratio),
        area_ratio=float(area_ratio),
        volume_ratio=float(volume_ratio),
        center_inside=bool(center_inside),
        signed_dist_center=float(signed_dist),
    )
def save_edge_debug_images(out_dir: str, img: np.ndarray, roi_mask: np.ndarray, rail: RailMaskInfo):
    """Dump segmentation / edge debug images (01..05) into `out_dir`."""
    # 01: segmentation overlay (+ optional ROI outline)
    vis = overlay_mask(img, rail.rail_mask01, color=(0, 0, 255), alpha=0.55)
    if ROI_ENABLE:
        cnts, _ = cv2.findContours(roi_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(vis, cnts, -1, (0, 255, 255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "01_seg_overlay.jpg"), vis)

    # 02/03: raw masks
    cv2.imwrite(os.path.join(out_dir, "02_rail_mask.png"),
                (rail.rail_mask01 * 255).astype(np.uint8))
    cv2.imwrite(os.path.join(out_dir, "03_corridor_mask.png"),
                (rail.corridor01 * 255).astype(np.uint8))

    # 04: left/right/mid polylines overlay
    poly_vis = img.copy()
    if rail.left_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.left_poly.reshape(-1, 1, 2)], False, (255, 0, 0), 3, cv2.LINE_AA)
    if rail.right_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.right_poly.reshape(-1, 1, 2)], False, (0, 0, 255), 3, cv2.LINE_AA)
    if rail.mid_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.mid_poly.reshape(-1, 1, 2)], False, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "04_edge_polylines.jpg"), poly_vis)

    # 05: edge masks rendered as colors (left=blue channel image, right=red)
    edge_rgb = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
    edge_rgb[rail.left_edge_mask > 0] = (255, 0, 0)
    edge_rgb[rail.right_edge_mask > 0] = (0, 0, 255)
    cv2.imwrite(os.path.join(out_dir, "05_edge_masks.png"), edge_rgb)
def draw_projected_points(img: np.ndarray, uv: np.ndarray, valid: np.ndarray, color, step: int = 8):
    """Draw every `step`-th valid projected point as a 1-px dot onto `img` (in place)."""
    h, w = img.shape[:2]
    uu = np.round(uv[:, 0]).astype(np.int32)
    vv = np.round(uv[:, 1]).astype(np.int32)
    on_image = valid & (uu >= 0) & (uu < w) & (vv >= 0) & (vv < h)
    candidates = np.where(on_image)[0]
    if candidates.size == 0:
        return
    for k in candidates[::max(1, step)]:
        cv2.circle(img, (int(uu[k]), int(vv[k])), 1, color, -1, cv2.LINE_AA)
def render_bev_density(points_xyz: np.ndarray,
                       out_path: str,
                       track: Optional[TrackModel] = None,
                       dets: Optional[List[F.Det3D]] = None,
                       res: float = 0.06):
    """Render a top-down (X-Y) point-density image with optional envelope/detection overlays.

    Args:
        points_xyz: (N,3+) lidar points; only x,y are used for the density map.
        out_path:   output image path (written with cv2.imwrite).
        track:      optional clearance envelope; drawn as octagon slices when track.ok.
        dets:       optional fused detections; footprints drawn in green.
        res:        grid resolution in meters per pixel.
    """
    pts = points_xyz
    if pts.shape[0] > 250000:
        idx = np.random.choice(pts.shape[0], 250000, replace=False)
        pts = pts[idx]

    # Robust extent from 2%/98% quantiles, padded by a fixed margin.
    x = pts[:, 0]; y = pts[:, 1]
    x_min = float(np.quantile(x, 0.02)); x_max = float(np.quantile(x, 0.98))
    y_min = float(np.quantile(y, 0.02)); y_max = float(np.quantile(y, 0.98))
    margin = 3.0
    x_min -= margin; x_max += margin
    y_min -= margin; y_max += margin

    H = int(max(480, math.ceil((x_max - x_min) / res)))
    W = int(max(480, math.ceil((y_max - y_min) / res)))
    rr = ((x_max - pts[:, 0]) / res).astype(np.int32)  # +x points upward in the image
    cc = ((pts[:, 1] - y_min) / res).astype(np.int32)
    rr = np.clip(rr, 0, H - 1)
    cc = np.clip(cc, 0, W - 1)

    # 2-D histogram -> log-compressed, gamma-boosted grayscale.
    idx = rr * W + cc
    cnt = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)
    img = np.log1p(cnt)
    if img.max() > 0:
        img = img / img.max()
    img = (img ** 0.55 * 255).astype(np.uint8)
    bev = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    def xy_to_pix(xy):
        xx, yy = float(xy[0]), float(xy[1])
        r = (x_max - xx) / res
        c = (yy - y_min) / res
        return int(round(c)), int(round(r))

    # Envelope wireframe: octagon cross-sections sampled along the track axis.
    if track is not None and track.ok:
        ss = np.linspace(track.s_min, track.s_max, 14).astype(np.float32)
        for s in ss:
            base = track.origin + track.t * float(s)
            poly_xy = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                poly_xy.append(p[:2])
            poly_xy = np.array(poly_xy, dtype=np.float32)
            poly_pix = np.array([xy_to_pix(p) for p in poly_xy], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(bev, [poly_pix], True, (0, 255, 255), 1, cv2.LINE_AA)

    # Detection footprints (bottom face corners -> XY polygon).
    if dets:
        for d in dets:
            corners = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
            xy = corners[:4, :2]
            pix = np.array([xy_to_pix(p) for p in xy], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(bev, [pix], True, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imwrite(out_path, bev)
def render_front_xz_density(points_xyz: np.ndarray,
                            out_path: str,
                            track: Optional[TrackModel] = None,
                            dets: Optional[List[F.Det3D]] = None,
                            res: float = 0.06):
    """Render a front-view (X-Z) point-density image with optional envelope/detection overlays.

    Same approach as render_bev_density but projected onto the X (lateral) /
    Z (height) plane; detection boxes are approximated by four corners.
    """
    pts = points_xyz
    if pts.shape[0] > 250000:
        idx = np.random.choice(pts.shape[0], 250000, replace=False)
        pts = pts[idx]

    # Robust extent from 2%/98% quantiles, padded by a fixed margin.
    x = pts[:, 0]; z = pts[:, 2]
    x_min = float(np.quantile(x, 0.02)); x_max = float(np.quantile(x, 0.98))
    z_min = float(np.quantile(z, 0.02)); z_max = float(np.quantile(z, 0.98))
    margin = 1.5
    x_min -= margin; x_max += margin
    z_min -= margin; z_max += margin

    H = int(max(480, math.ceil((z_max - z_min) / res)))
    W = int(max(480, math.ceil((x_max - x_min) / res)))
    rr = ((z_max - pts[:, 2]) / res).astype(np.int32)  # +z points upward in the image
    cc = ((pts[:, 0] - x_min) / res).astype(np.int32)
    rr = np.clip(rr, 0, H - 1)
    cc = np.clip(cc, 0, W - 1)

    # 2-D histogram -> log-compressed, gamma-boosted grayscale.
    idx = rr * W + cc
    cnt = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)
    img = np.log1p(cnt)
    if img.max() > 0:
        img = img / img.max()
    img = (img ** 0.55 * 255).astype(np.uint8)
    front = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    def xz_to_pix(xz):
        xx, zz = float(xz[0]), float(xz[1])
        c = (xx - x_min) / res
        r = (z_max - zz) / res
        return int(round(c)), int(round(r))

    # Envelope wireframe: octagon cross-sections sampled along the track axis.
    if track is not None and track.ok:
        ss = np.linspace(track.s_min, track.s_max, 14).astype(np.float32)
        for s in ss:
            base = track.origin + track.t * float(s)
            poly_xz = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                poly_xz.append([p[0], p[2]])
            poly_xz = np.array(poly_xz, dtype=np.float32)
            poly_pix = np.array([xz_to_pix(p) for p in poly_xz], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(front, [poly_pix], True, (0, 255, 255), 1, cv2.LINE_AA)

    # Detection boxes approximated by four corners projected to X-Z.
    if dets:
        for d in dets:
            corners = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
            xz = corners[[0, 1, 2, 3], :][:, [0, 2]]
            pix = np.array([xz_to_pix(p) for p in xz], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(front, [pix], True, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imwrite(out_path, front)
def render_3d_scene(points_xyz: np.ndarray, track: TrackModel, dets: List[F.Det3D], out_png: str):
    """Render points + envelope wireframe + detection boxes as a matplotlib 3-D scatter PNG.

    Silently skips (with a warning) when matplotlib is not installed.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")  # headless backend: no display needed
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    except Exception as e:
        print(f"[WARN] matplotlib not available, skip 3d render: {e}")
        return

    pts = points_xyz
    if pts.shape[0] > 20000:
        idx = np.random.choice(pts.shape[0], 20000, replace=False)
        pts = pts[idx]

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], s=0.4)

    # Envelope wireframe: octagon rings along s, connected to the previous ring.
    if track.ok:
        ss = np.linspace(track.s_min, track.s_max, 18)
        prev = None
        for s in ss:
            base = track.origin + track.t * float(s)
            ring = []
            for l, v in track.poly_lv:
                ring.append(base + track.n * float(l) + track.up * float(v))
            ring = np.array(ring)
            for i in range(8):
                p = ring[i]; q = ring[(i + 1) % 8]
                ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])
            if prev is not None:
                for i in range(8):
                    p = prev[i]; q = ring[i]
                    ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])
            prev = ring

    # Detection boxes drawn as the 12 edges of each cuboid.
    edges = [(0, 1), (1, 2), (2, 3), (3, 0),
             (4, 5), (5, 6), (6, 7), (7, 4),
             (0, 4), (1, 5), (2, 6), (3, 7)]
    for d in dets:
        c = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
        for i, j in edges:
            p = c[i]; q = c[j]
            ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])

    ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlabel("Z")
    plt.tight_layout()
    fig.savefig(out_png, dpi=160)
    plt.close(fig)
def parse_args():
    """CLI arguments.

    Fix: main() consumes args.mmseg_python / args.mmseg_repo for the mmseg
    subprocess helper, but they were never registered here (the log shows the
    resulting AttributeError from calling add_argument on the Namespace).
    They are now defined on the parser with sensible defaults.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--frame_id", type=str, default="", help="指定帧号(不含扩展名),例如 2025042116142702")
    ap.add_argument("--seg_config", type=str,
                    default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py")
    ap.add_argument("--seg_ckpt", type=str,
                    default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth")
    ap.add_argument("--seg_device", type=str, default="cuda:0")
    ap.add_argument("--rail_id", type=int, default=RAIL_CLASS_ID_DEFAULT)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--topk", type=int, default=20, help="侵界判定最多取多少个 fused det")
    # mmseg subprocess environment (consumed by infer_rail_mask_by_subprocess)
    ap.add_argument("--mmseg_python", type=str,
                    default="/home/jd/anaconda3/envs/mmseg3.0/bin/python",
                    help="mmseg3.0 虚拟环境 python 路径(必须是可执行文件)")
    ap.add_argument("--mmseg_repo", type=str,
                    default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation",
                    help="mmsegmentation repo 根目录(用于 sys.path 兜底,可空)")
    return ap.parse_args()
def main():
    """Pipeline entry point: seg -> det -> fuse -> project -> envelope -> intrusion metrics.

    Fixes vs the pasted version:
      * add_argument() was being called on the parsed Namespace (AttributeError);
        the mmseg flags now come from parse_args(), with a getattr fallback so
        this function also works against an older parse_args.
      * segmentation is run only AFTER the frame is chosen, out_dir is created
        and the image is read — the old order referenced img_path/out_dir
        before they existed.
    """
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Fallbacks in case parse_args does not define the mmseg flags.
    mmseg_python = getattr(args, "mmseg_python", "/home/jd/anaconda3/envs/mmseg3.0/bin/python")
    mmseg_repo = getattr(args, "mmseg_repo", "/media/jd/4997BB1603CFE2C4/lw/mmsegmentation")

    # 1) detection models (also provides the dataset index for frame lookup)
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models()
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}

    # 2) choose a frame that has both an image and a point-cloud bin
    bin_files = sorted(list(Path(F.PCDET_POINTS_DIR).glob("*.bin")))
    frame_ids = [p.stem for p in bin_files
                 if F.find_image_for_frame(p.stem) is not None and p.stem in idx_map]
    if not frame_ids:
        print("[ERROR] no matched (img,bin) frames.")
        return
    fid = args.frame_id if args.frame_id else random.choice(frame_ids)
    if fid not in idx_map:
        print(f"[ERROR] frame_id {fid} not in dataset index.")
        return
    out_dir = os.path.join(OUT_ROOT, fid)
    os.makedirs(out_dir, exist_ok=True)

    # 3) read image
    img_path = F.find_image_for_frame(fid)
    img = cv2.imread(img_path)
    if img is None:
        print(f"[ERROR] cannot read image: {img_path}")
        return
    H, W = img.shape[:2]
    cv2.imwrite(os.path.join(out_dir, "00_raw.jpg"), img)

    # 4) ROI
    roi_mask = make_roi_mask(H, W)

    # 5) segmentation via subprocess (img_path/out_dir now exist)
    print("[INFO] Running PIDNet(mmseg) in subprocess...")
    pred = infer_rail_mask_by_subprocess(
        img_path=img_path,
        out_npy=os.path.join(out_dir, "seg_pred.npy"),
        seg_config=args.seg_config,
        seg_ckpt=args.seg_ckpt,
        seg_device=args.seg_device,
        mmseg_python=mmseg_python,
        mmseg_repo=mmseg_repo,
    )
    rail_info = build_masks_from_rail_pred(pred, args.rail_id, H, W, roi_mask)
    save_edge_debug_images(out_dir, img, roi_mask, rail_info)
    print(f"[SEG] rows_valid={rail_info.dbg.get('rows_valid')} rows_kept={rail_info.dbg.get('rows_kept')} median_width_px={rail_info.dbg.get('median_width_px')}")

    # 6) detection inference + fusion
    yolo_dets = F.infer_yolo(yolo_model, img)
    _, raw_points, pcdet_dets = F.infer_pcdet(cfg, dataset, pcdet_model,
                                              load_data_to_gpu_fn, idx_map[fid], device)
    if raw_points is None or raw_points.shape[0] == 0:
        print("[ERROR] raw_points empty.")
        return
    T = F.get_extrinsic_matrix(F.CALIB, raw_points[:, :3], W, H)
    fused = F.fuse_frame(yolo_dets, pcdet_dets, W, H, F.CALIB, T, raw_points)
    fused = sorted(fused, key=lambda d: d.score, reverse=True)[:max(1, args.topk)]
    fused_keep = [d for d in fused if d.score >= 0.10]
    print(f"[DETS] yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused)} keep(score>=0.10)={len(fused_keep)}")

    # 7) project all points into the image
    pts_xyz = raw_points[:, :3].astype(np.float32)
    uv, valid = F.project_points_lidar_to_img(pts_xyz, F.CALIB, T, W, H,
                                              use_distortion=F.USE_DISTORTION)
    vis_all = img.copy()
    draw_projected_points(vis_all, uv, valid, (200, 200, 200), step=12)
    vis_all = overlay_mask(vis_all, rail_info.rail_mask01, color=(0, 0, 255), alpha=0.35)
    cv2.imwrite(os.path.join(out_dir, "06_proj_all_points.jpg"), vis_all)

    # 8) select rail-top points via (dilated) rail mask — dilation tolerates
    #    small extrinsic errors
    rail_mask255 = (rail_info.rail_mask01 * 255).astype(np.uint8)
    if MASK_DILATE_PX > 0:
        kd = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_DILATE_PX, MASK_DILATE_PX))
        rail_mask255 = cv2.dilate(rail_mask255, kd, iterations=1)
    rail_mask01_dil = (rail_mask255 > 0).astype(np.uint8)
    rail_pts = select_points_by_mask(pts_xyz, uv, valid, rail_mask01_dil)
    print(f"[RAIL_PTS] selected={rail_pts.shape[0]}")

    vis_rail = img.copy()
    draw_projected_points(vis_rail, uv, valid, (120, 120, 120), step=20)
    # Re-derive mask-hit indices to draw rail points in green (fast path).
    Hh, Ww = rail_mask01_dil.shape
    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < Ww) & (v >= 0) & (v < Hh)
    idx = np.where(inside)[0]
    keep = rail_mask01_dil[v[idx], u[idx]] > 0
    idx_keep = idx[keep]
    for i in idx_keep[::6]:
        cv2.circle(vis_rail, (int(u[i]), int(v[i])), 1, (0, 255, 0), -1, cv2.LINE_AA)
    if rail_info.left_poly.shape[0] > 0:
        cv2.polylines(vis_rail, [rail_info.left_poly.reshape(-1, 1, 2)], False, (255, 0, 0), 2, cv2.LINE_AA)
    if rail_info.right_poly.shape[0] > 0:
        cv2.polylines(vis_rail, [rail_info.right_poly.reshape(-1, 1, 2)], False, (0, 0, 255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "07_proj_rail_points.jpg"), vis_rail)

    # 9) match points near left/right rail edges
    left_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.left_edge_mask, EDGE_MATCH_PX)
    right_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.right_edge_mask, EDGE_MATCH_PX)
    print(f"[EDGE_PTS] left={left_edge_pts.shape[0]} right={right_edge_pts.shape[0]}")

    vis_edge = img.copy()
    draw_projected_points(vis_edge, uv, valid, (120, 120, 120), step=22)

    def draw_edge_points(edge_mask255, color):
        # Distance transform against the edge mask: keep points within
        # EDGE_MATCH_PX pixels of the edge.
        src = np.ones((H, W), dtype=np.uint8) * 255
        src[edge_mask255 > 0] = 0
        dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)
        uu = np.round(uv[:, 0]).astype(np.int32)
        vv = np.round(uv[:, 1]).astype(np.int32)
        ins = valid & (uu >= 0) & (uu < W) & (vv >= 0) & (vv < H)
        ii = np.where(ins)[0]
        d = dist[vv[ii], uu[ii]]
        jj = ii[d <= EDGE_MATCH_PX]
        for j in jj[::4]:
            cv2.circle(vis_edge, (int(uu[j]), int(vv[j])), 1, color, -1, cv2.LINE_AA)

    draw_edge_points(rail_info.left_edge_mask, (255, 0, 0))
    draw_edge_points(rail_info.right_edge_mask, (0, 0, 255))
    if rail_info.mid_poly.shape[0] > 0:
        cv2.polylines(vis_edge, [rail_info.mid_poly.reshape(-1, 1, 2)], False, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "08_proj_edge_points.jpg"), vis_edge)

    # 10) build track model + clearance envelope
    track = build_track_model(rail_info, F.CALIB, T, rail_pts, H, W)
    if not track.ok:
        print("[ERROR] build_track_model failed:", track.dbg)
        with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
            f.write("build_track_model failed\n")
            f.write(json.dumps(track.dbg, ensure_ascii=False, indent=2))
        return
    print("\n[TRACK_MODEL]")
    print(f"  origin={track.origin.tolist()}")
    print(f"  t={track.t.tolist()}")
    print(f"  n={track.n.tolist()}")
    print(f"  up={track.up.tolist()}")
    print(f"  s_range=[{track.s_min:.2f}, {track.s_max:.2f}] half_w_top={track.half_w_top:.3f}")
    print(f"  octagon(l,v):\n{track.poly_lv}")
    print(f"  dbg={track.dbg}")

    # 11) intrusion metrics for the kept fused detections
    metrics: List[IntrusionMetrics] = [intrusion_for_det(track, d) for d in fused_keep]
    print("\n========== INTRUSION METRICS ==========")
    if not metrics:
        print("No fused detections (score>=0.10).")
    for i, m in enumerate(metrics):
        print(f"[{i}] cls={m.cls:14s} detScore={m.score:.3f} INTRUDE={m.intrude} ({m.reason})")
        print(f"    s_overlap={m.s_overlap:.2f}m s_ratio={m.s_ratio:.3f}")
        print(f"    area_ratio(l-v)={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}")
        print(f"    center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}m")

    # 12) persist metrics (json + txt)
    out_json = {
        "frame_id": fid,
        "seg_dbg": rail_info.dbg,
        "track_dbg": track.dbg,
        "rules": {
            "INTRUDE_VOL_RATIO_THR": INTRUDE_VOL_RATIO_THR,
            "INTRUDE_AREA_RATIO_THR": INTRUDE_AREA_RATIO_THR,
            "FORCE_CENTER_INSIDE_INTRUDE": FORCE_CENTER_INSIDE_INTRUDE,
        },
        "dets": [m.__dict__ for m in metrics],
    }
    with open(os.path.join(out_dir, "intrusion_metrics.json"), "w", encoding="utf-8") as f:
        json.dump(out_json, f, ensure_ascii=False, indent=2)
    with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
        for i, m in enumerate(metrics):
            f.write(f"[{i}] cls={m.cls} score={m.score:.3f} intrude={m.intrude} reason={m.reason}\n")
            f.write(f"    s_overlap={m.s_overlap:.3f} s_ratio={m.s_ratio:.3f}\n")
            f.write(f"    area_ratio={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}\n")
            f.write(f"    center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}\n")

    # 13) BEV / front debug renders with the envelope, plus optional 3-D scene
    render_bev_density(pts_xyz, os.path.join(out_dir, "09_bev_envelope.jpg"),
                       track=track, dets=fused_keep, res=0.06)
    render_front_xz_density(pts_xyz, os.path.join(out_dir, "10_front_envelope.jpg"),
                            track=track, dets=fused_keep, res=0.06)
    render_3d_scene(pts_xyz, track, fused_keep, os.path.join(out_dir, "11_3d_scene.png"))
    print(f"\n[DONE] saved all debug files in: {out_dir}")
if __name__ == "__main__":
    main()

代码报错如下:(openpcdet) jd@jd-x11dai-n:/media/jd/4997BB1603CFE2C4/lw/Openpcdet$ CUDA_VISIBLE_DEVICES=1 python fusion/qinjiepanding.py
Traceback (most recent call last):
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1504, in <module>
main()
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1289, in main
args.add_argument("--mmseg_python", type=str,
AttributeError: 'Namespace' object has no attribute 'add_argument'
如何解决呢
这个报错原因很简单:你把 add_argument() 写到了 args(Namespace)上,但 add_argument() 只能写在 ArgumentParser(ap) 上。
所以会出现:AttributeError: 'Namespace' object has no attribute 'add_argument'
同时你现在代码还有一个逻辑问题:你在 main() 一开始就调用了
pythonpred = infer_rail_mask_by_subprocess(img_path=img_path, out_npy=os.path.join(out_dir,...))
但这时 img_path、out_dir 还没创建(你后面才选 frame、创建 out_dir、读取图片),所以即使修复 add_argument,下一步也会报 NameError 或路径不存在。
下面给你“正确写法”的修改片段(照抄覆盖即可)。
parse_args() 里(不要放到 main 里)把你的 parse_args() 改成这样(只改这一段就行):
pythondef parse_args(): ap = argparse.ArgumentParser() ap.add_argument("--frame_id", type=str, default="", help="指定帧号(不含扩展名),例如 2025042116142702") ap.add_argument("--seg_config", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py") ap.add_argument("--seg_ckpt", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth") ap.add_argument("--seg_device", type=str, default="cuda:0") ap.add_argument("--rail_id", type=int, default=RAIL_CLASS_ID_DEFAULT) ap.add_argument("--seed", type=int, default=42) ap.add_argument("--topk", type=int, default=20, help="侵界判定最多取多少个 fused det") # ✅ 新增:mmseg3.0 的 python 和 repo ap.add_argument("--mmseg_python", type=str, default="/home/jd/anaconda3/envs/mmseg3.0/bin/python", help="mmseg3.0 虚拟环境 python 路径(必须是可执行文件)") ap.add_argument("--mmseg_repo", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation", help="mmsegmentation repo 根目录(用于 sys.path 兜底,可空)") return ap.parse_args()
你之前写的
args.add_argument(...)两行 必须删掉。
把你 main() 开头部分改成下面这个顺序(关键是:先选 frame → 建 out_dir → 读 img → 再跑 mmseg 子进程):
pythondef main(): args = parse_args() random.seed(args.seed) np.random.seed(args.seed) # 1) load det models(先加载融合相关) device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models() idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)} # 2) choose frame bin_files = sorted(list(Path(F.PCDET_POINTS_DIR).glob("*.bin"))) frame_ids = [p.stem for p in bin_files if F.find_image_for_frame(p.stem) is not None and p.stem in idx_map] if not frame_ids: print("[ERROR] no matched (img,bin) frames.") return fid = args.frame_id if args.frame_id else random.choice(frame_ids) if fid not in idx_map: print(f"[ERROR] frame_id {fid} not in dataset index.") return out_dir = os.path.join(OUT_ROOT, fid) os.makedirs(out_dir, exist_ok=True) # 3) read image img_path = F.find_image_for_frame(fid) img = cv2.imread(img_path) if img is None: print(f"[ERROR] cannot read image: {img_path}") return H, W = img.shape[:2] cv2.imwrite(os.path.join(out_dir, "00_raw.jpg"), img) # 4) ROI roi_mask = make_roi_mask(H, W) # ✅ 5) segmentation inference(此时 img_path/out_dir 都存在了) print("[INFO] Running PIDNet(mmseg) in subprocess...") pred = infer_rail_mask_by_subprocess( img_path=img_path, out_npy=os.path.join(out_dir, "seg_pred.npy"), seg_config=args.seg_config, seg_ckpt=args.seg_ckpt, seg_device=args.seg_device, mmseg_python=args.mmseg_python, mmseg_repo=args.mmseg_repo ) # 6) build rail masks + debug rail_info = build_masks_from_rail_pred(pred, args.rail_id, H, W, roi_mask) save_edge_debug_images(out_dir, img, roi_mask, rail_info) print(f"[SEG] rows_valid={rail_info.dbg.get('rows_valid')} rows_kept={rail_info.dbg.get('rows_kept')} median_width_px={rail_info.dbg.get('median_width_px')}") # ---- 后面你的 YOLO/PCDet/投影/限界/侵界代码保持原样继续即可 ----
resolve_checkpoint 定义了两次你现在贴出来的代码里,resolve_checkpoint() 出现了两遍。
请保留我给你的那一版即可(带 last_checkpoint 支持),把另一版删掉,避免混淆。
init_mmseg_model(...)infer_rail_mask(model, img)不删也没事,但不要再调用它们,否则又会回到 No module named mmseg。
bashCUDA_VISIBLE_DEVICES=1 python fusion/qinjiepanding.py \ --mmseg_python /home/jd/anaconda3/envs/mmseg3.0/bin/python \ --mmseg_repo /media/jd/4997BB1603CFE2C4/lw/mmsegmentation
如果子进程失败,它会把 STDOUT/STDERR 全部打印出来(我在 infer_rail_mask_by_subprocess 里已经写了),你把那段 stderr 发我,我就能直接定位是 mmseg3.0 环境缺依赖(mmcv/mmengine/torch 版本)还是 config 引用路径问题。
代码报错如下:[CALIB] auto | lidar2cam_inlier=1.000 cam2lidar_inlier=0.224 -> choose lidar2cam
[DETS] yolo=1 pcdet=1 fused=1 keep(score>=0.10)=1
[RAIL_PTS] selected=1012
[EDGE_PTS] left=139 right=180
Traceback (most recent call last):
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1504, in <module>
main()
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1441, in main
track = build_track_model(rail_info, F.CALIB, T, rail_pts, H, W)
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 757, in build_track_model
o_lidar, d_lidar = pixels_to_rays_in_lidar(uv_mid, calib, T_lidar2cam)
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 620, in pixels_to_rays_in_lidar
xy = undistort_pixels_to_normalized(uv, calib) if F.USE_DISTORTION else None
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 598, in undistort_pixels_to_normalized
K = np.array(calib["camera_matrix"], dtype=np.float64)
KeyError: 'camera_matrix'
如何解决呢,我原来的fusion如下:
CALIB = {
"extrinsic_matrix": np.array([
[0.00871822977022152, -0.9990101808868351, -0.043619387365335945, -0.04000000000000002],
[-0.0003806461322609286, 0.043617726472722454, -0.9990482215818578, 0.25],
[0.9999619230641715, 0.008726535498373544, -1.3877787807814457e-17, -0.04000000000000002],
[0.0, 0.0, 0.0, 1.0]
], dtype=np.float64),
"fx": 3605.0801593073,
"fy": 3604.9573399128,
"cx": 951.9363889574,
"cy": 816.9773743048,
"width": 1920,
"height": 1536,
"dist": np.array([
2.4515361243, -46.8007861419,
-0.0002973913, -0.0008024507,
-144.3698857610, 2.6420544359,
-46.0443623397, -158.1742719597
], dtype=np.float64)
}
这个报错就是因为你脚本里 按 mmseg 的写法去读 calib["camera_matrix"],但你 fusion.py 的 CALIB 只有 fx/fy/cx/cy/dist,所以 KeyError: 'camera_matrix'。
解决办法:让代码 同时兼容两种标定格式(camera_matrix/dist_coeffs 或 fx/fy/cx/cy/dist)。最稳的是在你的 qinjiepanding.py 里加一个“取K和畸变”的适配函数,然后把 undistort_pixels_to_normalized() 改掉即可。
pythondef calib_get_K_dist(calib: Dict) -> Tuple[np.ndarray, np.ndarray]: """兼容两种标定格式: 1) calib["camera_matrix"] + calib["dist_coeffs"] 2) calib["fx","fy","cx","cy"] + calib["dist"] """ if "camera_matrix" in calib: K = np.array(calib["camera_matrix"], dtype=np.float64).reshape(3, 3) else: fx = float(calib["fx"]); fy = float(calib["fy"]) cx = float(calib["cx"]); cy = float(calib["cy"]) K = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float64) # dist 兼容 if "dist_coeffs" in calib: dist = np.array(calib["dist_coeffs"], dtype=np.float64).reshape(-1) elif "dist" in calib: dist = np.array(calib["dist"], dtype=np.float64).reshape(-1) else: dist = np.zeros((5,), dtype=np.float64) return K, dist
undistort_pixels_to_normalized()(直接覆盖原函数)pythondef undistort_pixels_to_normalized(uv: np.ndarray, calib: Dict) -> np.ndarray: """ uv: (N,2) 像素坐标 返回 normalized coords (N,2) in camera frame """ K, dist = calib_get_K_dist(calib) pts = uv.reshape(-1, 1, 2).astype(np.float64) und = cv2.undistortPoints(pts, K, dist) # 输出为归一化相机坐标 und = und.reshape(-1, 2) return und.astype(np.float32)
pixels_to_rays_in_lidar() 里取 K 的部分(可选但建议)你现在 pixels_to_rays_in_lidar() 里如果 F.USE_DISTORTION 为 False,会自己用 calib["camera_matrix"] 的逻辑也会炸。把里面这段:
pythonK = np.array(calib["camera_matrix"], dtype=np.float64) fx, fy = K[0, 0], K[1, 1] cx, cy = K[0, 2], K[1, 2]
改成:
pythonK, dist = calib_get_K_dist(calib) fx, fy = K[0, 0], K[1, 1] cx, cy = K[0, 2], K[1, 2]
并且如果你想在 F.USE_DISTORTION=True 时使用畸变,就保持你现在的:
pythonxy = undistort_pixels_to_normalized(uv, calib) if F.USE_DISTORTION else None
这样就不会再 KeyError。
如果你不想改函数,也可以在 fusion.py 的 CALIB 里直接补两个字段(推荐补在原 CALIB 下面):
pythonCALIB["camera_matrix"] = np.array([ [CALIB["fx"], 0.0, CALIB["cx"]], [0.0, CALIB["fy"], CALIB["cy"]], [0.0, 0.0, 1.0] ], dtype=np.float64) CALIB["dist_coeffs"] = CALIB["dist"]
但我更推荐上面第 1~3 步的“兼容式写法”,以后你换标定文件也不容易炸。
text[RAIL_PTS] selected=1012 [EDGE_PTS] left=139 right=180
说明:分割、投影、边缘匹配都通了,只是在做像素->光线的时候取K失败。按上面补齐 K/dist 后,就会继续往下跑(开始建中心线、建八边形限界、做侵界判定)。
如果你改完后又报新错,把新的 traceback(尤其是 mmseg 子进程 stderr 或 build_track_model 的 dbg)贴出来,我继续给你对症修。
代码报错如下:Traceback (most recent call last):
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1527, in <module>
main()
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 1464, in main
track = build_track_model(rail_info, F.CALIB, T, rail_pts, H, W)
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 781, in build_track_model
cl_world = intersect_rays_with_plane(o_lidar, d_lidar, p0, up)
File "/media/jd/4997BB1603CFE2C4/lw/Openpcdet/fusion/qinjiepanding.py", line 677, in intersect_rays_with_plane
t[ok] = num[ok] / denom[ok]
IndexError: boolean index did not match indexed array along dimension 0; dimension is 1 but corresponding boolean dimension is 80
怎么解决呢
这个报错的根因是:你在 intersect_rays_with_plane() 里把 ray_origin reshape 成了 (1,3),导致 num 计算出来是 长度 1 的数组,而 denom/ok 是 长度 N(=80) 的数组,于是出现:
t 的维度是 1ok 的维度是 80t[ok] = ... 就会报 IndexError: boolean index did not match...intersect_rays_with_plane() 改成“num 是标量,t 是 N 维”请你用下面这个函数完整替换你脚本里的 intersect_rays_with_plane()(原函数整段删掉换这段):
pythondef intersect_rays_with_plane(ray_origin: np.ndarray, ray_dirs: np.ndarray, plane_p0: np.ndarray, plane_n: np.ndarray) -> np.ndarray: """ ray: p = o + t d plane: n·(p - p0) = 0 ray_origin: (3,) 或 (N,3) ray_dirs: (N,3) 返回 (N,3),无效的设为 nan """ d = np.asarray(ray_dirs, dtype=np.float64) # (N,3) n = np.asarray(plane_n, dtype=np.float64).reshape(3) # (3,) p0 = np.asarray(plane_p0, dtype=np.float64).reshape(3) # (3,) o = np.asarray(ray_origin, dtype=np.float64) if o.ndim == 1: o = o.reshape(1, 3) # (1,3) if o.shape[0] == 1: # 只有一个 origin,给所有射线共享 o0 = o.reshape(3) num = float(np.dot(n, (p0 - o0))) # 标量 denom = (d @ n.reshape(3, 1)).reshape(-1) # (N,) t = np.full_like(denom, np.nan, dtype=np.float64) ok = np.abs(denom) > 1e-9 t[ok] = num / denom[ok] t[t <= 0] = np.nan P = o0.reshape(1, 3) + d * t.reshape(-1, 1) return P.astype(np.float32) # 如果你以后传进来的是每条射线一个 origin (N,3) if o.shape[0] != d.shape[0]: raise ValueError(f"ray_origin shape {o.shape} not compatible with ray_dirs shape {d.shape}") num = ((p0.reshape(1, 3) - o) @ n.reshape(3, 1)).reshape(-1) # (N,) denom = (d @ n.reshape(3, 1)).reshape(-1) # (N,) t = np.full_like(denom, np.nan, dtype=np.float64) ok = np.abs(denom) > 1e-9 t[ok] = num[ok] / denom[ok] t[t <= 0] = np.nan P = o + d * t.reshape(-1, 1) return P.astype(np.float32)
num 本质上对“同一个光心 origin”的所有 rays 是常数(一个标量)denom 是每条 ray 不同的 (d·n),长度是 Nt 必须是长度 N 的数组,才能用 ok 去索引。如果 t 很多是 nan,说明:
up 和某些 ray 几乎平行(denom≈0),或t<=0 被过滤)但你之前已经有:
text[RAIL_PTS] selected=1012 [EDGE_PTS] left=139 right=180
说明 rail 平面点是有的,大概率能继续跑通。
你先替换函数跑一次;如果后面 build_track_model failed: centerline ray-plane intersection too few,把 track.dbg 或终端输出贴我,我再帮你检查是外参方向还是平面/中心线采样的问题。
我现在的全部代码如下:#!/usr/bin/env python3
用 PIDNet(mmseg) 语义分割 steel rail -> 提取轨道边缘/中心线 -> 投影匹配到点云 -> 构建八边形立体限界 -> FUSED目标侵界判定
并保存全部中间过程图片。
运行示例:
python intrusion_clearance_pidnet.py --frame_id 2025042116142702
你给的默认路径(可通过参数改):
--seg_config /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py
--seg_ckpt /media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth
或者 --seg_ckpt .../last_checkpoint (脚本会自动读出真实 .pth)
输出目录:
debug_intrusion_pidnet/<frame_id>/
00_raw.jpg
01_seg_overlay.jpg
02_rail_mask.png
03_corridor_mask.png
04_edge_polylines.jpg
05_edge_masks.png
06_proj_all_points.jpg
07_proj_rail_points.jpg
08_proj_edge_points.jpg
09_bev_envelope.jpg
10_front_envelope.jpg
11_3d_scene.png (若 matplotlib 可用)
intrusion_metrics.json
intrusion_metrics.txt
"""
from __future__ import annotations
import os
import math
import json
import random
import argparse
from pathlib import Path
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional
import numpy as np
import cv2
import sys
import subprocess
import textwrap
import fusion as F
# ---- image ROI (trapezoid that bounds the usable rail region) ----
ROI_ENABLE = True
ROI_Y_TOP_RATIO = 0.30          # top edge of ROI, fraction of image height
ROI_Y_BOT_RATIO = 0.98          # bottom edge of ROI, fraction of image height
ROI_X_TOP_L_RATIO = 0.30        # top-left x, fraction of image width
ROI_X_TOP_R_RATIO = 0.70        # top-right x, fraction of image width
ROI_X_BOT_L_RATIO = 0.03        # bottom-left x, fraction of image width
ROI_X_BOT_R_RATIO = 0.97        # bottom-right x, fraction of image width
# ---- segmentation post-processing ----
RAIL_CLASS_ID_DEFAULT = 1       # label id of "steel rail" in the seg output
MASK_CLOSE_K = 9                # morphological close kernel size
MASK_OPEN_K = 7                 # morphological open kernel size
MASK_DILATE_PX = 5              # dilation gives projected points tolerance against small extrinsic error
KEEP_TOPK_CC = 6                # keep at most this many connected components (suppress false positives)
EDGE_MATCH_PX = 5.0             # max pixel distance for point-to-edge matching
EDGE_LINE_THICK = 9             # thickness when rasterizing edge polylines into masks
CENTERLINE_N_ROWS = 80          # number of image rows sampled for the centerline
MIN_RAIL_PTS = 200              # minimum lidar points on the rail surface to build the track model
# ---- clearance envelope geometry (meters) ----
TRAIN_MAX_WIDTH_M = 2.5         # max train body width (spec quotes roughly 2.3~2.5 m)
HANG_HEIGHT_M = 3.7             # rail surface to the relevant body height (approx 3.4~3.7 m -- TODO confirm)
SIDE_MARGIN_M = 0.35            # lateral safety margin
TOP_MARGIN_M = 0.05             # extra margin on the top face
HEIGHT_EXTRA_M = 0.30           # extra vertical margin
TOP_CHAMFER_H = 0.55            # top chamfer height of the octagon
BOT_CHAMFER_H = 0.55            # bottom chamfer height of the octagon
BOTTOM_WIDTH_RATIO = 0.85       # bottom width vs max width (keeps the section an octagon, not a hexagon)
# ---- intrusion decision thresholds (used by intrusion_for_det) ----
INTRUDE_VOL_RATIO_THR = 0.02
INTRUDE_AREA_RATIO_THR = 0.05
FORCE_CENTER_INSIDE_INTRUDE = True
# ---- output root for all debug artifacts ----
OUT_ROOT = "debug_intrusion_pidnet"
os.makedirs(OUT_ROOT, exist_ok=True)
def make_roi_mask(h: int, w: int) -> np.ndarray:
    """Build the uint8 (h, w) ROI mask: 255 inside the configured trapezoid.

    When ROI_ENABLE is off the whole frame is returned as valid.
    """
    if not ROI_ENABLE:
        return np.full((h, w), 255, dtype=np.uint8)
    top_y = int(h * ROI_Y_TOP_RATIO)
    bot_y = int(h * ROI_Y_BOT_RATIO)
    top_l = int(w * ROI_X_TOP_L_RATIO)
    top_r = int(w * ROI_X_TOP_R_RATIO)
    bot_l = int(w * ROI_X_BOT_L_RATIO)
    bot_r = int(w * ROI_X_BOT_R_RATIO)
    corners = np.array(
        [[bot_l, bot_y],
         [bot_r, bot_y],
         [top_r, top_y],
         [top_l, top_y]], dtype=np.int32)
    mask = np.zeros((h, w), dtype=np.uint8)
    cv2.fillConvexPoly(mask, corners, 255)
    return mask
def overlay_mask(img_bgr: np.ndarray, mask01: np.ndarray, color=(0, 0, 255), alpha=0.55) -> np.ndarray:
    """Alpha-blend `color` over the pixels where mask01 is nonzero; returns a copy."""
    out = img_bgr.copy()
    sel = mask01.astype(bool)
    if sel.any():
        tint = np.array(color, dtype=np.float32) * alpha
        out[sel] = (out[sel] * (1 - alpha) + tint).astype(np.uint8)
    return out
def median_filter_1d(arr: np.ndarray, k: int = 7) -> np.ndarray:
    """Sliding-window median with edge-clamped windows; returns a float32 copy."""
    if arr.size == 0:
        return arr
    win = max(3, int(k) | 1)  # force odd window, at least 3
    half = win // 2
    smoothed = arr.copy().astype(np.float32)
    for i in range(arr.size):
        seg = arr[max(0, i - half):min(arr.size, i + half + 1)]
        smoothed[i] = float(np.median(seg))
    return smoothed
def resolve_checkpoint(ckpt_path: str) -> str:
    """Resolve a work-dir ``last_checkpoint`` pointer file to the real checkpoint path.

    NOTE(review): this definition is shadowed by the identical re-definition
    directly below it; only the later one is effective at runtime.
    """
    ckpt = ckpt_path
    if os.path.basename(ckpt_path) == "last_checkpoint":
        # The pointer file usually holds a single line: /path/to/xxx.pth
        with open(ckpt_path, "r", encoding="utf-8") as f:
            line = f.read().strip()
        if line and os.path.isfile(line):
            ckpt = line
    return ckpt
def resolve_checkpoint(ckpt_path: str) -> str:
    """Resolve an mmseg work-dir ``last_checkpoint`` pointer to the actual .pth path."""
    if os.path.basename(ckpt_path) != "last_checkpoint":
        return ckpt_path
    with open(ckpt_path, "r", encoding="utf-8") as fh:
        target = fh.read().strip()
    if target and os.path.isfile(target):
        return target
    return ckpt_path
def _write_mmseg_infer_helper(helper_path: str):
"""写一个独立的小脚本,专门在 mmseg3.0 虚拟环境里跑推理并输出 npy"""
code = r'''
import os, sys, argparse
import numpy as np
import cv2
def resolve_checkpoint(ckpt_path: str) -> str:
if os.path.basename(ckpt_path) == "last_checkpoint":
with open(ckpt_path, "r", encoding="utf-8") as f:
real = f.read().strip()
if real and os.path.isfile(real):
return real
return ckpt_path
def main():
ap = argparse.ArgumentParser()
ap.add_argument("--img", required=True)
ap.add_argument("--config", required=True)
ap.add_argument("--ckpt", required=True)
ap.add_argument("--out", required=True)
ap.add_argument("--device", default="cuda:0")
ap.add_argument("--mmseg_repo", default="")
args = ap.parse_args()
textif args.mmseg_repo and os.path.isdir(args.mmseg_repo): sys.path.insert(0, args.mmseg_repo) ckpt = resolve_checkpoint(args.ckpt) # ====== init model (兼容 mmseg 0.x / 1.x) ====== model = None try: from mmseg.apis import init_model model = init_model(args.config, ckpt, device=args.device) from mmseg.apis import inference_model use_new = True except Exception: use_new = False if model is None: try: from mmseg.apis import init_segmentor model = init_segmentor(args.config, ckpt, device=args.device) except Exception as e: raise RuntimeError(f"mmseg init failed in helper: {e}") img = cv2.imread(args.img) if img is None: raise RuntimeError(f"cannot read image: {args.img}") # ====== inference ====== if use_new: out = inference_model(model, img) pred = out.pred_sem_seg.data if hasattr(pred, "cpu"): pred = pred.cpu().numpy() pred = np.array(pred) if pred.ndim == 3: pred = pred[0] else: from mmseg.apis import inference_segmentor out = inference_segmentor(model, img) pred = np.array(out[0], dtype=np.int32) pred = pred.astype(np.int32) np.save(args.out, pred) print(f"[mmseg_helper] saved pred to {args.out} shape={pred.shape} dtype={pred.dtype}")
if name == "main":
main()
'''
with open(helper_path, "w", encoding="utf-8") as f:
f.write(code)
def infer_rail_mask_by_subprocess(
        img_path: str,
        out_npy: str,
        seg_config: str,
        seg_ckpt: str,
        seg_device: str,
        mmseg_python: str,
        mmseg_repo: str = ""
) -> np.ndarray:
    """Run segmentation inside the mmseg virtualenv via a subprocess.

    Returns the predicted label map as an (H, W) int32 array loaded from
    ``out_npy``. Raises FileNotFoundError for missing inputs and RuntimeError
    when the subprocess exits nonzero (its stdout/stderr are echoed first).
    """
    seg_ckpt = resolve_checkpoint(seg_ckpt)

    if not os.path.isfile(mmseg_python):
        raise FileNotFoundError(f"--mmseg_python not found: {mmseg_python}")
    if not os.path.isfile(seg_config):
        raise FileNotFoundError(f"--seg_config not found: {seg_config}")
    if not os.path.isfile(seg_ckpt):
        raise FileNotFoundError(f"--seg_ckpt not found: {seg_ckpt}")
    if not os.path.isfile(img_path):
        raise FileNotFoundError(f"image not found: {img_path}")

    # The helper script is written next to the output file once, then reused.
    helper_path = os.path.join(os.path.dirname(out_npy), "_mmseg_infer_helper.py")
    if not os.path.isfile(helper_path):
        _write_mmseg_infer_helper(helper_path)

    cmd = [
        mmseg_python, helper_path,
        "--img", img_path,
        "--config", seg_config,
        "--ckpt", seg_ckpt,
        "--out", out_npy,
        "--device", seg_device,
        "--mmseg_repo", mmseg_repo,
    ]
    print("[INFO] Run mmseg subprocess:", " ".join(cmd))
    r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if r.returncode != 0:
        print("====== mmseg subprocess STDOUT ======")
        print(r.stdout)
        print("====== mmseg subprocess STDERR ======")
        print(r.stderr)
        raise RuntimeError("mmseg subprocess failed (see logs above).")

    pred = np.load(out_npy).astype(np.int32)
    return pred
def init_mmseg_model(config_path: str, checkpoint_path: str, device: str):
    """Initialize an mmsegmentation model (compatible with mmseg 0.x and >=1.0).

    Resolves ``last_checkpoint`` pointer files first, validates both paths,
    then tries the new API before falling back to the legacy one.
    """
    ckpt = resolve_checkpoint(checkpoint_path)
    if not os.path.isfile(config_path):
        raise FileNotFoundError(f"seg_config not found: {config_path}")
    if not os.path.isfile(ckpt):
        raise FileNotFoundError(f"seg_checkpoint not found: {ckpt}")

    # Prefer mmseg >= 1.0.
    try:
        from mmseg.apis import init_model  # type: ignore
        model = init_model(config_path, ckpt, device=device)
        return model
    except Exception:
        pass

    # Fall back to mmseg 0.x.
    try:
        from mmseg.apis import init_segmentor  # type: ignore
        model = init_segmentor(config_path, ckpt, device=device)
        return model
    except Exception as e:
        raise RuntimeError(
            "mmseg init failed. 请确认你在安装了 mmsegmentation 的环境里运行,并且 config/ckpt 路径正确。\n"
            f"raw error: {e}"
        )
def infer_rail_mask(model, img_bgr: np.ndarray) -> np.ndarray:
    """Run segmentation inference; return the label map as an (H, W) int32 array.

    Tries the mmseg >= 1.0 API first, then falls back to mmseg 0.x; raises
    RuntimeError when neither API succeeds.
    """
    # mmseg >= 1.0
    try:
        from mmseg.apis import inference_model  # type: ignore
        out = inference_model(model, img_bgr)
        pred = out.pred_sem_seg.data
        # tensor -> numpy
        if hasattr(pred, "cpu"):
            pred = pred.cpu().numpy()
        pred = np.array(pred)
        if pred.ndim == 3:
            pred = pred[0]
        return pred.astype(np.int32)
    except Exception:
        pass

    # mmseg 0.x
    try:
        from mmseg.apis import inference_segmentor  # type: ignore
        out = inference_segmentor(model, img_bgr)
        pred = out[0]
        return np.array(pred, dtype=np.int32)
    except Exception as e:
        raise RuntimeError(f"mmseg inference failed: {e}")
@dataclass
class RailMaskInfo:
    """Per-frame rail segmentation products consumed by the intrusion pipeline."""
    rail_mask01: np.ndarray      # (H,W) 0/1 mask of steel-rail pixels
    corridor01: np.ndarray       # (H,W) 0/1 corridor filled between left/right rail per row
    left_poly: np.ndarray        # (N,2) int (x,y) left rail-edge polyline
    right_poly: np.ndarray       # (N,2) right rail-edge polyline
    mid_poly: np.ndarray         # (N,2) centerline polyline
    left_edge_mask: np.ndarray   # (H,W) 0/255 rasterized left edge
    right_edge_mask: np.ndarray  # (H,W) 0/255 rasterized right edge
    dbg: Dict                    # debug info collected while building the masks
def keep_topk_connected_components(mask01: np.ndarray, k: int) -> np.ndarray:
    """Keep only the k largest 8-connected components of a 0/1 mask.

    Rails can be fragmented into several segments, so k is usually generous.
    """
    binary = (mask01 > 0).astype(np.uint8)
    n_labels, labels, stats, _ = cv2.connectedComponentsWithStats(binary, connectivity=8)
    if n_labels <= 1:  # background only, nothing to filter
        return binary
    areas = stats[1:, cv2.CC_STAT_AREA]
    ranked = np.argsort(-areas)
    survivors = (ranked[:min(k, ranked.size)] + 1).tolist()
    kept = np.zeros_like(binary)
    for cid in survivors:
        kept[labels == cid] = 1
    return kept
def build_polylines_from_mask(rail01: np.ndarray, roi_mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict]:
    """Row-scan the rail mask to extract left/right/mid polylines.

    Supports mild curvature; rows with abnormal rail width are dropped and
    the remaining edges are median-smoothed. Returns (left, right, mid, dbg)
    where each polyline is (N,2) int32 (x, y) and dbg holds row statistics.
    """
    H, W = rail01.shape[:2]
    y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0
    y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1)

    xs_left = []
    xs_right = []
    ys = []
    for y in range(y_top, y_bot + 1):
        if roi_mask[y, :].max() == 0:
            continue
        row = rail01[y, :] & (roi_mask[y, :] > 0)
        idx = np.where(row > 0)[0]
        if idx.size < 6:
            continue
        xl = int(idx.min())
        xr = int(idx.max())
        if xr - xl < 10:
            continue
        xs_left.append(xl)
        xs_right.append(xr)
        ys.append(y)

    dbg = {"rows_valid": len(ys)}
    if len(ys) < 30:
        return np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), dbg

    xs_left = np.array(xs_left, dtype=np.float32)
    xs_right = np.array(xs_right, dtype=np.float32)
    ys = np.array(ys, dtype=np.int32)

    widths = xs_right - xs_left
    med_w = float(np.median(widths))
    dbg["median_width_px"] = med_w
    # Drop rows with abnormal width (occasional false positives would bend the edges).
    ok = (widths > 0.45 * med_w) & (widths < 1.60 * med_w)
    xs_left = xs_left[ok]
    xs_right = xs_right[ok]
    ys = ys[ok]
    dbg["rows_kept"] = int(ys.size)
    if ys.size < 30:
        return np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), dbg

    xs_left = median_filter_1d(xs_left, 9)
    xs_right = median_filter_1d(xs_right, 9)
    xs_mid = 0.5 * (xs_left + xs_right)

    left_poly = np.stack([xs_left, ys.astype(np.float32)], axis=1).round().astype(np.int32)
    right_poly = np.stack([xs_right, ys.astype(np.float32)], axis=1).round().astype(np.int32)
    mid_poly = np.stack([xs_mid, ys.astype(np.float32)], axis=1).round().astype(np.int32)
    return left_poly, right_poly, mid_poly, dbg
def build_masks_from_rail_pred(pred: np.ndarray, rail_id: int, img_h: int, img_w: int, roi_mask: np.ndarray) -> RailMaskInfo:
    """Turn a segmentation label map into a cleaned rail mask, corridor mask and edge polylines."""
    # Resize prediction to the image size if needed (nearest keeps labels intact).
    if pred.shape[0] != img_h or pred.shape[1] != img_w:
        pred_rs = cv2.resize(pred.astype(np.int32), (img_w, img_h), interpolation=cv2.INTER_NEAREST)
    else:
        pred_rs = pred

    rail01 = (pred_rs == int(rail_id)).astype(np.uint8)

    # Restrict to the ROI.
    if ROI_ENABLE:
        rail01 = (rail01 & (roi_mask > 0)).astype(np.uint8)

    # Morphological cleanup.
    kc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_CLOSE_K, MASK_CLOSE_K))
    ko = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_OPEN_K, MASK_OPEN_K))
    rail01 = cv2.morphologyEx(rail01, cv2.MORPH_CLOSE, kc, iterations=2)
    rail01 = cv2.morphologyEx(rail01, cv2.MORPH_OPEN, ko, iterations=1)

    # Keep the larger connected components (rails may be fragmented; keep top-K).
    rail01 = keep_topk_connected_components(rail01, KEEP_TOPK_CC).astype(np.uint8)

    # Corridor: fill each row between min/max rail columns (covers the dark gap between rails).
    H, W = rail01.shape[:2]
    corridor = np.zeros((H, W), dtype=np.uint8)
    y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0
    y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1)
    for y in range(y_top, y_bot + 1):
        if ROI_ENABLE and roi_mask[y, :].max() == 0:
            continue
        idx = np.where(rail01[y, :] > 0)[0]
        if idx.size < 6:
            continue
        xl = int(idx.min())
        xr = int(idx.max())
        if xr - xl < 10:
            continue
        corridor[y, xl:xr + 1] = 1
    # Close small holes in the corridor.
    corridor = cv2.morphologyEx(corridor, cv2.MORPH_CLOSE, kc, iterations=1)

    # Extract left/right/mid edge polylines.
    left_poly, right_poly, mid_poly, dbg = build_polylines_from_mask(rail01, roi_mask)

    # Rasterize edge masks (used with distanceTransform to match projected lidar points).
    left_edge_mask = np.zeros((H, W), dtype=np.uint8)
    right_edge_mask = np.zeros((H, W), dtype=np.uint8)
    if left_poly.shape[0] >= 10:
        cv2.polylines(left_edge_mask, [left_poly.reshape(-1, 1, 2)], isClosed=False,
                      color=255, thickness=EDGE_LINE_THICK, lineType=cv2.LINE_AA)
    if right_poly.shape[0] >= 10:
        cv2.polylines(right_edge_mask, [right_poly.reshape(-1, 1, 2)], isClosed=False,
                      color=255, thickness=EDGE_LINE_THICK, lineType=cv2.LINE_AA)

    return RailMaskInfo(
        rail_mask01=rail01,
        corridor01=corridor,
        left_poly=left_poly,
        right_poly=right_poly,
        mid_poly=mid_poly,
        left_edge_mask=left_edge_mask,
        right_edge_mask=right_edge_mask,
        dbg=dbg,
    )
def select_points_by_mask(points_xyz: np.ndarray,
                          uv: np.ndarray,
                          valid: np.ndarray,
                          mask01: np.ndarray) -> np.ndarray:
    """Return the 3D points whose (rounded) projection lands on a nonzero mask pixel."""
    H, W = mask01.shape[:2]
    cols = np.round(uv[:, 0]).astype(np.int32)
    rows = np.round(uv[:, 1]).astype(np.int32)
    in_image = valid & (cols >= 0) & (cols < W) & (rows >= 0) & (rows < H)
    cand = np.where(in_image)[0]
    if cand.size == 0:
        return np.zeros((0, 3), dtype=np.float32)
    hit = mask01[rows[cand], cols[cand]] > 0
    return points_xyz[cand[hit]].astype(np.float32)
def match_points_near_edge(points_xyz: np.ndarray,
                           uv: np.ndarray,
                           valid: np.ndarray,
                           edge_mask255: np.ndarray,
                           max_px: float) -> np.ndarray:
    """Select lidar points whose projection is within ``max_px`` pixels of the edge.

    Uses cv2.distanceTransform on the inverted edge mask so each pixel holds
    its distance to the nearest edge pixel.
    """
    H, W = edge_mask255.shape[:2]
    src = np.ones((H, W), dtype=np.uint8) * 255
    src[edge_mask255 > 0] = 0
    dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)

    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H)
    idx = np.where(inside)[0]
    if idx.size == 0:
        return np.zeros((0, 3), dtype=np.float32)
    u2 = u[idx]
    v2 = v[idx]
    d = dist[v2, u2]
    keep = d <= float(max_px)
    return points_xyz[idx[keep]].astype(np.float32)
def fit_plane_svd(points_xyz: np.ndarray, iters: int = 2, keep_q: float = 0.90) -> Tuple[np.ndarray, np.ndarray]:
    """Simple robust plane fit.

    Each iteration: take the median as plane point, get the normal from SVD,
    then drop the farthest (1 - keep_q) fraction of points and refit.
    Returns (p0, n): a point on the plane and a unit normal with n[2] >= 0.
    Falls back to a horizontal plane through the median for < 10 points.
    """
    pts = points_xyz.astype(np.float64)
    if pts.shape[0] < 10:
        p0 = np.median(pts, axis=0)
        n = np.array([0, 0, 1.0], dtype=np.float64)
        return p0.astype(np.float32), n.astype(np.float32)

    for _ in range(max(1, iters)):
        p0 = np.median(pts, axis=0)
        X = pts - p0
        _, _, vt = np.linalg.svd(X, full_matrices=False)
        n = vt[-1]
        n = n / (np.linalg.norm(n) + 1e-12)
        if n[2] < 0:
            n = -n
        # Trim outliers by distance to the current plane.
        d = np.abs((pts - p0) @ n.reshape(3, 1)).reshape(-1)
        thr = float(np.quantile(d, keep_q))
        pts = pts[d <= thr]
        if pts.shape[0] < 30:
            break
    return p0.astype(np.float32), n.astype(np.float32)
def calib_get_K_dist(calib: Dict) -> Tuple[np.ndarray, np.ndarray]:
    """Build (K, dist) from either calibration format.

    Supports:
      1) calib["camera_matrix"] + calib["dist_coeffs"]
      2) calib["fx","fy","cx","cy"] + calib["dist"]
    Missing distortion falls back to five zeros.
    """
    if "camera_matrix" in calib:
        K = np.array(calib["camera_matrix"], dtype=np.float64).reshape(3, 3)
    else:
        fx = float(calib["fx"]); fy = float(calib["fy"])
        cx = float(calib["cx"]); cy = float(calib["cy"])
        K = np.array([[fx, 0.0, cx],
                      [0.0, fy, cy],
                      [0.0, 0.0, 1.0]], dtype=np.float64)

    # Distortion coefficients: accept both key names.
    if "dist_coeffs" in calib:
        dist = np.array(calib["dist_coeffs"], dtype=np.float64).reshape(-1)
    elif "dist" in calib:
        dist = np.array(calib["dist"], dtype=np.float64).reshape(-1)
    else:
        dist = np.zeros((5,), dtype=np.float64)
    return K, dist
def undistort_pixels_to_normalized(uv: np.ndarray, calib: Dict) -> np.ndarray:
    """Undistort pixel coordinates (N,2) into normalized camera coordinates (N,2).

    cv2.undistortPoints already returns normalized coordinates when no
    projection matrix P is given.
    """
    K, dist = calib_get_K_dist(calib)

    pts = uv.reshape(-1, 1, 2).astype(np.float64)
    und = cv2.undistortPoints(pts, K, dist)  # normalized camera coordinates
    und = und.reshape(-1, 2)
    return und.astype(np.float32)
def pixels_to_rays_in_lidar(uv: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Convert pixel coordinates into rays expressed in the lidar frame.

    Returns:
        ray_origin: (3,) camera center in lidar coordinates
        ray_dirs:   (N,3) unit direction vectors in lidar coordinates
    """
    # Invert lidar->cam extrinsics: cam->lidar rotation and camera center.
    T = np.array(T_lidar2cam, dtype=np.float64)
    R = T[:3, :3]
    t = T[:3, 3]
    R_c2l = R.T
    o_lidar = (-R.T @ t).reshape(3)

    # Normalized camera coordinates (distortion-aware when enabled in fusion).
    xy = undistort_pixels_to_normalized(uv, calib) if F.USE_DISTORTION else None
    if xy is None:
        K, dist = calib_get_K_dist(calib)
        fx, fy = K[0, 0], K[1, 1]
        cx, cy = K[0, 2], K[1, 2]
        x = (uv[:, 0] - cx) / fx
        y = (uv[:, 1] - cy) / fy
        xy = np.stack([x, y], axis=1).astype(np.float32)

    dirs_cam = np.concatenate([xy.astype(np.float64), np.ones((xy.shape[0], 1), dtype=np.float64)], axis=1)
    dirs_lidar = (R_c2l @ dirs_cam.T).T
    nrm = np.linalg.norm(dirs_lidar, axis=1, keepdims=True) + 1e-12
    dirs_lidar = dirs_lidar / nrm
    return o_lidar.astype(np.float32), dirs_lidar.astype(np.float32)
def intersect_rays_with_plane(ray_origin: np.ndarray,
                              ray_dirs: np.ndarray,
                              plane_p0: np.ndarray,
                              plane_n: np.ndarray) -> np.ndarray:
    """Intersect rays p = o + t*d with the plane n·(p - p0) = 0.

    ray_origin may be a single (3,) origin shared by all rays or one origin
    per ray (N,3). Returns an (N,3) float32 array; rows are NaN where there
    is no forward (t > 0) intersection or the ray is parallel to the plane.
    """
    d = np.asarray(ray_dirs, dtype=np.float64)            # (N,3)
    n = np.asarray(plane_n, dtype=np.float64).reshape(3)  # (3,)
    p0 = np.asarray(plane_p0, dtype=np.float64).reshape(3)

    o = np.asarray(ray_origin, dtype=np.float64)
    if o.ndim == 1:
        o = o.reshape(1, 3)

    if o.shape[0] == 1:
        # Single origin shared by all rays.
        o0 = o.reshape(3)
        num = float(np.dot(n, (p0 - o0)))                 # scalar numerator
        denom = (d @ n.reshape(3, 1)).reshape(-1)         # (N,)
        t = np.full_like(denom, np.nan, dtype=np.float64)
        ok = np.abs(denom) > 1e-9
        t[ok] = num / denom[ok]
        t[t <= 0] = np.nan                                # keep only forward hits
        P = o0.reshape(1, 3) + d * t.reshape(-1, 1)
        return P.astype(np.float32)

    # One origin per ray (N,3).
    if o.shape[0] != d.shape[0]:
        raise ValueError(f"ray_origin shape {o.shape} not compatible with ray_dirs shape {d.shape}")
    num = ((p0.reshape(1, 3) - o) @ n.reshape(3, 1)).reshape(-1)  # (N,)
    denom = (d @ n.reshape(3, 1)).reshape(-1)                     # (N,)
    t = np.full_like(denom, np.nan, dtype=np.float64)
    ok = np.abs(denom) > 1e-9
    t[ok] = num[ok] / denom[ok]
    t[t <= 0] = np.nan
    P = o + d * t.reshape(-1, 1)
    return P.astype(np.float32)
@dataclass
class TrackModel:
    """Local track frame plus the clearance cross-section used for intrusion tests."""
    ok: bool              # False when the model could not be built
    origin: np.ndarray    # (3,) point on the rail-top plane (centerline mean)
    t: np.ndarray         # (3,) tangent (along-track) direction
    n: np.ndarray         # (3,) lateral direction
    up: np.ndarray        # (3,) rail-top plane normal
    s_min: float          # longitudinal range start along t
    s_max: float          # longitudinal range end along t
    half_w_top: float     # half rail width on the top plane [m]
    poly_lv: np.ndarray   # (8,2) octagon in (l,v); v=0 at the top plane, v<0 downward
    dbg: Dict             # diagnostics from the fitting steps
def pca_direction(points_xyz: np.ndarray, up: np.ndarray) -> np.ndarray:
    """Principal direction of the points, projected into the plane orthogonal to `up`.

    The sign is fixed so the x component is non-negative.
    """
    P = points_xyz.astype(np.float64)
    centered = P - P.mean(axis=0)
    _, _, vt = np.linalg.svd(centered, full_matrices=False)
    main = vt[0]
    # Remove the component along `up` so the direction lies in the track plane.
    up64 = up.astype(np.float64)
    main = main - up64 * (main @ up64)
    main = main / (np.linalg.norm(main) + 1e-12)
    if main[0] < 0:
        main = -main
    return main.astype(np.float32)
def build_octagon(half_w_top: float) -> np.ndarray:
    """Build the octagonal clearance cross-section in (l, v) coordinates.

    v = 0 is the rail-top plane, v < 0 points downward (toward the hanging
    car body). The widths are derived from the measured top half-width plus
    the configured train width and safety margins.
    """
    w_top = float(max(0.10, half_w_top + TOP_MARGIN_M))
    w_mid = float(max(w_top + 0.10, 0.5 * TRAIN_MAX_WIDTH_M + SIDE_MARGIN_M))
    w_bot = float(max(w_top, w_mid * BOTTOM_WIDTH_RATIO))

    H = float(HANG_HEIGHT_M + HEIGHT_EXTRA_M)
    ht = float(min(TOP_CHAMFER_H, max(0.10, 0.25 * H)))
    hb = float(min(BOT_CHAMFER_H, max(0.10, 0.25 * H)))

    v0 = 0.0
    v1 = -ht
    v2 = -(H - hb)
    v3 = -H
    poly = np.array([
        [-w_top, v0],
        [ w_top, v0],
        [ w_mid, v1],
        [ w_mid, v2],
        [ w_bot, v3],
        [-w_bot, v3],
        [-w_mid, v2],
        [-w_mid, v1],
    ], dtype=np.float32)
    return poly
def build_track_model(rail_info: RailMaskInfo,
                      calib: Dict,
                      T_lidar2cam: np.ndarray,
                      rail_pts_xyz: np.ndarray,
                      img_h: int, img_w: int) -> TrackModel:
    """Fit the track frame (origin, tangent, lateral, up) and the clearance octagon.

    Combines the lidar rail points (for the rail-top plane) with the image
    polylines (for centerline and half width via ray-plane intersection).
    Returns a TrackModel with ok=False when there is not enough data.
    """
    dbg: Dict = {}
    if rail_info.mid_poly.shape[0] < 20:
        return TrackModel(False, np.zeros(3), np.array([1, 0, 0], dtype=np.float32),
                          np.array([0, 1, 0], dtype=np.float32), np.array([0, 0, 1], dtype=np.float32),
                          0.0, 1.0, 0.5, np.zeros((0, 2), dtype=np.float32),
                          {"err": "mid_poly too short"})

    if rail_pts_xyz.shape[0] < MIN_RAIL_PTS:
        dbg["warn"] = f"rail_pts too few ({rail_pts_xyz.shape[0]}). plane may be unstable."

    # 1) Fit the rail-top plane from the rail lidar points.
    p0, up = fit_plane_svd(rail_pts_xyz, iters=2, keep_q=0.90)
    dbg["plane_p0"] = p0.tolist()
    dbg["plane_up"] = up.tolist()

    # 2) Intersect centerline pixel rays with the plane to get 3D centerline
    #    points (robust to unbalanced left/right lidar coverage).
    mid = rail_info.mid_poly
    if mid.shape[0] > CENTERLINE_N_ROWS:
        idx = np.linspace(0, mid.shape[0] - 1, CENTERLINE_N_ROWS).astype(np.int32)
        mid_s = mid[idx]
    else:
        mid_s = mid
    uv_mid = mid_s[:, :2].astype(np.float32)
    o_lidar, d_lidar = pixels_to_rays_in_lidar(uv_mid, calib, T_lidar2cam)
    cl_world = intersect_rays_with_plane(o_lidar, d_lidar, p0, up)
    ok = np.isfinite(cl_world).all(axis=1)
    cl_world = cl_world[ok]
    dbg["centerline_pts"] = int(cl_world.shape[0])
    if cl_world.shape[0] < 10:
        return TrackModel(False, np.zeros(3), np.array([1, 0, 0], dtype=np.float32),
                          np.array([0, 1, 0], dtype=np.float32), up,
                          0.0, 1.0, 0.5, np.zeros((0, 2), dtype=np.float32),
                          {"err": "centerline ray-plane intersection too few", **dbg})

    # 3) Track tangent t: PCA of the centerline points, projected into the plane.
    t = pca_direction(cl_world, up)
    # Lateral direction n, orthogonal to both up and t.
    n = np.cross(up, t)
    n = n / (np.linalg.norm(n) + 1e-12)

    # 4) Origin: mean of the centerline points (lies on the plane).
    origin = cl_world.mean(axis=0).astype(np.float32)

    # 5) Longitudinal range along t (robust quantiles plus a margin).
    s = (cl_world - origin.reshape(1, 3)) @ t.reshape(3, 1)
    s = s.reshape(-1)
    s_min = float(np.quantile(s, 0.02))
    s_max = float(np.quantile(s, 0.98))
    margin = max(1.5, 0.10 * (s_max - s_min))
    s_min -= margin
    s_max += margin
    dbg["s_min"] = s_min
    dbg["s_max"] = s_max

    # 6) Half width from left/right edge pixel rays intersected with the plane
    #    (more stable than relying on symmetric lidar coverage).
    left = rail_info.left_poly
    right = rail_info.right_poly
    if left.shape[0] > 10 and right.shape[0] > 10:
        k = min(left.shape[0], right.shape[0], 80)
        idl = np.linspace(0, left.shape[0] - 1, k).astype(np.int32)
        idr = np.linspace(0, right.shape[0] - 1, k).astype(np.int32)
        uvL = left[idl, :2].astype(np.float32)
        uvR = right[idr, :2].astype(np.float32)
        oL, dL = pixels_to_rays_in_lidar(uvL, calib, T_lidar2cam)
        oR, dR = pixels_to_rays_in_lidar(uvR, calib, T_lidar2cam)
        pL = intersect_rays_with_plane(oL, dL, p0, up)
        pR = intersect_rays_with_plane(oR, dR, p0, up)
        okL = np.isfinite(pL).all(axis=1)
        okR = np.isfinite(pR).all(axis=1)
        ok2 = okL & okR
        pL = pL[ok2]
        pR = pR[ok2]
        if pL.shape[0] >= 10:
            lL = (pL - origin.reshape(1, 3)) @ n.reshape(3, 1)
            lR = (pR - origin.reshape(1, 3)) @ n.reshape(3, 1)
            w = (lR - lL).reshape(-1)
            half_w_top = float(np.median(np.abs(w) * 0.5))
        else:
            half_w_top = 0.55
            dbg["warn_half_w"] = "edge ray-plane too few, fallback=0.55"
    else:
        half_w_top = 0.55
        dbg["warn_half_w"] = "edge poly too short, fallback=0.55"
    dbg["half_w_top"] = half_w_top

    poly_lv = build_octagon(half_w_top)
    return TrackModel(True, origin, t.astype(np.float32), n.astype(np.float32), up.astype(np.float32),
                      s_min, s_max, float(half_w_top), poly_lv, dbg)
def world_to_track_coords(track: TrackModel, pts_xyz: np.ndarray) -> np.ndarray:
    """Project world points into track coordinates (s, l, v) = (along, lateral, vertical)."""
    rel = pts_xyz - track.origin.reshape(1, 3)
    s_col = rel @ track.t.reshape(3, 1)
    l_col = rel @ track.n.reshape(3, 1)
    v_col = rel @ track.up.reshape(3, 1)
    return np.concatenate([s_col, l_col, v_col], axis=1).astype(np.float32)
def _cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
return float((a[0]-o[0])(b[1]-o[1]) - (a[1]-o[1])(b[0]-o[0]))
def convex_hull(points: np.ndarray) -> np.ndarray:
    """Andrew's monotone-chain convex hull of 2D points; returns hull vertices (float32)."""
    if points is None or len(points) == 0:
        return np.zeros((0, 2), dtype=np.float32)
    uniq = np.unique(points.astype(np.float64), axis=0)
    if uniq.shape[0] < 3:
        return uniq.astype(np.float32)
    ordered = uniq[np.lexsort((uniq[:, 1], uniq[:, 0]))]

    def _chain(seq):
        # Build one hull half, popping points that do not turn strictly left.
        out = []
        for p in seq:
            while len(out) >= 2 and _cross(np.array(out[-2]), np.array(out[-1]), p) <= 0:
                out.pop()
            out.append(p)
        return out

    lower = _chain(ordered)
    upper = _chain(ordered[::-1])
    hull = np.array(lower[:-1] + upper[:-1], dtype=np.float64)
    return hull.astype(np.float32)
def polygon_area(poly: np.ndarray) -> float:
    """Shoelace-formula area (non-negative) of a simple polygon; 0 for degenerate input."""
    if poly is None or poly.shape[0] < 3:
        return 0.0
    xs, ys = poly[:, 0], poly[:, 1]
    twice_area = np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1))
    return float(0.5 * abs(twice_area))
def ensure_ccw(poly: np.ndarray) -> np.ndarray:
    """Return the polygon with counter-clockwise winding (a reversed copy if needed)."""
    if poly is None or poly.shape[0] < 3:
        return poly
    xs, ys = poly[:, 0], poly[:, 1]
    signed_area = float(0.5 * (np.dot(xs, np.roll(ys, -1)) - np.dot(ys, np.roll(xs, -1))))
    if signed_area < 0:
        return poly[::-1].copy()
    return poly
def inside_half_plane(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> bool:
    """True if p lies on or to the left of the directed edge a->b (small tolerance)."""
    return _cross(a, b, p) >= -1e-9
def line_intersection(p1: np.ndarray, p2: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """Intersection of the infinite lines (p1,p2) and (a,b).

    Falls back to a copy of p2 when the lines are (near-)parallel. The source
    had lost the ``*`` operators in the determinant formulas; restored.
    """
    x1, y1 = p1; x2, y2 = p2
    x3, y3 = a; x4, y4 = b
    den = (x1 - x2) * (y3 - y4) - (y1 - y2) * (x3 - x4)
    if abs(den) < 1e-12:
        return p2.copy()
    px = ((x1 * y2 - y1 * x2) * (x3 - x4) - (x1 - x2) * (x3 * y4 - y3 * x4)) / den
    py = ((x1 * y2 - y1 * x2) * (y3 - y4) - (y1 - y2) * (x3 * y4 - y3 * x4)) / den
    return np.array([px, py], dtype=np.float32)
def convex_polygon_intersection(subject: np.ndarray, clip: np.ndarray) -> np.ndarray:
    """Sutherland-Hodgman clipping of convex polygon `subject` by convex polygon `clip`.

    Both polygons are normalized to CCW winding first. Returns the (possibly
    empty) intersection polygon as an (M,2) float32 array.
    """
    if subject is None or subject.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    if clip is None or clip.shape[0] < 3:
        return np.zeros((0, 2), dtype=np.float32)
    subj = ensure_ccw(subject).astype(np.float32)
    clp = ensure_ccw(clip).astype(np.float32)

    out = subj
    for i in range(clp.shape[0]):
        a = clp[i]
        b = clp[(i + 1) % clp.shape[0]]
        inp = out
        if inp.shape[0] == 0:
            break
        out_list = []
        for j in range(inp.shape[0]):
            p = inp[j]
            q = inp[(j + 1) % inp.shape[0]]
            pin = inside_half_plane(p, a, b)
            qin = inside_half_plane(q, a, b)
            if qin:
                if not pin:
                    out_list.append(line_intersection(p, q, a, b))
                out_list.append(q.copy())
            elif pin:
                out_list.append(line_intersection(p, q, a, b))
        out = np.array(out_list, dtype=np.float32)
    return out
def point_in_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> bool:
    """True when pt lies inside (or on the border of) the convex polygon."""
    ccw = ensure_ccw(poly)
    m = ccw.shape[0]
    for i in range(m):
        edge_a = ccw[i]
        edge_b = ccw[(i + 1) % m]
        if _cross(edge_a, edge_b, pt) < -1e-8:
            return False
    return True
def point_to_segment_distance(pt: np.ndarray, a: np.ndarray, b: np.ndarray) -> float:
    """Euclidean distance from pt to the segment [a, b].

    The source had lost the ``*`` operators in the dot products and the
    projection; restored the multiplications.
    """
    ax, ay = a; bx, by = b; px, py = pt
    vx, vy = bx - ax, by - ay
    wx, wy = px - ax, py - ay
    c1 = vx * wx + vy * wy
    if c1 <= 0:  # projection falls before a
        return float(math.hypot(px - ax, py - ay))
    c2 = vx * vx + vy * vy
    if c2 <= c1:  # projection falls beyond b
        return float(math.hypot(px - bx, py - by))
    t = c1 / (c2 + 1e-12)
    projx = ax + t * vx
    projy = ay + t * vy
    return float(math.hypot(px - projx, py - projy))
def signed_distance_to_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> float:
    """Distance from pt to the polygon border; negative when pt is inside."""
    ccw = ensure_ccw(poly)
    m = ccw.shape[0]
    border = min(
        point_to_segment_distance(pt, ccw[i], ccw[(i + 1) % m]) for i in range(m)
    )
    return -border if point_in_convex_polygon(pt, ccw) else border
@dataclass
class IntrusionMetrics:
    """Per-detection intrusion verdict plus the quantities it was derived from."""
    cls: str                   # detection class name
    score: float               # detection confidence
    intrude: bool              # final intrusion verdict
    reason: str                # which rule fired (or why none did)
    s_overlap: float           # overlap length along the track direction
    s_ratio: float             # s_overlap / detection extent along the track
    area_ratio: float          # clipped cross-section area / detection footprint area
    volume_ratio: float        # area_ratio * s_ratio (approximate volume overlap)
    center_inside: bool        # detection center lies inside the clearance envelope
    signed_dist_center: float  # signed distance of the center to the envelope (<0 inside)
def intrusion_for_det(track: TrackModel, det: F.Det3D) -> IntrusionMetrics:
    """Decide whether one 3D detection intrudes into the track clearance envelope.

    The detection box corners are mapped into track (s, l, v) coordinates;
    longitudinal overlap, cross-section area overlap and their product
    (a volume-ratio proxy) feed the threshold rules below.
    """
    box7 = det.box7.astype(np.float32)
    corners = F.boxes3d_to_corners(box7.reshape(1, 7))[0]  # (8,3)

    slv = world_to_track_coords(track, corners)  # (8,3)
    s_vals = slv[:, 0]
    lv = slv[:, 1:3]

    det_s_min = float(np.min(s_vals))
    det_s_max = float(np.max(s_vals))
    det_s_len = max(1e-6, det_s_max - det_s_min)
    s0 = max(det_s_min, track.s_min)
    s1 = min(det_s_max, track.s_max)
    s_overlap = max(0.0, s1 - s0)
    s_ratio = float(s_overlap / det_s_len)

    det_poly = convex_hull(lv)
    det_area = polygon_area(det_poly)
    clear_poly = ensure_ccw(track.poly_lv.copy())
    inter = convex_polygon_intersection(det_poly, clear_poly) if det_area > 1e-9 else np.zeros((0, 2), np.float32)
    inter_area = polygon_area(inter)
    area_ratio = float(inter_area / (det_area + 1e-9)) if det_area > 1e-9 else 0.0
    volume_ratio = float(area_ratio * s_ratio)

    center = box7[:3].reshape(1, 3)
    c_slv = world_to_track_coords(track, center)[0]
    c_lv = c_slv[1:3]
    center_inside = (track.s_min <= float(c_slv[0]) <= track.s_max) and point_in_convex_polygon(c_lv, clear_poly)
    signed_dist = float(signed_distance_to_convex_polygon(c_lv, clear_poly))

    # Decision rules (reported in the logs / output files).
    if s_overlap <= 1e-6:
        intrude = False
        reason = "no_s_overlap"
    else:
        if FORCE_CENTER_INSIDE_INTRUDE and center_inside:
            intrude = True
            reason = "center_inside"
        elif volume_ratio >= INTRUDE_VOL_RATIO_THR:
            intrude = True
            reason = f"volume_ratio>={INTRUDE_VOL_RATIO_THR:.2f}"
        elif (area_ratio >= INTRUDE_AREA_RATIO_THR) and (s_ratio >= 0.25):
            intrude = True
            reason = f"area_ratio>={INTRUDE_AREA_RATIO_THR:.2f}&s_ratio>=0.25"
        else:
            intrude = False
            reason = "below_thresholds"

    return IntrusionMetrics(
        cls=det.cls_name,
        score=float(det.score),
        intrude=bool(intrude),
        reason=reason,
        s_overlap=float(s_overlap),
        s_ratio=float(s_ratio),
        area_ratio=float(area_ratio),
        volume_ratio=float(volume_ratio),
        center_inside=bool(center_inside),
        signed_dist_center=float(signed_dist),
    )
def save_edge_debug_images(out_dir: str, img: np.ndarray, roi_mask: np.ndarray, rail: RailMaskInfo) -> None:
    """Dump segmentation overlay, binary masks, polylines and edge masks for one frame."""
    # 01: segmentation overlay (ROI outline drawn in yellow).
    vis = overlay_mask(img, rail.rail_mask01, color=(0, 0, 255), alpha=0.55)
    if ROI_ENABLE:
        cnts, _ = cv2.findContours(roi_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(vis, cnts, -1, (0, 255, 255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "01_seg_overlay.jpg"), vis)

    # 02/03: binary masks scaled to 0/255.
    cv2.imwrite(os.path.join(out_dir, "02_rail_mask.png"), (rail.rail_mask01 * 255).astype(np.uint8))
    cv2.imwrite(os.path.join(out_dir, "03_corridor_mask.png"), (rail.corridor01 * 255).astype(np.uint8))

    # 04: polylines overlay (left=blue, right=red, mid=green in BGR).
    poly_vis = img.copy()
    if rail.left_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.left_poly.reshape(-1, 1, 2)], False, (255, 0, 0), 3, cv2.LINE_AA)
    if rail.right_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.right_poly.reshape(-1, 1, 2)], False, (0, 0, 255), 3, cv2.LINE_AA)
    if rail.mid_poly.shape[0] > 0:
        cv2.polylines(poly_vis, [rail.mid_poly.reshape(-1, 1, 2)], False, (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "04_edge_polylines.jpg"), poly_vis)

    # 05: rasterized edge masks.
    edge_rgb = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8)
    edge_rgb[rail.left_edge_mask > 0] = (255, 0, 0)
    edge_rgb[rail.right_edge_mask > 0] = (0, 0, 255)
    cv2.imwrite(os.path.join(out_dir, "05_edge_masks.png"), edge_rgb)
def draw_projected_points(img: np.ndarray, uv: np.ndarray, valid: np.ndarray, color, step: int = 8):
    """Draw every `step`-th valid projected point as a 1-px dot on img (in place)."""
    H, W = img.shape[:2]
    cols = np.round(uv[:, 0]).astype(np.int32)
    rows = np.round(uv[:, 1]).astype(np.int32)
    on_image = valid & (cols >= 0) & (cols < W) & (rows >= 0) & (rows < H)
    chosen = np.where(on_image)[0]
    if chosen.size == 0:
        return
    for i in chosen[::max(1, step)]:
        cv2.circle(img, (int(cols[i]), int(rows[i])), 1, color, -1, cv2.LINE_AA)
def render_bev_density(points_xyz: np.ndarray,
                       out_path: str,
                       track: Optional[TrackModel] = None,
                       dets: Optional[List[F.Det3D]] = None,
                       res: float = 0.06):
    """Render a log-density bird's-eye (XY) image with optional envelope/detection overlays."""
    pts = points_xyz
    if pts.shape[0] > 250000:  # subsample very large clouds for speed
        idx = np.random.choice(pts.shape[0], 250000, replace=False)
        pts = pts[idx]

    x = pts[:, 0]; y = pts[:, 1]
    x_min = float(np.quantile(x, 0.02)); x_max = float(np.quantile(x, 0.98))
    y_min = float(np.quantile(y, 0.02)); y_max = float(np.quantile(y, 0.98))
    margin = 3.0
    x_min -= margin; x_max += margin
    y_min -= margin; y_max += margin

    H = int(max(480, math.ceil((x_max - x_min) / res)))
    W = int(max(480, math.ceil((y_max - y_min) / res)))

    # Rasterize point counts into the BEV grid (x forward = up in the image).
    rr = ((x_max - pts[:, 0]) / res).astype(np.int32)
    cc = ((pts[:, 1] - y_min) / res).astype(np.int32)
    rr = np.clip(rr, 0, H - 1)
    cc = np.clip(cc, 0, W - 1)
    idx = rr * W + cc
    cnt = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)

    # Log + gamma compression so sparse regions stay visible.
    img = np.log1p(cnt)
    if img.max() > 0:
        img = img / img.max()
    img = (img ** 0.55 * 255).astype(np.uint8)
    bev = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    def xy_to_pix(xy):
        xx, yy = float(xy[0]), float(xy[1])
        r = (x_max - xx) / res
        c = (yy - y_min) / res
        return int(round(c)), int(round(r))

    # Envelope wireframe: octagon slices along s, projected onto XY.
    if track is not None and track.ok:
        ss = np.linspace(track.s_min, track.s_max, 14).astype(np.float32)
        for s in ss:
            base = track.origin + track.t * float(s)
            poly_xy = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                poly_xy.append(p[:2])
            poly_xy = np.array(poly_xy, dtype=np.float32)
            poly_pix = np.array([xy_to_pix(p) for p in poly_xy], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(bev, [poly_pix], True, (0, 255, 255), 1, cv2.LINE_AA)

    # Detection footprints (bottom four box corners).
    if dets:
        for d in dets:
            corners = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
            xy = corners[:4, :2]
            pix = np.array([xy_to_pix(p) for p in xy], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(bev, [pix], True, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imwrite(out_path, bev)
def render_front_xz_density(points_xyz: np.ndarray,
                            out_path: str,
                            track: Optional[TrackModel] = None,
                            dets: Optional[List[F.Det3D]] = None,
                            res: float = 0.06):
    """Render a log-density front view (X-Z) image with optional envelope/detection overlays."""
    pts = points_xyz
    if pts.shape[0] > 250000:  # subsample very large clouds for speed
        idx = np.random.choice(pts.shape[0], 250000, replace=False)
        pts = pts[idx]

    x = pts[:, 0]; z = pts[:, 2]
    x_min = float(np.quantile(x, 0.02)); x_max = float(np.quantile(x, 0.98))
    z_min = float(np.quantile(z, 0.02)); z_max = float(np.quantile(z, 0.98))
    margin = 1.5
    x_min -= margin; x_max += margin
    z_min -= margin; z_max += margin

    H = int(max(480, math.ceil((z_max - z_min) / res)))
    W = int(max(480, math.ceil((x_max - x_min) / res)))

    # Rasterize point counts into the X-Z grid (z up = top of the image).
    rr = ((z_max - pts[:, 2]) / res).astype(np.int32)
    cc = ((pts[:, 0] - x_min) / res).astype(np.int32)
    rr = np.clip(rr, 0, H - 1)
    cc = np.clip(cc, 0, W - 1)
    idx = rr * W + cc
    cnt = np.bincount(idx, minlength=H * W).reshape(H, W).astype(np.float32)

    # Log + gamma compression so sparse regions stay visible.
    img = np.log1p(cnt)
    if img.max() > 0:
        img = img / img.max()
    img = (img ** 0.55 * 255).astype(np.uint8)
    front = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    def xz_to_pix(xz):
        xx, zz = float(xz[0]), float(xz[1])
        c = (xx - x_min) / res
        r = (z_max - zz) / res
        return int(round(c)), int(round(r))

    # Envelope wireframe projected onto X-Z.
    if track is not None and track.ok:
        ss = np.linspace(track.s_min, track.s_max, 14).astype(np.float32)
        for s in ss:
            base = track.origin + track.t * float(s)
            poly_xz = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                poly_xz.append([p[0], p[2]])
            poly_xz = np.array(poly_xz, dtype=np.float32)
            poly_pix = np.array([xz_to_pix(p) for p in poly_xz], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(front, [poly_pix], True, (0, 255, 255), 1, cv2.LINE_AA)

    # Detection boxes approximated by four corners in X-Z.
    if dets:
        for d in dets:
            corners = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
            xz = corners[[0, 1, 2, 3], :][:, [0, 2]]
            pix = np.array([xz_to_pix(p) for p in xz], dtype=np.int32).reshape(-1, 1, 2)
            cv2.polylines(front, [pix], True, (0, 255, 0), 2, cv2.LINE_AA)

    cv2.imwrite(out_path, front)
def render_3d_scene(points_xyz: np.ndarray, track: TrackModel, dets: List[F.Det3D], out_png: str):
    """Save a static matplotlib 3D view of the point cloud, the clearance
    envelope wireframe, and the fused detection boxes to *out_png*.

    Silently returns (with a warning) if matplotlib is not importable, so the
    main pipeline never fails because of an optional visualization backend.
    """
    try:
        import matplotlib
        matplotlib.use("Agg")  # headless backend: no display required
        import matplotlib.pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
    except Exception as e:
        print(f"[WARN] matplotlib not available, skip 3d render: {e}")
        return

    pts = points_xyz
    # Cap scatter size so the figure renders quickly.
    if pts.shape[0] > 20000:
        idx = np.random.choice(pts.shape[0], 20000, replace=False)
        pts = pts[idx]

    fig = plt.figure(figsize=(10, 8))
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(pts[:, 0], pts[:, 1], pts[:, 2], s=0.4)

    # Clearance envelope wireframe: octagonal rings at sampled s positions,
    # connected longitudinally between consecutive rings.
    if track.ok:
        ss = np.linspace(track.s_min, track.s_max, 18)
        prev = None
        for s in ss:
            base = track.origin + track.t * float(s)
            V = []
            for l, v in track.poly_lv:
                p = base + track.n * float(l) + track.up * float(v)
                V.append(p)
            V = np.array(V)
            # Ring edges (closed loop of 8 vertices).
            for i in range(8):
                p = V[i]; q = V[(i + 1) % 8]
                ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])
            # Longitudinal edges to the previous ring.
            if prev is not None:
                for i in range(8):
                    p = prev[i]; q = V[i]
                    ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])
            prev = V

    # Detection boxes as 12-edge wireframes.
    edges = [(0, 1), (1, 2), (2, 3), (3, 0),
             (4, 5), (5, 6), (6, 7), (7, 4),
             (0, 4), (1, 5), (2, 6), (3, 7)]
    for d in dets:
        c = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0]
        for i, j in edges:
            p = c[i]; q = c[j]
            ax.plot([p[0], q[0]], [p[1], q[1]], [p[2], q[2]])

    ax.set_xlabel("X"); ax.set_ylabel("Y"); ax.set_zlabel("Z")
    plt.tight_layout()
    fig.savefig(out_png, dpi=160)
    plt.close(fig)
def parse_args():
    """Build and parse command-line arguments for the intrusion pipeline.

    Returns the parsed ``argparse.Namespace``. All defaults point at the
    project's local paths; runtime help strings are kept as-is.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--frame_id", type=str, default="", help="指定帧号(不含扩展名),例如 2025042116142702")
    ap.add_argument("--seg_config", type=str,
                    default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py")
    ap.add_argument("--seg_ckpt", type=str,
                    default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth")
    ap.add_argument("--seg_device", type=str, default="cuda:0")
    ap.add_argument("--rail_id", type=int, default=RAIL_CLASS_ID_DEFAULT)
    ap.add_argument("--seed", type=int, default=42)
    ap.add_argument("--topk", type=int, default=20, help="侵界判定最多取多少个 fused det")
    # Interpreter and repo for the mmseg 3.0 subprocess (segmentation runs in
    # a separate virtualenv so the OpenPCDet env does not need mmseg).
    ap.add_argument("--mmseg_python", type=str,
                    default="/home/jd/anaconda3/envs/mmseg3.0/bin/python",
                    help="mmseg3.0 虚拟环境 python 路径(必须是可执行文件)")
    ap.add_argument("--mmseg_repo", type=str,
                    default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation",
                    help="mmsegmentation repo 根目录(用于 sys.path 兜底,可空)")
    return ap.parse_args()
def main():
    """End-to-end pipeline for one frame: segmentation -> rail model ->
    YOLO/PCDet fusion -> clearance envelope -> intrusion metrics + debug
    renders. All artifacts are written under ``OUT_ROOT/<frame_id>/``.
    """
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)

    # 1) Load detection models (fusion side first).
    device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models()
    idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)}

    # 2) Choose a frame that has both an image and a point-cloud .bin.
    bin_files = sorted(list(Path(F.PCDET_POINTS_DIR).glob("*.bin")))
    frame_ids = [p.stem for p in bin_files
                 if F.find_image_for_frame(p.stem) is not None and p.stem in idx_map]
    if not frame_ids:
        print("[ERROR] no matched (img,bin) frames.")
        return
    fid = args.frame_id if args.frame_id else random.choice(frame_ids)
    if fid not in idx_map:
        print(f"[ERROR] frame_id {fid} not in dataset index.")
        return
    out_dir = os.path.join(OUT_ROOT, fid)
    os.makedirs(out_dir, exist_ok=True)

    # 3) Read the image.
    img_path = F.find_image_for_frame(fid)
    img = cv2.imread(img_path)
    if img is None:
        print(f"[ERROR] cannot read image: {img_path}")
        return
    H, W = img.shape[:2]
    cv2.imwrite(os.path.join(out_dir, "00_raw.jpg"), img)

    # 4) ROI mask (only constrains edge extraction).
    roi_mask = make_roi_mask(H, W)

    # 5) Segmentation inference in a subprocess (img_path/out_dir exist now).
    print("[INFO] Running PIDNet(mmseg) in subprocess...")
    pred = infer_rail_mask_by_subprocess(
        img_path=img_path,
        out_npy=os.path.join(out_dir, "seg_pred.npy"),
        seg_config=args.seg_config,
        seg_ckpt=args.seg_ckpt,
        seg_device=args.seg_device,
        mmseg_python=args.mmseg_python,
        mmseg_repo=args.mmseg_repo
    )

    # 6) Build rail masks + debug images.
    rail_info = build_masks_from_rail_pred(pred, args.rail_id, H, W, roi_mask)
    save_edge_debug_images(out_dir, img, roi_mask, rail_info)
    print(f"[SEG] rows_valid={rail_info.dbg.get('rows_valid')} rows_kept={rail_info.dbg.get('rows_kept')} median_width_px={rail_info.dbg.get('median_width_px')}")

    # ---- YOLO / PCDet / projection / clearance / intrusion pipeline ----

    # Detection inference.
    yolo_dets = F.infer_yolo(yolo_model, img)
    _, raw_points, pcdet_dets = F.infer_pcdet(cfg, dataset, pcdet_model,
                                              load_data_to_gpu_fn, idx_map[fid], device)
    if raw_points is None or raw_points.shape[0] == 0:
        print("[ERROR] raw_points empty.")
        return

    # Choose extrinsic matrix (may auto-refine against the cloud).
    T = F.get_extrinsic_matrix(F.CALIB, raw_points[:, :3], W, H)

    # Decision-level fusion, keep top-k by score and filter low-confidence.
    fused = F.fuse_frame(yolo_dets, pcdet_dets, W, H, F.CALIB, T, raw_points)
    fused = sorted(fused, key=lambda d: d.score, reverse=True)[:max(1, args.topk)]
    fused_keep = [d for d in fused if d.score >= 0.10]
    print(f"[DETS] yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused)} keep(score>=0.10)={len(fused_keep)}")

    # Project LiDAR points into the image.
    pts_xyz = raw_points[:, :3].astype(np.float32)
    uv, valid = F.project_points_lidar_to_img(pts_xyz, F.CALIB, T, W, H,
                                              use_distortion=F.USE_DISTORTION)

    # Debug: all projected points + rail mask overlay.
    vis_all = img.copy()
    draw_projected_points(vis_all, uv, valid, (200, 200, 200), step=12)
    vis_all = overlay_mask(vis_all, rail_info.rail_mask01, color=(0, 0, 255), alpha=0.35)
    cv2.imwrite(os.path.join(out_dir, "06_proj_all_points.jpg"), vis_all)

    # Select rail-top points by the (dilated) rail mask; dilation gives
    # tolerance against small extrinsic errors.
    rail_mask255 = (rail_info.rail_mask01 * 255).astype(np.uint8)
    if MASK_DILATE_PX > 0:
        kd = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_DILATE_PX, MASK_DILATE_PX))
        rail_mask255 = cv2.dilate(rail_mask255, kd, iterations=1)
    rail_mask01_dil = (rail_mask255 > 0).astype(np.uint8)
    rail_pts = select_points_by_mask(pts_xyz, uv, valid, rail_mask01_dil)
    print(f"[RAIL_PTS] selected={rail_pts.shape[0]}")

    vis_rail = img.copy()
    draw_projected_points(vis_rail, uv, valid, (120, 120, 120), step=20)
    # Rail points overlay (green): recompute indices from the mask (fast).
    Hh, Ww = rail_mask01_dil.shape
    u = np.round(uv[:, 0]).astype(np.int32)
    v = np.round(uv[:, 1]).astype(np.int32)
    inside = valid & (u >= 0) & (u < Ww) & (v >= 0) & (v < Hh)
    idx = np.where(inside)[0]
    keep = rail_mask01_dil[v[idx], u[idx]] > 0
    idx_keep = idx[keep]
    for i in idx_keep[::6]:
        cv2.circle(vis_rail, (int(u[i]), int(v[i])), 1, (0, 255, 0), -1, cv2.LINE_AA)
    # Left/right edge polylines.
    if rail_info.left_poly.shape[0] > 0:
        cv2.polylines(vis_rail, [rail_info.left_poly.reshape(-1, 1, 2)], False,
                      (255, 0, 0), 2, cv2.LINE_AA)
    if rail_info.right_poly.shape[0] > 0:
        cv2.polylines(vis_rail, [rail_info.right_poly.reshape(-1, 1, 2)], False,
                      (0, 0, 255), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "07_proj_rail_points.jpg"), vis_rail)

    # Match cloud points near the left/right edge masks (pixel distance).
    left_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.left_edge_mask, EDGE_MATCH_PX)
    right_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.right_edge_mask, EDGE_MATCH_PX)
    print(f"[EDGE_PTS] left={left_edge_pts.shape[0]} right={right_edge_pts.shape[0]}")

    vis_edge = img.copy()
    draw_projected_points(vis_edge, uv, valid, (120, 120, 120), step=22)

    def draw_edge_points(edge_mask255, color):
        # Draw projected points within EDGE_MATCH_PX of the edge mask, using a
        # distance transform for the per-pixel distance lookup.
        src = np.ones((H, W), dtype=np.uint8) * 255
        src[edge_mask255 > 0] = 0
        dist = cv2.distanceTransform(src, cv2.DIST_L2, 5)
        uu = np.round(uv[:, 0]).astype(np.int32)
        vv = np.round(uv[:, 1]).astype(np.int32)
        ins = valid & (uu >= 0) & (uu < W) & (vv >= 0) & (vv < H)
        ii = np.where(ins)[0]
        d = dist[vv[ii], uu[ii]]
        kk = d <= EDGE_MATCH_PX
        jj = ii[kk]
        for j in jj[::4]:
            cv2.circle(vis_edge, (int(uu[j]), int(vv[j])), 1, color, -1, cv2.LINE_AA)

    draw_edge_points(rail_info.left_edge_mask, (255, 0, 0))
    draw_edge_points(rail_info.right_edge_mask, (0, 0, 255))
    if rail_info.mid_poly.shape[0] > 0:
        cv2.polylines(vis_edge, [rail_info.mid_poly.reshape(-1, 1, 2)], False,
                      (0, 255, 0), 2, cv2.LINE_AA)
    cv2.imwrite(os.path.join(out_dir, "08_proj_edge_points.jpg"), vis_edge)

    # 8) Build the 3D track model + clearance envelope.
    track = build_track_model(rail_info, F.CALIB, T, rail_pts, H, W)
    if not track.ok:
        print("[ERROR] build_track_model failed:", track.dbg)
        with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
            f.write("build_track_model failed\n")
            f.write(json.dumps(track.dbg, ensure_ascii=False, indent=2))
        return
    print("\n[TRACK_MODEL]")
    print(f" origin={track.origin.tolist()}")
    print(f" t={track.t.tolist()}")
    print(f" n={track.n.tolist()}")
    print(f" up={track.up.tolist()}")
    print(f" s_range=[{track.s_min:.2f}, {track.s_max:.2f}] half_w_top={track.half_w_top:.3f}")
    print(f" octagon(l,v):\n{track.poly_lv}")
    print(f" dbg={track.dbg}")

    # 9) Intrusion metrics per fused detection.
    metrics: List[IntrusionMetrics] = []
    for d in fused_keep:
        metrics.append(intrusion_for_det(track, d))
    print("\n========== INTRUSION METRICS ==========")
    if not metrics:
        print("No fused detections (score>=0.10).")
    for i, m in enumerate(metrics):
        print(f"[{i}] cls={m.cls:14s} detScore={m.score:.3f} INTRUDE={m.intrude} ({m.reason})")
        print(f" s_overlap={m.s_overlap:.2f}m s_ratio={m.s_ratio:.3f}")
        print(f" area_ratio(l-v)={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}")
        print(f" center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}m")

    # Persist metrics as JSON and as a plain-text summary.
    out_json = {
        "frame_id": fid,
        "seg_dbg": rail_info.dbg,
        "track_dbg": track.dbg,
        "rules": {
            "INTRUDE_VOL_RATIO_THR": INTRUDE_VOL_RATIO_THR,
            "INTRUDE_AREA_RATIO_THR": INTRUDE_AREA_RATIO_THR,
            "FORCE_CENTER_INSIDE_INTRUDE": FORCE_CENTER_INSIDE_INTRUDE,
        },
        "dets": [m.__dict__ for m in metrics]
    }
    with open(os.path.join(out_dir, "intrusion_metrics.json"), "w", encoding="utf-8") as f:
        json.dump(out_json, f, ensure_ascii=False, indent=2)
    with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f:
        for i, m in enumerate(metrics):
            f.write(f"[{i}] cls={m.cls} score={m.score:.3f} intrude={m.intrude} reason={m.reason}\n")
            f.write(f" s_overlap={m.s_overlap:.3f} s_ratio={m.s_ratio:.3f}\n")
            f.write(f" area_ratio={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}\n")
            f.write(f" center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}\n")

    # 10) BEV/front debug renders with the clearance envelope.
    render_bev_density(pts_xyz, os.path.join(out_dir, "09_bev_envelope.jpg"),
                       track=track, dets=fused_keep, res=0.06)
    render_front_xz_density(pts_xyz, os.path.join(out_dir, "10_front_envelope.jpg"),
                            track=track, dets=fused_keep, res=0.06)

    # 11) Static 3D scene (optional, needs matplotlib).
    render_3d_scene(pts_xyz, track, fused_keep, os.path.join(out_dir, "11_3d_scene.png"))

    print(f"\n[DONE] saved all debug files in: {out_dir}")
# Script entry point. Note: the dunder underscores were stripped by markdown
# formatting in the original paste ("if name == \"main\"") — restored here.
if __name__ == "__main__":
    main()
梳理这个代码逻辑,我看这个代码的流程是分割轨道区域后,01_seg_overlay还建立了黄色梯形框,我认为这个是不需要的,而且我要求限界界面是八边形,你不应该给我这个错误的黄色梯形,这个黄色如果是图像限界截面,应该为八边形,八边形的一条顶边靠近轨道符合我上传的限界构建大小要求,比如向轨道左右平等延伸多长,而不是现在的随便延伸长度,另外其他的高度等符合限界要求的超过车体距离,把这个距离都要换算为像素大小,你可以借助轨道点云共同判断这个大小,相关尺寸大小参考单轨设计文件,其次我说的点云限界,他应该是一个包络体,是一个类似隧道的包裹轨道的立体,而不是几个单纯的八边形截面,你需要把这些八边形连接起来分为做一个八边体,你还给了11_3d_scene,这个有什么用,还不如用open3d让我自己旋转视角判断限界是否正确 ,请把这些问题修改好,给我一个完整的代码,
下面我按你提的每个点,把代码逻辑“纠正到工程意义正确”,并给你一份可直接替换 qinjiepanding.py 的完整脚本(仍然复用你现有 fusion.py 的 YOLO+PCDet+投影/融合能力,mmseg 仍用子进程)。
你现在看到的黄色梯形其实是我用于边缘提取的 ROI(梯形),它的目的是过滤掉图像上方/两侧的噪声,不是限界。
既然你明确说“不需要、而且容易误解”,我已经在新代码里做到:
(仅当传入 --roi_enable 时才启用)。你要求的尺寸必须来自《单轨设计文件》:我已经把《悬挂式单轨交通设计规范》附录 A/B/C 的车辆限界/设备限界坐标表硬编码进脚本(单位 mm),并提供参数选择:
--clear_profile A|B|C(中铁科工 / 中车四方 / 中车铺镇)--clear_kind vehicle|equipment(车辆限界 or 设备限界)这些坐标来自你给的 PDF(例:附录 A 表 A.0.1-2/3,附录 B 表 B.0.1-2/3,附录 C 表 C.0.1-2/3)。(单轨设计.pdf)(单轨设计.pdf)(单轨设计.pdf)
关键点:
我不再用之前那套TRAIN_MAX_WIDTH_M / HANG_HEIGHT_M的“拍脑袋八边形”,而是直接使用规范坐标。
你说对:只要把限界做成 3D(米),再用相机标定+外参投影到图像,就自然得到像素尺度,不需要手写“mm->pixel 比例”。
新代码新增:
12_clearance_overlay.jpg:把限界截面/隧道网格投影回图像(你能看到它在像素里到底多大、多贴近轨道)你说得对。之前我画的只是“多切片线框”。
新代码做了:
13_clearance_tunnel.ply(你可以用 open3d/meshlab 直接查看);此外新代码加入了 --vis_o3d:
open3d,会弹出交互窗口:点云 + 限界隧道 mesh + 检测框线框说明:
- 你现有
fusion.py不用改(仍然提供:CALIB、project_points_lidar_to_img、get_extrinsic_matrix、fuse_frame、boxes3d_to_corners等)。- mmseg 仍用你给的
mmseg3.0子进程运行(避免 openpcdet 环境缺 mmseg)。- 输出目录仍是
debug_intrusion_pidnet/<frame_id>/...,我新增了两个输出:
12_clearance_overlay.jpg(图像上的限界投影)13_clearance_tunnel.ply(点云中的限界包络体 mesh)
python#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ qinjiepanding.py (updated) --------------------------------------------------------- mmseg(PIDNet) 语义分割 steel rail -> 提取轨道边缘/中心线 -> 投影匹配到点云 -> 根据《悬挂式单轨交通设计规范》附录 A/B/C 的限界坐标构建“八边形截面” -> 沿中心线生成“隧道式包络体 mesh” -> FUSED目标侵界判定 并保存中间过程图、导出 tunnel mesh(PLY),可选 Open3D 交互查看。 新增/修改要点: - 不再在 01_seg_overlay 里画黄色 ROI 梯形(避免误解) - 限界截面来自规范附录 A/B/C 的车辆/设备限界坐标表(mm) - 将限界构建为 3D 隧道包络体(mesh),并可 open3d 旋转查看 """ from __future__ import annotations import os import math import json import random import argparse from pathlib import Path from dataclasses import dataclass from typing import Dict, List, Tuple, Optional import numpy as np import cv2 import sys import subprocess # 复用你原来的融合代码 import fusion as F # ========================= # 可调参数(你可按数据微调) # ========================= # ROI:只用于约束“边缘提取”(不影响语义分割,也不再画出来) ROI_ENABLE = False ROI_Y_TOP_RATIO = 0.30 ROI_Y_BOT_RATIO = 0.98 ROI_X_TOP_L_RATIO = 0.30 ROI_X_TOP_R_RATIO = 0.70 ROI_X_BOT_L_RATIO = 0.03 ROI_X_BOT_R_RATIO = 0.97 # segmentation class id:你的两类里 steel rail=1 RAIL_CLASS_ID_DEFAULT = 1 # mask 清理 MASK_CLOSE_K = 9 MASK_OPEN_K = 7 MASK_DILATE_PX = 5 # 给投影点“容错”,外参略偏时很关键 KEEP_TOPK_CC = 6 # 连通域最多保留多少个(避免假阳性) # 边缘匹配到点云(像素距离阈值) EDGE_MATCH_PX = 5.0 EDGE_LINE_THICK = 9 # 画边缘线mask厚度 # 中心线采样行数(越大越平滑) CENTERLINE_N_ROWS = 80 # 轨道点云(rail mask 内)最少点数 MIN_RAIL_PTS = 200 # ========== 侵界判定阈值 ========== INTRUDE_VOL_RATIO_THR = 0.02 INTRUDE_AREA_RATIO_THR = 0.05 FORCE_CENTER_INSIDE_INTRUDE = True # debug 可视化输出目录 OUT_ROOT = "debug_intrusion_pidnet" os.makedirs(OUT_ROOT, exist_ok=True) # ========================= # 规范附录限界坐标(mm) # ========================= # 说明: # - 这些表在你给的 PDF 附录 A/B/C 中,以 OYZ 坐标给出(单位 mm),通常只给“右半边+底部中心”,默认关于中心线对称。 # - 我们将其转换为 track 坐标系的 (l,v): # l = ±Y/1000 (米) # v = -Z/1000 (米) # 因为 Z 是“向下为正”,而我们用 up 为向上法向,v<0 表示向下 # # 证据来源: # - 附录 A 表 A.0.1-2/3(单轨设计.pdf) # - 附录 B 表 B.0.1-2/3(单轨设计.pdf) # - 附录 C 表 C.0.1-2/3(单轨设计.pdf) CLEARANCE_TABLES_MM: Dict[str, Dict[str, Dict[str, List[float]]]] = { 
"A": { "vehicle": { "Y": [413, 623, 1133, 1604, 1604, 1760, 1548, 0], "Z": [262, 254, 198, 1920, 2251, 3514, 3810, 3707], }, "equipment": { "Y": [403, 629, 1156, 1670, 1670, 1880, 1647, 0], "Z": [227, 210, 153, 1898, 2269, 3559, 3859, 3735], }, }, "B": { "vehicle": { "Y": [417, 816, 1028, 1158, 1216, 1337, 1371, 1389, 1618, 1651, 1609, 1593, 1621, 0], "Z": [336, 285, 354, 644, 908, 890, 1034, 1678, 2613, 2862, 3327, 3420, 3640, 4005], }, "equipment": { "Y": [455, 826, 1101, 1254, 1295, 1413, 1469, 1489, 1717, 1752, 1709, 1694, 1721, 0], "Z": [232, 183, 273, 612, 795, 780, 1016, 1666, 2594, 2860, 3340, 3422, 3705, 4105], }, }, "C": { "vehicle": { "Y": [0, 878, 1314, 1493, 1480, 1460, 1470, 0], "Z": [232, 232, 400, 2059, 3047, 3392, 3828, 3828], }, "equipment": { "Y": [0, 938, 1374, 1583, 1587, 1573, 1590, 0], "Z": [182, 182, 350, 2059, 3082, 3442, 3888, 3888], }, }, } def get_clearance_half_yz_mm(profile: str, kind: str) -> np.ndarray: profile = profile.upper().strip() kind = kind.lower().strip() if profile not in CLEARANCE_TABLES_MM: raise ValueError(f"--clear_profile must be one of {list(CLEARANCE_TABLES_MM.keys())}, got {profile}") if kind not in CLEARANCE_TABLES_MM[profile]: raise ValueError(f"--clear_kind must be one of {list(CLEARANCE_TABLES_MM[profile].keys())}, got {kind}") Y = CLEARANCE_TABLES_MM[profile][kind]["Y"] Z = CLEARANCE_TABLES_MM[profile][kind]["Z"] if len(Y) != len(Z): raise ValueError(f"clearance table {profile}/{kind}: len(Y)!=len(Z)") yz = np.stack([np.array(Y, dtype=np.float32), np.array(Z, dtype=np.float32)], axis=1) # (N,2) mm return yz def ensure_ccw(poly: np.ndarray) -> np.ndarray: if poly is None or poly.shape[0] < 3: return poly x = poly[:, 0]; y = poly[:, 1] signed = float(0.5 * (np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1)))) return poly[::-1].copy() if signed < 0 else poly def _cross(o: np.ndarray, a: np.ndarray, b: np.ndarray) -> float: return float((a[0]-o[0])*(b[1]-o[1]) - (a[1]-o[1])*(b[0]-o[0])) def convex_hull(points: 
np.ndarray) -> np.ndarray: """返回去掉共线点后的凸包(用于侵界计算需要“凸多边形”)""" if points is None or len(points) == 0: return np.zeros((0, 2), dtype=np.float32) pts = np.unique(points.astype(np.float64), axis=0) if pts.shape[0] < 3: return pts.astype(np.float32) pts = pts[np.lexsort((pts[:, 1], pts[:, 0]))] lower = [] for p in pts: while len(lower) >= 2 and _cross(np.array(lower[-2]), np.array(lower[-1]), p) <= 0: lower.pop() lower.append(p) upper = [] for p in pts[::-1]: while len(upper) >= 2 and _cross(np.array(upper[-2]), np.array(upper[-1]), p) <= 0: upper.pop() upper.append(p) hull = np.array(lower[:-1] + upper[:-1], dtype=np.float64) return hull.astype(np.float32) def build_clearance_polygon_lv_m(profile: str, kind: str, use_hull: bool = False) -> np.ndarray: """ 从附录表(mm)构建对称的完整截面 polygon,返回 (M,2) 的 (l,v) 米。 l: 横向,单位 m(中心线对称) v: 竖向,单位 m(v<0 向下) """ half_yz = get_clearance_half_yz_mm(profile, kind) # (N,2) in mm, Y>=0, Z>=0 (downwards) Y = half_yz[:, 0] / 1000.0 Z = half_yz[:, 1] / 1000.0 # 右半边 (l>0) right = np.stack([Y, -Z], axis=1).astype(np.float32) # 左半边:对 right 镜像,避免重复的中心线点(Y==0) left_list = [] for p in right[-2::-1]: # 从倒数第二个到第一个 if abs(float(p[0])) < 1e-9: continue left_list.append(np.array([-p[0], p[1]], dtype=np.float32)) if left_list: left = np.stack(left_list, axis=0).astype(np.float32) poly = np.vstack([right, left]).astype(np.float32) else: poly = right # 保障凸性:侵界计算用凸多边形更稳 if use_hull: poly = convex_hull(poly) poly = ensure_ccw(poly) return poly # ========================= # 小工具:ROI mask(仅用于边缘提取,不再画出来) # ========================= def make_roi_mask(h: int, w: int) -> np.ndarray: if not ROI_ENABLE: return np.ones((h, w), dtype=np.uint8) * 255 y_top = int(h * ROI_Y_TOP_RATIO) y_bot = int(h * ROI_Y_BOT_RATIO) x_tl = int(w * ROI_X_TOP_L_RATIO) x_tr = int(w * ROI_X_TOP_R_RATIO) x_bl = int(w * ROI_X_BOT_L_RATIO) x_br = int(w * ROI_X_BOT_R_RATIO) poly = np.array([[x_bl, y_bot], [x_br, y_bot], [x_tr, y_top], [x_tl, y_top]], dtype=np.int32) m = np.zeros((h, w), dtype=np.uint8) 
cv2.fillConvexPoly(m, poly, 255) return m def overlay_mask(img_bgr: np.ndarray, mask01: np.ndarray, color=(0, 0, 255), alpha=0.55) -> np.ndarray: vis = img_bgr.copy() m = mask01.astype(bool) if m.any(): vis[m] = (vis[m] * (1 - alpha) + np.array(color, dtype=np.float32) * alpha).astype(np.uint8) return vis def median_filter_1d(arr: np.ndarray, k: int = 7) -> np.ndarray: if arr.size == 0: return arr k = max(3, int(k) | 1) r = k // 2 out = arr.copy().astype(np.float32) for i in range(arr.size): lo = max(0, i - r) hi = min(arr.size, i + r + 1) out[i] = float(np.median(arr[lo:hi])) return out # ========================= # mmseg 子进程推理 # ========================= def resolve_checkpoint(ckpt_path: str) -> str: """兼容 mmseg work_dir 的 last_checkpoint 文件(里面存的是实际 pth 路径)""" if os.path.basename(ckpt_path) == "last_checkpoint": with open(ckpt_path, "r", encoding="utf-8") as f: real = f.read().strip() if real and os.path.isfile(real): return real return ckpt_path def _write_mmseg_infer_helper(helper_path: str): """写一个独立的小脚本,专门在 mmseg3.0 虚拟环境里跑推理并输出 npy""" code = r''' import os, sys, argparse import numpy as np import cv2 def resolve_checkpoint(ckpt_path: str) -> str: if os.path.basename(ckpt_path) == "last_checkpoint": with open(ckpt_path, "r", encoding="utf-8") as f: real = f.read().strip() if real and os.path.isfile(real): return real return ckpt_path def main(): ap = argparse.ArgumentParser() ap.add_argument("--img", required=True) ap.add_argument("--config", required=True) ap.add_argument("--ckpt", required=True) ap.add_argument("--out", required=True) ap.add_argument("--device", default="cuda:0") ap.add_argument("--mmseg_repo", default="") args = ap.parse_args() if args.mmseg_repo and os.path.isdir(args.mmseg_repo): sys.path.insert(0, args.mmseg_repo) ckpt = resolve_checkpoint(args.ckpt) model = None try: from mmseg.apis import init_model model = init_model(args.config, ckpt, device=args.device) from mmseg.apis import inference_model use_new = True except Exception: use_new 
= False if model is None: try: from mmseg.apis import init_segmentor model = init_segmentor(args.config, ckpt, device=args.device) except Exception as e: raise RuntimeError(f"mmseg init failed in helper: {e}") img = cv2.imread(args.img) if img is None: raise RuntimeError(f"cannot read image: {args.img}") if use_new: out = inference_model(model, img) pred = out.pred_sem_seg.data if hasattr(pred, "cpu"): pred = pred.cpu().numpy() pred = np.array(pred) if pred.ndim == 3: pred = pred[0] else: from mmseg.apis import inference_segmentor out = inference_segmentor(model, img) pred = np.array(out[0], dtype=np.int32) pred = pred.astype(np.int32) np.save(args.out, pred) print(f"[mmseg_helper] saved pred to {args.out} shape={pred.shape} dtype={pred.dtype}") if __name__ == "__main__": main() ''' with open(helper_path, "w", encoding="utf-8") as f: f.write(code) def infer_rail_mask_by_subprocess( img_path: str, out_npy: str, seg_config: str, seg_ckpt: str, seg_device: str, mmseg_python: str, mmseg_repo: str = "" ) -> np.ndarray: """ 用 mmseg3.0 的 python 子进程跑分割,返回 pred label map (H,W) int32 """ seg_ckpt = resolve_checkpoint(seg_ckpt) if not os.path.isfile(mmseg_python): raise FileNotFoundError(f"--mmseg_python not found: {mmseg_python}") if not os.path.isfile(seg_config): raise FileNotFoundError(f"--seg_config not found: {seg_config}") if not os.path.isfile(seg_ckpt): raise FileNotFoundError(f"--seg_ckpt not found: {seg_ckpt}") if not os.path.isfile(img_path): raise FileNotFoundError(f"image not found: {img_path}") helper_path = os.path.join(os.path.dirname(out_npy), "_mmseg_infer_helper.py") if not os.path.isfile(helper_path): _write_mmseg_infer_helper(helper_path) cmd = [ mmseg_python, helper_path, "--img", img_path, "--config", seg_config, "--ckpt", seg_ckpt, "--out", out_npy, "--device", seg_device, "--mmseg_repo", mmseg_repo ] print("[INFO] Run mmseg subprocess:", " ".join(cmd)) r = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) if r.returncode 
!= 0: print("====== mmseg subprocess STDOUT ======") print(r.stdout) print("====== mmseg subprocess STDERR ======") print(r.stderr) raise RuntimeError("mmseg subprocess failed (see logs above).") pred = np.load(out_npy).astype(np.int32) return pred # ========================= # 从 rail mask 构造:走廊mask、左右边缘曲线、中心线曲线 # ========================= @dataclass class RailMaskInfo: rail_mask01: np.ndarray # (H,W) 0/1, steel rail corridor01: np.ndarray # (H,W) 0/1, fill between left&right per row left_poly: np.ndarray # (N,2) int (x,y) right_poly: np.ndarray # (N,2) mid_poly: np.ndarray # (N,2) left_edge_mask: np.ndarray # (H,W) 0/255 right_edge_mask: np.ndarray dbg: Dict def keep_topk_connected_components(mask01: np.ndarray, k: int) -> np.ndarray: m = (mask01 > 0).astype(np.uint8) n, lab, stats, _ = cv2.connectedComponentsWithStats(m, connectivity=8) if n <= 1: return m areas = stats[1:, cv2.CC_STAT_AREA] order = np.argsort(-areas) keep_ids = (order[:min(k, order.size)] + 1).tolist() out = np.zeros_like(m) for cid in keep_ids: out[lab == cid] = 1 return out def build_polylines_from_mask(rail01: np.ndarray, roi_mask: np.ndarray) -> Tuple[np.ndarray, np.ndarray, np.ndarray, Dict]: H, W = rail01.shape[:2] y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0 y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1) xs_left, xs_right, ys = [], [], [] for y in range(y_top, y_bot + 1): if roi_mask[y, :].max() == 0: continue row = rail01[y, :] & (roi_mask[y, :] > 0) idx = np.where(row > 0)[0] if idx.size < 6: continue xl = int(idx.min()); xr = int(idx.max()) if xr - xl < 10: continue xs_left.append(xl); xs_right.append(xr); ys.append(y) dbg = {"rows_valid": len(ys)} if len(ys) < 30: return np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), dbg xs_left = np.array(xs_left, dtype=np.float32) xs_right = np.array(xs_right, dtype=np.float32) ys = np.array(ys, dtype=np.int32) widths = xs_right - xs_left med_w = float(np.median(widths)) 
dbg["median_width_px"] = med_w ok = (widths > 0.45 * med_w) & (widths < 1.60 * med_w) xs_left = xs_left[ok]; xs_right = xs_right[ok]; ys = ys[ok] dbg["rows_kept"] = int(ys.size) if ys.size < 30: return np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), np.zeros((0, 2), np.int32), dbg xs_left = median_filter_1d(xs_left, 9) xs_right = median_filter_1d(xs_right, 9) xs_mid = 0.5 * (xs_left + xs_right) left_poly = np.stack([xs_left, ys.astype(np.float32)], axis=1).round().astype(np.int32) right_poly = np.stack([xs_right, ys.astype(np.float32)], axis=1).round().astype(np.int32) mid_poly = np.stack([xs_mid, ys.astype(np.float32)], axis=1).round().astype(np.int32) return left_poly, right_poly, mid_poly, dbg def build_masks_from_rail_pred(pred: np.ndarray, rail_id: int, img_h: int, img_w: int, roi_mask: np.ndarray) -> RailMaskInfo: if pred.shape[0] != img_h or pred.shape[1] != img_w: pred_rs = cv2.resize(pred.astype(np.int32), (img_w, img_h), interpolation=cv2.INTER_NEAREST) else: pred_rs = pred rail01 = (pred_rs == int(rail_id)).astype(np.uint8) if ROI_ENABLE: rail01 = (rail01 & (roi_mask > 0)).astype(np.uint8) kc = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_CLOSE_K, MASK_CLOSE_K)) ko = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_OPEN_K, MASK_OPEN_K)) rail01 = cv2.morphologyEx(rail01, cv2.MORPH_CLOSE, kc, iterations=2) rail01 = cv2.morphologyEx(rail01, cv2.MORPH_OPEN, ko, iterations=1) rail01 = keep_topk_connected_components(rail01, KEEP_TOPK_CC).astype(np.uint8) H, W = rail01.shape[:2] corridor = np.zeros((H, W), dtype=np.uint8) y_top = int(H * ROI_Y_TOP_RATIO) if ROI_ENABLE else 0 y_bot = int(H * ROI_Y_BOT_RATIO) if ROI_ENABLE else (H - 1) for y in range(y_top, y_bot + 1): if ROI_ENABLE and roi_mask[y, :].max() == 0: continue idx = np.where(rail01[y, :] > 0)[0] if idx.size < 6: continue xl = int(idx.min()); xr = int(idx.max()) if xr - xl < 10: continue corridor[y, xl:xr + 1] = 1 corridor = cv2.morphologyEx(corridor, cv2.MORPH_CLOSE, kc, iterations=1) 
left_poly, right_poly, mid_poly, dbg = build_polylines_from_mask(rail01, roi_mask) left_edge_mask = np.zeros((H, W), dtype=np.uint8) right_edge_mask = np.zeros((H, W), dtype=np.uint8) if left_poly.shape[0] >= 10: cv2.polylines(left_edge_mask, [left_poly.reshape(-1, 1, 2)], False, 255, EDGE_LINE_THICK, cv2.LINE_AA) if right_poly.shape[0] >= 10: cv2.polylines(right_edge_mask, [right_poly.reshape(-1, 1, 2)], False, 255, EDGE_LINE_THICK, cv2.LINE_AA) return RailMaskInfo( rail_mask01=rail01, corridor01=corridor, left_poly=left_poly, right_poly=right_poly, mid_poly=mid_poly, left_edge_mask=left_edge_mask, right_edge_mask=right_edge_mask, dbg=dbg ) # ========================= # 投影:点云->图像,按mask筛点云 # ========================= def select_points_by_mask(points_xyz: np.ndarray, uv: np.ndarray, valid: np.ndarray, mask01: np.ndarray) -> np.ndarray: H, W = mask01.shape[:2] u = np.round(uv[:, 0]).astype(np.int32) v = np.round(uv[:, 1]).astype(np.int32) inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H) idx = np.where(inside)[0] if idx.size == 0: return np.zeros((0, 3), dtype=np.float32) u2 = u[idx]; v2 = v[idx] keep = mask01[v2, u2] > 0 return points_xyz[idx[keep]].astype(np.float32) def match_points_near_edge(points_xyz: np.ndarray, uv: np.ndarray, valid: np.ndarray, edge_mask255: np.ndarray, max_px: float) -> np.ndarray: H, W = edge_mask255.shape[:2] src = np.ones((H, W), dtype=np.uint8) * 255 src[edge_mask255 > 0] = 0 dist = cv2.distanceTransform(src, cv2.DIST_L2, 5) u = np.round(uv[:, 0]).astype(np.int32) v = np.round(uv[:, 1]).astype(np.int32) inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H) idx = np.where(inside)[0] if idx.size == 0: return np.zeros((0, 3), dtype=np.float32) u2 = u[idx]; v2 = v[idx] d = dist[v2, u2] keep = d <= float(max_px) return points_xyz[idx[keep]].astype(np.float32) # ========================= # 几何:平面拟合 + 像素光线求交 # ========================= def fit_plane_svd(points_xyz: np.ndarray, iters: int = 2, keep_q: float = 0.90) -> Tuple[np.ndarray, 
np.ndarray]: pts = points_xyz.astype(np.float64) if pts.shape[0] < 10: p0 = np.median(pts, axis=0) n = np.array([0, 0, 1.0], dtype=np.float64) return p0.astype(np.float32), n.astype(np.float32) for _ in range(max(1, iters)): p0 = np.median(pts, axis=0) X = pts - p0 _, _, vt = np.linalg.svd(X, full_matrices=False) n = vt[-1] n = n / (np.linalg.norm(n) + 1e-12) if n[2] < 0: n = -n d = np.abs((pts - p0) @ n.reshape(3, 1)).reshape(-1) thr = float(np.quantile(d, keep_q)) pts = pts[d <= thr] if pts.shape[0] < 30: break return p0.astype(np.float32), n.astype(np.float32) def calib_get_K_dist(calib: Dict) -> Tuple[np.ndarray, np.ndarray]: if "camera_matrix" in calib: K = np.array(calib["camera_matrix"], dtype=np.float64).reshape(3, 3) else: fx = float(calib["fx"]); fy = float(calib["fy"]) cx = float(calib["cx"]); cy = float(calib["cy"]) K = np.array([[fx, 0.0, cx], [0.0, fy, cy], [0.0, 0.0, 1.0]], dtype=np.float64) if "dist_coeffs" in calib: dist = np.array(calib["dist_coeffs"], dtype=np.float64).reshape(-1) elif "dist" in calib: dist = np.array(calib["dist"], dtype=np.float64).reshape(-1) else: dist = np.zeros((5,), dtype=np.float64) return K, dist def undistort_pixels_to_normalized(uv: np.ndarray, calib: Dict) -> np.ndarray: K, dist = calib_get_K_dist(calib) pts = uv.reshape(-1, 1, 2).astype(np.float64) und = cv2.undistortPoints(pts, K, dist) return und.reshape(-1, 2).astype(np.float32) def pixels_to_rays_in_lidar(uv: np.ndarray, calib: Dict, T_lidar2cam: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: T = np.array(T_lidar2cam, dtype=np.float64) R = T[:3, :3] t = T[:3, 3] R_c2l = R.T o_lidar = (-R.T @ t).reshape(3) xy = undistort_pixels_to_normalized(uv, calib) if F.USE_DISTORTION else None if xy is None: K, _ = calib_get_K_dist(calib) fx, fy = K[0, 0], K[1, 1] cx, cy = K[0, 2], K[1, 2] x = (uv[:, 0] - cx) / fx y = (uv[:, 1] - cy) / fy xy = np.stack([x, y], axis=1).astype(np.float32) dirs_cam = np.concatenate([xy.astype(np.float64), np.ones((xy.shape[0], 1), 
dtype=np.float64)], axis=1) dirs_lidar = (R_c2l @ dirs_cam.T).T dirs_lidar = dirs_lidar / (np.linalg.norm(dirs_lidar, axis=1, keepdims=True) + 1e-12) return o_lidar.astype(np.float32), dirs_lidar.astype(np.float32) def intersect_rays_with_plane(ray_origin: np.ndarray, ray_dirs: np.ndarray, plane_p0: np.ndarray, plane_n: np.ndarray) -> np.ndarray: d = np.asarray(ray_dirs, dtype=np.float64) # (N,3) n = np.asarray(plane_n, dtype=np.float64).reshape(3) # (3,) p0 = np.asarray(plane_p0, dtype=np.float64).reshape(3) # (3,) o = np.asarray(ray_origin, dtype=np.float64) if o.ndim == 1: o = o.reshape(1, 3) if o.shape[0] == 1: o0 = o.reshape(3) num = float(np.dot(n, (p0 - o0))) denom = (d @ n.reshape(3, 1)).reshape(-1) t = np.full_like(denom, np.nan, dtype=np.float64) ok = np.abs(denom) > 1e-9 t[ok] = num / denom[ok] t[t <= 0] = np.nan P = o0.reshape(1, 3) + d * t.reshape(-1, 1) return P.astype(np.float32) if o.shape[0] != d.shape[0]: raise ValueError(f"ray_origin shape {o.shape} not compatible with ray_dirs shape {d.shape}") num = ((p0.reshape(1, 3) - o) @ n.reshape(3, 1)).reshape(-1) denom = (d @ n.reshape(3, 1)).reshape(-1) t = np.full_like(denom, np.nan, dtype=np.float64) ok = np.abs(denom) > 1e-9 t[ok] = num[ok] / denom[ok] t[t <= 0] = np.nan P = o + d * t.reshape(-1, 1) return P.astype(np.float32) # ========================= # 轨道3D模型 + “隧道式”限界包络体 # ========================= @dataclass class TrackModel: ok: bool origin: np.ndarray # (3,) t: np.ndarray # (3,) tangent n: np.ndarray # (3,) lateral up: np.ndarray # (3,) plane normal s_min: float s_max: float poly_lv: np.ndarray # (M,2) in (l,v) centerline_world: np.ndarray # (K,3) ordered dbg: Dict def pca_direction(points_xyz: np.ndarray, up: np.ndarray) -> np.ndarray: pts = points_xyz.astype(np.float64) mu = pts.mean(axis=0) X = pts - mu _, _, vt = np.linalg.svd(X, full_matrices=False) t = vt[0] upv = up.astype(np.float64) t = t - upv * (t @ upv) t = t / (np.linalg.norm(t) + 1e-12) if t[0] < 0: t = -t return 
t.astype(np.float32) def build_track_model(rail_info: RailMaskInfo, calib: Dict, T_lidar2cam: np.ndarray, rail_pts_xyz: np.ndarray, clearance_poly_lv: np.ndarray) -> TrackModel: dbg: Dict = {} if rail_info.mid_poly.shape[0] < 20: return TrackModel(False, np.zeros(3), np.array([1,0,0],dtype=np.float32), np.array([0,1,0],dtype=np.float32), np.array([0,0,1],dtype=np.float32), 0.0, 1.0, np.zeros((0,2),dtype=np.float32), np.zeros((0,3),dtype=np.float32), {"err": "mid_poly too short"}) if rail_pts_xyz.shape[0] < MIN_RAIL_PTS: dbg["warn"] = f"rail_pts too few ({rail_pts_xyz.shape[0]}). plane may be unstable." # 1) rail plane p0, up = fit_plane_svd(rail_pts_xyz, iters=2, keep_q=0.90) dbg["plane_p0"] = p0.tolist() dbg["plane_up"] = up.tolist() # 2) centerline pixels -> rays -> plane intersection mid = rail_info.mid_poly if mid.shape[0] > CENTERLINE_N_ROWS: idx = np.linspace(0, mid.shape[0]-1, CENTERLINE_N_ROWS).astype(np.int32) mid_s = mid[idx] else: mid_s = mid uv_mid = mid_s[:, :2].astype(np.float32) o_lidar, d_lidar = pixels_to_rays_in_lidar(uv_mid, calib, T_lidar2cam) cl_world = intersect_rays_with_plane(o_lidar, d_lidar, p0, up) ok = np.isfinite(cl_world).all(axis=1) cl_world = cl_world[ok] dbg["centerline_pts"] = int(cl_world.shape[0]) if cl_world.shape[0] < 10: return TrackModel(False, np.zeros(3), np.array([1,0,0],dtype=np.float32), np.array([0,1,0],dtype=np.float32), up, 0.0, 1.0, np.zeros((0,2),dtype=np.float32), np.zeros((0,3),dtype=np.float32), {"err": "centerline ray-plane intersection too few", **dbg}) # 3) t direction t = pca_direction(cl_world, up) n = np.cross(up, t) n = n / (np.linalg.norm(n) + 1e-12) # 4) origin origin = cl_world.mean(axis=0).astype(np.float32) # 5) s range s = ((cl_world - origin.reshape(1, 3)) @ t.reshape(3, 1)).reshape(-1) order = np.argsort(s) cl_sorted = cl_world[order] s_sorted = s[order] s_min = float(np.quantile(s_sorted, 0.02)) s_max = float(np.quantile(s_sorted, 0.98)) margin = max(1.5, 0.10 * (s_max - s_min)) s_min -= margin 
s_max += margin dbg["s_min"] = s_min dbg["s_max"] = s_max # 只保留范围内中心线点,给 mesh 用 keep = (s_sorted >= s_min) & (s_sorted <= s_max) cl_keep = cl_sorted[keep] if cl_keep.shape[0] < 10: cl_keep = cl_sorted # 兜底 # 6) clearance polygon(来自设计规范坐标) poly_lv = ensure_ccw(clearance_poly_lv.copy()).astype(np.float32) if poly_lv.shape[0] < 3: return TrackModel(False, origin, t, n, up, s_min, s_max, poly_lv, cl_keep, {"err": "clearance_poly invalid", **dbg}) dbg["clear_poly_vertices"] = int(poly_lv.shape[0]) dbg["clear_poly_l_range_m"] = [float(np.min(poly_lv[:,0])), float(np.max(poly_lv[:,0]))] dbg["clear_poly_v_range_m"] = [float(np.min(poly_lv[:,1])), float(np.max(poly_lv[:,1]))] return TrackModel(True, origin, t.astype(np.float32), n.astype(np.float32), up.astype(np.float32), s_min, s_max, poly_lv, cl_keep.astype(np.float32), dbg) def world_to_track_coords(track: TrackModel, pts_xyz: np.ndarray) -> np.ndarray: rel = pts_xyz - track.origin.reshape(1, 3) s = rel @ track.t.reshape(3, 1) l = rel @ track.n.reshape(3, 1) v = rel @ track.up.reshape(3, 1) return np.concatenate([s, l, v], axis=1).astype(np.float32) # ========================= # 凸多边形交叠工具(l-v 截面交叠) # ========================= def polygon_area(poly: np.ndarray) -> float: if poly is None or poly.shape[0] < 3: return 0.0 x = poly[:, 0]; y = poly[:, 1] return float(0.5 * abs(np.dot(x, np.roll(y, -1)) - np.dot(y, np.roll(x, -1)))) def inside_half_plane(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> bool: return _cross(a, b, p) >= -1e-9 def line_intersection(p1: np.ndarray, p2: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray: x1, y1 = p1; x2, y2 = p2 x3, y3 = a; x4, y4 = b den = (x1-x2)*(y3-y4) - (y1-y2)*(x3-x4) if abs(den) < 1e-12: return p2.copy() px = ((x1*y2 - y1*x2)*(x3-x4) - (x1-x2)*(x3*y4 - y3*x4)) / den py = ((x1*y2 - y1*x2)*(y3-y4) - (y1-y2)*(x3*y4 - y3*x4)) / den return np.array([px, py], dtype=np.float32) def convex_polygon_intersection(subject: np.ndarray, clip: np.ndarray) -> np.ndarray: if subject is 
None or subject.shape[0] < 3: return np.zeros((0, 2), dtype=np.float32) if clip is None or clip.shape[0] < 3: return np.zeros((0, 2), dtype=np.float32) subj = ensure_ccw(subject).astype(np.float32) clp = ensure_ccw(clip).astype(np.float32) out = subj for i in range(clp.shape[0]): a = clp[i] b = clp[(i+1) % clp.shape[0]] inp = out if inp.shape[0] == 0: break out_list = [] for j in range(inp.shape[0]): p = inp[j] q = inp[(j+1) % inp.shape[0]] pin = inside_half_plane(p, a, b) qin = inside_half_plane(q, a, b) if qin: if not pin: out_list.append(line_intersection(p, q, a, b)) out_list.append(q.copy()) elif pin: out_list.append(line_intersection(p, q, a, b)) out = np.array(out_list, dtype=np.float32) return out def point_in_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> bool: poly = ensure_ccw(poly) for i in range(poly.shape[0]): a = poly[i]; b = poly[(i+1) % poly.shape[0]] if _cross(a, b, pt) < -1e-8: return False return True def point_to_segment_distance(pt: np.ndarray, a: np.ndarray, b: np.ndarray) -> float: ax, ay = a; bx, by = b; px, py = pt vx, vy = bx-ax, by-ay wx, wy = px-ax, py-ay c1 = vx*wx + vy*wy if c1 <= 0: return float(math.hypot(px-ax, py-ay)) c2 = vx*vx + vy*vy if c2 <= c1: return float(math.hypot(px-bx, py-by)) t = c1 / (c2 + 1e-12) projx = ax + t*vx projy = ay + t*vy return float(math.hypot(px-projx, py-projy)) def signed_distance_to_convex_polygon(pt: np.ndarray, poly: np.ndarray) -> float: poly = ensure_ccw(poly) dmin = float("inf") for i in range(poly.shape[0]): a = poly[i]; b = poly[(i+1) % poly.shape[0]] dmin = min(dmin, point_to_segment_distance(pt, a, b)) inside = point_in_convex_polygon(pt, poly) return -dmin if inside else dmin # ========================= # 侵界检测(仍然用“截面交叠×纵向重叠”近似体积比) # ========================= @dataclass class IntrusionMetrics: cls: str score: float intrude: bool reason: str s_overlap: float s_ratio: float area_ratio: float volume_ratio: float center_inside: bool signed_dist_center: float def intrusion_for_det(track: 
TrackModel, det: F.Det3D) -> IntrusionMetrics: box7 = det.box7.astype(np.float32) corners = F.boxes3d_to_corners(box7.reshape(1, 7))[0] # (8,3) slv = world_to_track_coords(track, corners) # (8,3) s_vals = slv[:, 0] lv = slv[:, 1:3] det_s_min = float(np.min(s_vals)) det_s_max = float(np.max(s_vals)) det_s_len = max(1e-6, det_s_max - det_s_min) s0 = max(det_s_min, track.s_min) s1 = min(det_s_max, track.s_max) s_overlap = max(0.0, s1 - s0) s_ratio = float(s_overlap / det_s_len) det_poly = convex_hull(lv) det_area = polygon_area(det_poly) clear_poly = ensure_ccw(track.poly_lv.copy()) inter = convex_polygon_intersection(det_poly, clear_poly) if det_area > 1e-9 else np.zeros((0, 2), np.float32) inter_area = polygon_area(inter) area_ratio = float(inter_area / (det_area + 1e-9)) if det_area > 1e-9 else 0.0 volume_ratio = float(area_ratio * s_ratio) center = box7[:3].reshape(1, 3) c_slv = world_to_track_coords(track, center)[0] c_lv = c_slv[1:3] center_inside = (track.s_min <= float(c_slv[0]) <= track.s_max) and point_in_convex_polygon(c_lv, clear_poly) signed_dist = float(signed_distance_to_convex_polygon(c_lv, clear_poly)) if s_overlap <= 1e-6: intrude = False reason = "no_s_overlap" else: if FORCE_CENTER_INSIDE_INTRUDE and center_inside: intrude = True reason = "center_inside" elif volume_ratio >= INTRUDE_VOL_RATIO_THR: intrude = True reason = f"volume_ratio>={INTRUDE_VOL_RATIO_THR:.2f}" elif (area_ratio >= INTRUDE_AREA_RATIO_THR) and (s_ratio >= 0.25): intrude = True reason = f"area_ratio>={INTRUDE_AREA_RATIO_THR:.2f}&s_ratio>=0.25" else: intrude = False reason = "below_thresholds" return IntrusionMetrics( cls=det.cls_name, score=float(det.score), intrude=bool(intrude), reason=reason, s_overlap=float(s_overlap), s_ratio=float(s_ratio), area_ratio=float(area_ratio), volume_ratio=float(volume_ratio), center_inside=bool(center_inside), signed_dist_center=float(signed_dist) ) # ========================= # 可视化 & Mesh 导出 # ========================= def 
save_edge_debug_images(out_dir: str, img: np.ndarray, rail: RailMaskInfo): # 01 segmentation overlay(不再画 ROI 梯形) vis = overlay_mask(img, rail.rail_mask01, color=(0, 0, 255), alpha=0.55) cv2.imwrite(os.path.join(out_dir, "01_seg_overlay.jpg"), vis) cv2.imwrite(os.path.join(out_dir, "02_rail_mask.png"), (rail.rail_mask01 * 255).astype(np.uint8)) cv2.imwrite(os.path.join(out_dir, "03_corridor_mask.png"), (rail.corridor01 * 255).astype(np.uint8)) poly_vis = img.copy() if rail.left_poly.shape[0] > 0: cv2.polylines(poly_vis, [rail.left_poly.reshape(-1, 1, 2)], False, (255, 0, 0), 3, cv2.LINE_AA) if rail.right_poly.shape[0] > 0: cv2.polylines(poly_vis, [rail.right_poly.reshape(-1, 1, 2)], False, (0, 0, 255), 3, cv2.LINE_AA) if rail.mid_poly.shape[0] > 0: cv2.polylines(poly_vis, [rail.mid_poly.reshape(-1, 1, 2)], False, (0, 255, 0), 2, cv2.LINE_AA) cv2.imwrite(os.path.join(out_dir, "04_edge_polylines.jpg"), poly_vis) edge_rgb = np.zeros((img.shape[0], img.shape[1], 3), dtype=np.uint8) edge_rgb[rail.left_edge_mask > 0] = (255, 0, 0) edge_rgb[rail.right_edge_mask > 0] = (0, 0, 255) cv2.imwrite(os.path.join(out_dir, "05_edge_masks.png"), edge_rgb) def draw_projected_points(img: np.ndarray, uv: np.ndarray, valid: np.ndarray, color, step: int = 8): H, W = img.shape[:2] u = np.round(uv[:, 0]).astype(np.int32) v = np.round(uv[:, 1]).astype(np.int32) inside = valid & (u >= 0) & (u < W) & (v >= 0) & (v < H) idx = np.where(inside)[0] if idx.size == 0: return idx = idx[::max(1, step)] for i in idx: cv2.circle(img, (int(u[i]), int(v[i])), 1, color, -1, cv2.LINE_AA) def draw_poly_uv(img: np.ndarray, uv: np.ndarray, valid: np.ndarray, color=(0, 255, 255), thickness=2): """在图像上画投影多边形(尽量画,点不全就跳过)""" if uv.shape[0] < 3: return pts = [] for i in range(uv.shape[0]): if not bool(valid[i]): pts.append(None) else: pts.append((int(round(float(uv[i, 0]))), int(round(float(uv[i, 1]))))) # 如果有效点太少就不画 if sum([p is not None for p in pts]) < 3: return # 简单策略:将 invalid 点也连上可能会乱,这里只在全 valid 时画闭合 if 
all([p is not None for p in pts]): poly = np.array(pts, dtype=np.int32).reshape(-1, 1, 2) cv2.polylines(img, [poly], True, color, thickness, cv2.LINE_AA) def clearance_vertices_world(track: TrackModel, base: np.ndarray, tangent: np.ndarray) -> np.ndarray: """给定一个 base 点和 tangent(都在 lidar/world),计算该截面顶点世界坐标""" up = track.up t = tangent.astype(np.float32) # 保证 t 在平面内 t = t - up * float(np.dot(t, up)) t = t / (np.linalg.norm(t) + 1e-12) n = np.cross(up, t) n = n / (np.linalg.norm(n) + 1e-12) V = [] for l, v in track.poly_lv: p = base + n * float(l) + up * float(v) V.append(p) return np.array(V, dtype=np.float32) def save_clearance_overlay_image(out_dir: str, img: np.ndarray, track: TrackModel, calib: Dict, T_lidar2cam: np.ndarray, W: int, H: int, n_slices: int = 6): """把隧道限界投影回图像,给你“像素尺度”的直观检查""" vis = img.copy() # 取中心线点等距采样 n_slices cl = track.centerline_world if cl.shape[0] < 2: return # 用累计长度采样 d = np.linalg.norm(cl[1:] - cl[:-1], axis=1) s = np.concatenate([[0.0], np.cumsum(d)], axis=0) total = float(s[-1]) if total < 1e-6: return qs = np.linspace(0.0, total, max(2, int(n_slices))).astype(np.float32) # 插值采样 base 点 bases = [] for q in qs: j = int(np.searchsorted(s, q, side="right") - 1) j = max(0, min(j, s.shape[0] - 2)) t = (q - s[j]) / max(1e-6, (s[j+1] - s[j])) p = (1 - t) * cl[j] + t * cl[j+1] bases.append(p.astype(np.float32)) bases = np.array(bases, dtype=np.float32) # 每个 slice 求 tangent tangents = [] for i in range(bases.shape[0]): if i == 0: tt = bases[1] - bases[0] elif i == bases.shape[0] - 1: tt = bases[-1] - bases[-2] else: tt = bases[i+1] - bases[i-1] if np.linalg.norm(tt) < 1e-6: tt = track.t tangents.append(tt.astype(np.float32)) tangents = np.array(tangents, dtype=np.float32) # 逐 slice 生成截面顶点并投影画出来 for i in range(bases.shape[0]): V = clearance_vertices_world(track, bases[i], tangents[i]) # (M,3) uv, valid = F.project_points_lidar_to_img(V, calib, T_lidar2cam, W, H, use_distortion=F.USE_DISTORTION) draw_poly_uv(vis, uv, valid, color=(0, 255, 255), 
thickness=2) cv2.imwrite(os.path.join(out_dir, "12_clearance_overlay.jpg"), vis) def build_tunnel_mesh(track: TrackModel, n_slices: int = 60) -> Tuple[np.ndarray, np.ndarray]: """ 沿中心线生成“隧道式”mesh: - slices: n_slices - 每个 slice 是 poly_lv 的 M 个点 返回: vertices (Ns*M,3) faces (F,3) int """ cl = track.centerline_world if cl.shape[0] < 2: return np.zeros((0, 3), np.float32), np.zeros((0, 3), np.int32) # 累计长度采样 d = np.linalg.norm(cl[1:] - cl[:-1], axis=1) s = np.concatenate([[0.0], np.cumsum(d)], axis=0) total = float(s[-1]) if total < 1e-6: return np.zeros((0, 3), np.float32), np.zeros((0, 3), np.int32) n_slices = max(2, int(n_slices)) qs = np.linspace(0.0, total, n_slices).astype(np.float32) # bases bases = [] for q in qs: j = int(np.searchsorted(s, q, side="right") - 1) j = max(0, min(j, s.shape[0] - 2)) t = (q - s[j]) / max(1e-6, (s[j+1] - s[j])) p = (1 - t) * cl[j] + t * cl[j+1] bases.append(p.astype(np.float32)) bases = np.array(bases, dtype=np.float32) # tangents tangents = [] for i in range(bases.shape[0]): if i == 0: tt = bases[1] - bases[0] elif i == bases.shape[0] - 1: tt = bases[-1] - bases[-2] else: tt = bases[i+1] - bases[i-1] if np.linalg.norm(tt) < 1e-6: tt = track.t tangents.append(tt.astype(np.float32)) tangents = np.array(tangents, dtype=np.float32) M = track.poly_lv.shape[0] V_all = [] for i in range(n_slices): V = clearance_vertices_world(track, bases[i], tangents[i]) # (M,3) V_all.append(V) vertices = np.vstack(V_all).astype(np.float32) # (n_slices*M,3) # faces: connect slice i and i+1 faces = [] for si in range(n_slices - 1): base0 = si * M base1 = (si + 1) * M for k in range(M): k2 = (k + 1) % M a = base0 + k b = base0 + k2 c = base1 + k2 d0 = base1 + k # two triangles faces.append([a, b, c]) faces.append([a, c, d0]) faces = np.array(faces, dtype=np.int32) return vertices, faces def write_ply_mesh(path: str, vertices: np.ndarray, faces: np.ndarray): vertices = np.asarray(vertices, dtype=np.float32) faces = np.asarray(faces, dtype=np.int32) with 
open(path, "w", encoding="utf-8") as f: f.write("ply\n") f.write("format ascii 1.0\n") f.write(f"element vertex {vertices.shape[0]}\n") f.write("property float x\nproperty float y\nproperty float z\n") f.write(f"element face {faces.shape[0]}\n") f.write("property list uchar int vertex_indices\n") f.write("end_header\n") for v in vertices: f.write(f"{v[0]} {v[1]} {v[2]}\n") for tri in faces: f.write(f"3 {int(tri[0])} {int(tri[1])} {int(tri[2])}\n") def visualize_open3d(points_xyz: np.ndarray, vertices: np.ndarray, faces: np.ndarray, dets: List[F.Det3D]): try: import open3d as o3d except Exception as e: print(f"[WARN] open3d not available: {e}") return geoms = [] # point cloud pcd = o3d.geometry.PointCloud() pts = points_xyz if pts.shape[0] > 150000: idx = np.random.choice(pts.shape[0], 150000, replace=False) pts = pts[idx] pcd.points = o3d.utility.Vector3dVector(pts.astype(np.float64)) geoms.append(pcd) # tunnel mesh if vertices.shape[0] > 0 and faces.shape[0] > 0: mesh = o3d.geometry.TriangleMesh() mesh.vertices = o3d.utility.Vector3dVector(vertices.astype(np.float64)) mesh.triangles = o3d.utility.Vector3iVector(faces.astype(np.int32)) mesh.compute_vertex_normals() # 统一颜色(黄) mesh.paint_uniform_color([1.0, 1.0, 0.0]) geoms.append(mesh) # det boxes as LineSet edges = [(0,1),(1,2),(2,3),(3,0), (4,5),(5,6),(6,7),(7,4), (0,4),(1,5),(2,6),(3,7)] for d in dets: c = F.boxes3d_to_corners(d.box7.reshape(1, 7).astype(np.float32))[0] # (8,3) lines = o3d.geometry.LineSet() lines.points = o3d.utility.Vector3dVector(c.astype(np.float64)) lines.lines = o3d.utility.Vector2iVector(np.array(edges, dtype=np.int32)) lines.colors = o3d.utility.Vector3dVector(np.tile(np.array([[0.0, 1.0, 0.0]]), (len(edges), 1))) geoms.append(lines) o3d.visualization.draw_geometries(geoms) # ========================= # 主流程 # ========================= def parse_args(): ap = argparse.ArgumentParser() ap.add_argument("--frame_id", type=str, default="", help="指定帧号(不含扩展名)") ap.add_argument("--seg_config", 
type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/xuanguashi-s_2xb6-120k_1024x1024-cityscapes.py") ap.add_argument("--seg_ckpt", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation/work_dirs/xuanguashi-s_2xb6-120k_1024x1024-cityscapes/iter_120000.pth") ap.add_argument("--seg_device", type=str, default="cuda:0") ap.add_argument("--rail_id", type=int, default=RAIL_CLASS_ID_DEFAULT) ap.add_argument("--seed", type=int, default=42) ap.add_argument("--topk", type=int, default=20, help="侵界判定最多取多少个 fused det") ap.add_argument("--mmseg_python", type=str, default="/home/jd/anaconda3/envs/mmseg3.0/bin/python", help="mmseg3.0 虚拟环境 python 路径") ap.add_argument("--mmseg_repo", type=str, default="/media/jd/4997BB1603CFE2C4/lw/mmsegmentation", help="mmsegmentation repo 根目录(用于 sys.path 兜底,可空)") # ROI(只影响边缘提取,不画) ap.add_argument("--roi_enable", action="store_true", help="启用梯形 ROI(只用于边缘提取)") # 限界选择(来自设计规范附录 A/B/C) ap.add_argument("--clear_profile", type=str, default="B", choices=["A", "B", "C"], help="限界坐标来源:A=中铁科工, B=中车四方(2.4m车宽), C=中车铺镇") ap.add_argument("--clear_kind", type=str, default="equipment", choices=["vehicle", "equipment"], help="选择车辆限界 or 设备限界") ap.add_argument("--clear_use_hull", action="store_true", help="对限界点取凸包(确保凸性;会自动去共线点)") # 隧道 mesh ap.add_argument("--mesh_slices", type=int, default=60, help="隧道沿中心线采样截面数") ap.add_argument("--vis_o3d", action="store_true", help="用 open3d 交互显示 点云+隧道+检测框") # 图像 overlay 隧道切片数 ap.add_argument("--img_clear_slices", type=int, default=6, help="把隧道投影回图像时画多少个截面") return ap.parse_args() def main(): args = parse_args() random.seed(args.seed) np.random.seed(args.seed) global ROI_ENABLE ROI_ENABLE = bool(args.roi_enable) # 0) 构建“限界截面”(l,v) 米(来自规范附录坐标) clearance_poly_lv = build_clearance_polygon_lv_m( profile=args.clear_profile, kind=args.clear_kind, use_hull=bool(args.clear_use_hull) ) print(f"[CLEARANCE] profile={args.clear_profile} kind={args.clear_kind} 
vertices={clearance_poly_lv.shape[0]} use_hull={args.clear_use_hull}") # 1) load det models device, yolo_model, cfg, dataset, pcdet_model, load_data_to_gpu_fn = F.load_models() idx_map = {Path(p).stem: i for i, p in enumerate(dataset.sample_file_list)} # 2) choose frame bin_files = sorted(list(Path(F.PCDET_POINTS_DIR).glob("*.bin"))) frame_ids = [p.stem for p in bin_files if F.find_image_for_frame(p.stem) is not None and p.stem in idx_map] if not frame_ids: print("[ERROR] no matched (img,bin) frames.") return fid = args.frame_id if args.frame_id else random.choice(frame_ids) if fid not in idx_map: print(f"[ERROR] frame_id {fid} not in dataset index.") return out_dir = os.path.join(OUT_ROOT, fid) os.makedirs(out_dir, exist_ok=True) # 3) read image img_path = F.find_image_for_frame(fid) img = cv2.imread(img_path) if img is None: print(f"[ERROR] cannot read image: {img_path}") return H, W = img.shape[:2] cv2.imwrite(os.path.join(out_dir, "00_raw.jpg"), img) # 4) ROI mask(可能全 1) roi_mask = make_roi_mask(H, W) # 5) segmentation print("[INFO] Running PIDNet(mmseg) in subprocess...") pred = infer_rail_mask_by_subprocess( img_path=img_path, out_npy=os.path.join(out_dir, "seg_pred.npy"), seg_config=args.seg_config, seg_ckpt=args.seg_ckpt, seg_device=args.seg_device, mmseg_python=args.mmseg_python, mmseg_repo=args.mmseg_repo ) rail_info = build_masks_from_rail_pred(pred, args.rail_id, H, W, roi_mask) save_edge_debug_images(out_dir, img, rail_info) print(f"[SEG] rows_valid={rail_info.dbg.get('rows_valid')} rows_kept={rail_info.dbg.get('rows_kept')} median_width_px={rail_info.dbg.get('median_width_px')}") # 6) det inference yolo_dets = F.infer_yolo(yolo_model, img) _, raw_points, pcdet_dets = F.infer_pcdet(cfg, dataset, pcdet_model, load_data_to_gpu_fn, idx_map[fid], device) if raw_points is None or raw_points.shape[0] == 0: print("[ERROR] raw_points empty.") return # 7) choose extrinsic T = F.get_extrinsic_matrix(F.CALIB, raw_points[:, :3], W, H) # 8) fuse fused = 
F.fuse_frame(yolo_dets, pcdet_dets, W, H, F.CALIB, T, raw_points) fused = sorted(fused, key=lambda d: d.score, reverse=True)[:max(1, args.topk)] fused_keep = [d for d in fused if d.score >= 0.10] print(f"[DETS] yolo={len(yolo_dets)} pcdet={len(pcdet_dets)} fused={len(fused)} keep(score>=0.10)={len(fused_keep)}") # 9) project points to image pts_xyz = raw_points[:, :3].astype(np.float32) uv, valid = F.project_points_lidar_to_img(pts_xyz, F.CALIB, T, W, H, use_distortion=F.USE_DISTORTION) vis_all = img.copy() draw_projected_points(vis_all, uv, valid, (200, 200, 200), step=12) vis_all = overlay_mask(vis_all, rail_info.rail_mask01, color=(0,0,255), alpha=0.35) cv2.imwrite(os.path.join(out_dir, "06_proj_all_points.jpg"), vis_all) # 10) rail top points by rail mask rail_mask255 = (rail_info.rail_mask01 * 255).astype(np.uint8) if MASK_DILATE_PX > 0: kd = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (MASK_DILATE_PX, MASK_DILATE_PX)) rail_mask255 = cv2.dilate(rail_mask255, kd, iterations=1) rail_mask01_dil = (rail_mask255 > 0).astype(np.uint8) rail_pts = select_points_by_mask(pts_xyz, uv, valid, rail_mask01_dil) print(f"[RAIL_PTS] selected={rail_pts.shape[0]}") # 11) edge points (for debug) left_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.left_edge_mask, EDGE_MATCH_PX) right_edge_pts = match_points_near_edge(pts_xyz, uv, valid, rail_info.right_edge_mask, EDGE_MATCH_PX) print(f"[EDGE_PTS] left={left_edge_pts.shape[0]} right={right_edge_pts.shape[0]}") # 12) build track + clearance model track = build_track_model(rail_info, F.CALIB, T, rail_pts, clearance_poly_lv) if not track.ok: print("[ERROR] build_track_model failed:", track.dbg) with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f: f.write("build_track_model failed\n") f.write(json.dumps(track.dbg, ensure_ascii=False, indent=2)) return print("\n[TRACK_MODEL]") print(f" origin={track.origin.tolist()}") print(f" t={track.t.tolist()}") print(f" n={track.n.tolist()}") 
print(f" up={track.up.tolist()}") print(f" s_range=[{track.s_min:.2f}, {track.s_max:.2f}]") print(f" clearance vertices={track.poly_lv.shape[0]}") print(f" dbg={track.dbg}") # 13) intrusion metrics metrics: List[IntrusionMetrics] = [intrusion_for_det(track, d) for d in fused_keep] print("\n========== INTRUSION METRICS ==========") if not metrics: print("No fused detections (score>=0.10).") for i, m in enumerate(metrics): print(f"[{i}] cls={m.cls:14s} detScore={m.score:.3f} INTRUDE={m.intrude} ({m.reason})") print(f" s_overlap={m.s_overlap:.2f}m s_ratio={m.s_ratio:.3f}") print(f" area_ratio(l-v)={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}") print(f" center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}m") out_json = { "frame_id": fid, "clearance": {"profile": args.clear_profile, "kind": args.clear_kind, "use_hull": bool(args.clear_use_hull)}, "seg_dbg": rail_info.dbg, "track_dbg": track.dbg, "rules": { "INTRUDE_VOL_RATIO_THR": INTRUDE_VOL_RATIO_THR, "INTRUDE_AREA_RATIO_THR": INTRUDE_AREA_RATIO_THR, "FORCE_CENTER_INSIDE_INTRUDE": FORCE_CENTER_INSIDE_INTRUDE, }, "dets": [m.__dict__ for m in metrics] } with open(os.path.join(out_dir, "intrusion_metrics.json"), "w", encoding="utf-8") as f: json.dump(out_json, f, ensure_ascii=False, indent=2) with open(os.path.join(out_dir, "intrusion_metrics.txt"), "w", encoding="utf-8") as f: for i, m in enumerate(metrics): f.write(f"[{i}] cls={m.cls} score={m.score:.3f} intrude={m.intrude} reason={m.reason}\n") f.write(f" s_overlap={m.s_overlap:.3f} s_ratio={m.s_ratio:.3f}\n") f.write(f" area_ratio={m.area_ratio:.3f} volume_ratio={m.volume_ratio:.3f}\n") f.write(f" center_inside={m.center_inside} signed_dist_center={m.signed_dist_center:.3f}\n") # 14) 把“限界隧道”投影回图像(像素尺度检查) save_clearance_overlay_image(out_dir, img, track, F.CALIB, T, W, H, n_slices=args.img_clear_slices) # 15) 生成隧道 mesh 并导出 ply verts, faces = build_tunnel_mesh(track, n_slices=args.mesh_slices) ply_path = os.path.join(out_dir, 
"13_clearance_tunnel.ply") if verts.shape[0] > 0 and faces.shape[0] > 0: write_ply_mesh(ply_path, verts, faces) print(f"[MESH] saved tunnel mesh: {ply_path} V={verts.shape[0]} F={faces.shape[0]}") else: print("[MESH] failed to build tunnel mesh (too few centerline points?)") # 16) 可选:open3d 交互查看 if args.vis_o3d: visualize_open3d(pts_xyz, verts, faces, fused_keep) print(f"\n[DONE] saved all debug files in: {out_dir}") if __name__ == "__main__": main()
bashCUDA_VISIBLE_DEVICES=1 python fusion/qinjiepanding.py
例如用附录 C 的车辆限界:
bashCUDA_VISIBLE_DEVICES=1 python fusion/qinjiepanding.py \ --clear_profile C --clear_kind vehicle
bashCUDA_VISIBLE_DEVICES=1 python fusion/qinjiepanding.py --vis_o3d
如果你没装 open3d:
bashpip install open3d
在 debug_intrusion_pidnet/<frame_id>/:
12_clearance_overlay.jpg、13_clearance_tunnel.ply。如果你跑完后发现:
12_clearance_overlay.jpg 的限界位置整体偏离轨道,请把 12_clearance_overlay.jpg 和该帧的 intrusion_metrics.json 贴出来(或把轨道点云范围/外参选择日志贴出来),我可以继续把“轨面参考到底是哪一层”的问题彻底校准到位。
现在我遇到了如下问题,第一个问题是,你分割轨道没有错误,但是识别轨道区域绘制三条线,最左侧的是蓝色线条,最右侧是红色,中间为绿色,但是你作三维限界为什么沿着中间绿色线条直接和蓝色线一直向左(或者向右)绘制黄色限界,导致点云那个限界也是错的的,你应该在现实点云中沿着这个方向往地面向下做限界,左侧轨道板宽度加上限界左侧安全冗余不就是左侧限界,按照绿色线作右侧对称,要利用的肯定是这三条线构成的区域和他们的向下的区域,而不是一直向左,一直偏向一个方向这肯定是错误的,这是悬挂式列车,列车悬挂在轨道下方,所以你是给列车往下方做限界,也就是点云中限界是向下作,你现在的全是偏轨道左或者右的一侧,另一侧不使用,同样地,点云的限界也是按照思路重新修改,现在的方向完全是错误的,还有就是限界点云颜色不要这么多,就只要黑白对比点云和背景,然后限界是其他颜色就行,同时限界要向立方体的面,是实际的墙一样,可以调整透明度,不是现在切换不同角度才能看到一个面,理解吗,同时你要理解的是,轨道只是参考,你需要将限界和点云结合起来,限界是沿着轨道点云延申,不是现在的随着图像分割方向延伸,而不考虑点云走向,现在还不能指定帧,只能通过seed随机抽取
你这个“限界一直偏到左/右侧”的根因,本质上是 轨道坐标系的方向(t/n/up)算错了:
你原来是用中心线(绿线)做 ray-plane 得到一串 3D 点,然后用 PCA/端点差去估计 t(沿线方向)。但 PCA 很容易在某些帧里把主方向拿成“横向”(或者符号不稳定),接着 n = up × t 就会变成“沿线/横向乱跳”,导致你看到黄色限界在点云里跑到一侧,看起来像“只用了一边”。
另外你说的黄色梯形框,其实是 ROI(只为限制边缘提取的搜索区域),确实不应该让人误会成“限界截面”。
你要的行为是:
我现在优先使用 左/右边缘(蓝/红) 做 ray-plane 求交:
- up:由点云拟合的轨面法向决定“向上”方向
- 对每一行分别取左右边缘交点 pL(y)、pR(y),中心取 pC(y) = (pL+pR)/2(保证严格居中,不会偏向一边)
- 横向 n 取 pR - pL(跨轨道宽度方向),并投影到轨面平面内
- t = n × up(这样 t 一定在轨道平面内、并且与横向正交)
- 限界截面在 (l,v) 上构建,其中 v<0 表示向下(悬挂式列车正确)

我把脚本整理成一个新的完整文件:
intrusion_clearance_pidnet_o3d.py
👉 下载: intrusion_clearance_pidnet_o3d.py
bashpython intrusion_clearance_pidnet_o3d.py --list_frames
bashpython intrusion_clearance_pidnet_o3d.py --frame_id 2025042116142702
bashpython intrusion_clearance_pidnet_o3d.py --frame_index 0 python intrusion_clearance_pidnet_o3d.py --frame_index -1 # 最后一帧
bashpython intrusion_clearance_pidnet_o3d.py --frame_id 2025042116142702 --roi_enable 0
bashpython intrusion_clearance_pidnet_o3d.py --frame_id 2025042116142702 --mesh_alpha 0.25
输出目录:debug_intrusion_pidnet/<frame_id>/
12_clearance_overlay.jpg:把若干个八边形截面投影回图像(你能直观看到它是否居中、对称、向下)13_clearance_tunnel.ply:三角网格“隧道包络体”,可用 MeshLab/CloudCompare/Open3D 打开Open3D 会弹出窗口,默认:
你上传的《悬挂式单轨交通设计规范》里给了限界制定的基本参数范围:
车辆最大宽度 2300~2500mm、车体底部距轨面高度 3400~3700mm 等(单轨设计.pdf)。
脚本默认用的是上限(2.5m、3.7m),同时把这些都做成了命令行参数,你可以直接按你的车型/工况改:
bashpython intrusion_clearance_pidnet_o3d.py \ --frame_id 2025042116142702 \ --vehicle_width_m 2.4 \ --vehicle_bottom_h_m 3.6 \ --side_margin_m 0.05 \ --height_extra_m 0.20
另外,规范附录 A/B/C 里其实给了更精细的 Y-Z 坐标点表(车辆限界/设备限界),例如附录 B 的表格就是一串坐标点(单轨设计.pdf)。
你如果希望“截面严格按规范坐标点(不一定是八边形)”,我也可以在这份脚本上再加一个--profile_from_spec A/B/C --profile_type vehicle/equipment,直接用那些点生成截面(并镜像成左右对称),但你当前明确要求“八边形”,所以我先按“八边形 + 参数化”把方向/对称/向下/隧道体这些关键问题彻底修正了。
(左边缘交点 + 右边缘交点)/2n 用左右边缘决定,t = n × up--frame_id / --frame_index / --list_frames如果你跑完后发现 12_clearance_overlay.jpg 里八边形顶边没贴住轨道(顶边太宽/太窄),你把那张图和 intrusion_metrics.json 里 track_dbg 的 half_w_track、px_per_meter_est 发我,我可以再把“顶边宽度=轨道梁/轨道板真实宽度+安全间隙”的那一套,直接从你点云里估计得更稳定(比如用 rail_pts 的横向分位数来替代 mask 边缘宽度)。