def get_class_index_map(class_info_path='/home/tan...
Created on: May 27, 2025
```python
import math

import numpy as np
import pandas as pd

# `config` is assumed to be defined elsewhere in the project; it is referenced
# below but was not imported in the snippet I pasted.


def get_class_index_map(class_info_path='/home/tanggq/ASD_model/ASD_annotations/Class Index_Detection.txt'):
    # Map original class ids (as stored in the annotation file) to contiguous
    # indices starting at 1, and map those indices back to class names.
    txt = np.loadtxt(class_info_path, dtype=str)
    originidx_to_idx = {}
    idx_to_class = {}
    for idx, l in enumerate(txt):
        originidx_to_idx[int(l[0])] = idx + 1
        idx_to_class[idx + 1] = l[1]
    print('ASD')  # stray debug print
    return originidx_to_idx, idx_to_class


def get_video_info(video_info_path):
    # Read per-video metadata (fps, sampled fps, frame counts) into a dict
    # keyed by video name.
    df_info = pd.DataFrame(pd.read_csv(video_info_path)).values[:]
    video_infos = {}
    for info in df_info:
        video_infos[info[0]] = {
            'fps': info[1],
            'sample_fps': info[2],
            'count': info[3],
            'sample_count': info[4]
        }
    return video_infos


def get_video_anno(video_infos,
                   video_anno_path):
    # Read the annotation CSV and rescale each [start_frame, end_frame] from
    # original-frame coordinates to sampled-frame coordinates.
    df_anno = pd.DataFrame(pd.read_csv(video_anno_path)).values[:]
    originidx_to_idx, idx_to_class = get_class_index_map()
    video_annos = {}
    for anno in df_anno:
        video_name = anno[0]
        originidx = anno[2]
        start_frame = anno[-2]
        end_frame = anno[-1]
        count = video_infos[video_name]['count']
        sample_count = video_infos[video_name]['sample_count']
        ratio = sample_count * 1.0 / count
        start_gt = start_frame * ratio
        end_gt = end_frame * ratio
        class_idx = originidx_to_idx[originidx]
        if video_annos.get(video_name) is None:
            video_annos[video_name] = [[start_gt, end_gt, class_idx]]
        else:
            video_annos[video_name].append([start_gt, end_gt, class_idx])
    return video_annos


def annos_transform(annos, clip_length):
    # Normalize annotation boundaries to [0, 1] relative to the clip length.
    res = []
    for anno in annos:
        res.append([
            anno[0] * 1.0 / clip_length,
            anno[1] * 1.0 / clip_length,
            anno[2]
        ])
    return res


def split_videos(video_infos,
                 video_annos,
                 clip_length=config['dataset']['training']['clip_length'],
                 stride=config['dataset']['training']['clip_stride']):
    # video_infos = get_video_info(config['dataset']['training']['video_info_path'])
    # video_annos = get_video_anno(video_infos,
    #                              config['dataset']['training']['video_anno_path'])
    training_list = []
    min_anno_dict = {}
    for video_name in video_annos.keys():
        min_anno = clip_length
        sample_count = video_infos[video_name]['sample_count']
        annos = video_annos[video_name]
        if sample_count <= clip_length:
            # The whole video fits into a single clip.
            offsetlist = [0]
            min_anno_len = min([x[1] - x[0] for x in annos])
            if min_anno_len < min_anno:
                min_anno = min_anno_len
        else:
            # Sliding windows, plus a tail window if the stride leaves a gap.
            offsetlist = list(range(0, sample_count - clip_length + 1, stride))
            if (sample_count - clip_length) % stride:
                offsetlist += [sample_count - clip_length]
        for offset in offsetlist:
            left, right = offset + 1, offset + clip_length
            cur_annos = []
            save_offset = False
            for i, anno in enumerate(annos):
                max_l = max(left, anno[0])
                min_r = min(right, anno[1])
                # IoA = intersection / annotation length (not IoU).
                ioa = (min_r - max_l) * 1.0 / (anno[1] - anno[0])
                if ioa >= 1.0:
                    # The annotation lies completely inside this window.
                    save_offset = True
                if ioa >= 0.5:
                    # Keep (possibly truncated) annotations in window coordinates.
                    cur_annos.append([max(anno[0] - offset, 1),
                                      min(anno[1] - offset, clip_length),
                                      anno[2]])
            if len(cur_annos) > 0:
                min_anno_len = min([x[1] - x[0] for x in cur_annos])
                if min_anno_len < min_anno:
                    min_anno = min_anno_len
            anno_1 = anno  # note: holds the *last* annotation of the loop above
            if save_offset:
                # Paint 0/1 masks around each boundary (band of width d, at least 2 frames).
                start = np.zeros([clip_length])
                end = np.zeros([clip_length])
                for anno in cur_annos:
                    s, e, id = anno
                    d = max((e - s) / 10.0, 2.0)
                    start_s = np.clip(int(round(s - d / 2.0)), 0, clip_length - 1)
                    start_e = np.clip(int(round(s + d / 2.0)), 0, clip_length - 1) + 1
                    start[start_s: start_e] = 1
                    end_s = np.clip(int(round(e - d / 2.0)), 0, clip_length - 1)
                    end_e = np.clip(int(round(e + d / 2.0)), 0, clip_length - 1) + 1
                    end[end_s: end_e] = 1
                training_list.append({
                    'video_name': video_name,
                    'offset': offset,
                    'annos': cur_annos,
                    'start': start,
                    'end': end,
                    'anno_k': i,
                    'anno': anno_1
                })
        min_anno_dict[video_name] = math.ceil(min_anno)
    return training_list, min_anno_dict
```
Please explain in detail how the `split_videos` function works, with `clip_length` set to 256. I want to know what this function does with large segments: some videos in my annotations last about 3 minutes, i.e. close to 1800 frames. I also still don't quite understand (or agree with) the IoA computation; I think that when a segment is too long, the IoA can never reach 1.
```python
annotation_len = 1800
clip_length = 256
ioas = []
for offset in range(0, annotation_len - clip_length + 1, 256):
    left = offset + 1
    right = offset + clip_length
    max_l = max(left, 1)
    min_r = min(right, annotation_len)
    ioa = (min_r - max_l) / annotation_len
    ioas.append(ioa)

max(ioas)
# 0.14166666666666666
```
Below I'll take `split_videos` apart in the order idea → pipeline → details → why long segments never get in → how to fix it (assuming `clip_length = 256`).
**Idea.** `split_videos` cuts each video's sampled-frame sequence into fixed-length `clip_length` (256-frame) training clips and produces two outputs:

- `training_list`: one element per clip, containing
  - `video_name`: the source video's name
  - `offset`: the window's start point in the sampled-frame sequence
  - `annos`: all annotations inside the window (or overlapping it by ≥ 50 %), remapped to window coordinates
  - `start`, `end`: 0/1 sequences of length 256, fed to the boundary-regression network
- `min_anno_dict`: the length (in frames) of the shortest annotation in each video

**Pipeline.**

| Step | Key variables | Role | Notes |
|---|---|---|---|
| ① Preprocessing | `video_infos`, `video_annos` | per-video frame count, sampled frame count, annotated segments | prepared by the earlier functions |
| ② Build `offsetlist` | `range(0, sample_count - clip_length + 1, stride)` | determines the window start points | if the leftover frames don't fill one more stride, an extra "tail" window is appended |
| ③ Iterate over windows | `left = offset + 1`, `right = offset + clip_length` | the current window's left/right boundaries in the sampled sequence (closed interval) | note this differs from the original frame sequence |
| ④ Compute intersection | `ioa = (min_r - max_l) / anno_len` | IoA = intersection / annotation length | not IoU, and not intersection / window length |
| ⑤ Decide whether to keep the window | `save_offset = True` iff some annotation's IoA ≥ 1 | means "that annotation is fully contained in the window" | |
| ⑥ Filter/collect annotations | any annotation with IoA ≥ 0.5 joins `cur_annos` | used to build the start/end probability masks | |
| ⑦ Build probability masks | paint 1s within ±d/2 of each boundary (d = 10 % of anno_len, at least 2 frames) | the regression network attends to frames near boundaries | |
| ⑧ Write to `training_list` | the clip is appended only when `save_offset == True` | guarantees each clip contains at least one complete event | |
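To make the table concrete, here is a short walk-through of steps ②–⑧ on made-up numbers: a 700-frame sampled sequence, stride 128, and one 80-frame annotation (all values are illustrative, not from your dataset):

```python
# Steps ② (offsetlist), ④ (IoA) and ⑥ (window coordinates) on toy numbers.
sample_count, clip_length, stride = 700, 256, 128

offsetlist = list(range(0, sample_count - clip_length + 1, stride))  # [0, 128, 256, 384]
if (sample_count - clip_length) % stride:
    offsetlist += [sample_count - clip_length]                       # tail window at 444

anno = [300, 380, 1]  # [start_gt, end_gt, class_idx], 80 frames long
for offset in offsetlist:
    left, right = offset + 1, offset + clip_length
    max_l, min_r = max(left, anno[0]), min(right, anno[1])
    ioa = (min_r - max_l) / (anno[1] - anno[0])
    print(offset, round(ioa, 3))
# 0   -0.55    no overlap (negative values fail both thresholds)
# 128  1.0     save_offset=True; enters cur_annos as [172, 252, 1]
# 256  1.0     save_offset=True; enters cur_annos as [44, 124, 1]
# 384 -0.062
# 444 -0.812
#
# For the offset=128 clip, step ⑦ uses d = max(80 / 10, 2) = 8, so
# start[168:177] = 1 (around frame 172) and end[248:256] = 1 (around frame 252).
```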
**Why long segments never get in.** Suppose an annotation has length L = 1800 and the window length is W = 256. Any window's intersection with this annotation is at most W, so

IoA ≤ W / L = 256 / 1800 ≈ 0.142 < 1.

- `save_offset` therefore never becomes True for it, so these windows are not written to `training_list` (unless you enlarge `clip_length`, or cut the event into several sub-events).
- Since 0.142 is also below the 0.5 threshold, the long annotation never even reaches `cur_annos` as a truncated annotation.
- Net effect: with `save_offset` never True, the long annotation is ignored wholesale. `min_anno_len` may still be updated along the way, but that value is only used for data analysis / tuning hints.

**How to fix it.**

- Raise `clip_length` to ≥ 1800.
- Relax the `save_offset` condition, e.g. also append when `ioa >= 0.5`.
- Pre-split long annotations into sub-events no longer than `clip_length` (see the sketch below).
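For the third option, a minimal sketch of pre-splitting; `split_long_anno` is a name I made up, and in practice you would apply it while building `video_annos`:

```python
def split_long_anno(anno, clip_length=256):
    """Cut [start, end, class_idx] into consecutive pieces of at most clip_length frames."""
    start, end, cls = anno
    pieces = []
    while end - start > clip_length:
        pieces.append([start, start + clip_length, cls])
        start += clip_length
    pieces.append([start, end, cls])  # the (possibly short) remainder
    return pieces

print(split_long_anno([0, 1800, 3]))
# [[0, 256, 3], [256, 512, 3], ..., [1536, 1792, 3], [1792, 1800, 3]]
```

Note the last piece can be very short (8 frames here); a real implementation might merge it into the previous piece or drop it.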
**A few other details worth flagging.**

| Code snippet | Problem | Suggestion |
|---|---|---|
| `anno_1 = anno` placed after the for-loop | `anno` only holds the last iteration's value | if you want it for debugging/logging, bind a local variable before entering the loop (demonstrated below) |
| `(sample_count - clip_length) % stride` | the tail window can overlap the second-to-last window heavily | if duplicated samples are a concern, add an extra overlap filter |
| `max((e - s) / 10.0, 2.0)` | for very short annotations d would fall below 2 frames but is forced up to 2 | tune as needed |
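The first pitfall is easy to demonstrate (illustrative values):

```python
# After a for-loop, the loop variable keeps its value from the *last* iteration,
# so `anno_1 = anno` placed after the loop does not track the matching annotation.
annos = [[10, 50, 1], [300, 900, 2]]
for i, anno in enumerate(annos):
    pass  # imagine the IoA checks here
anno_1 = anno      # runs after the loop, so it sees only the final iteration
print(i, anno_1)   # 1 [300, 900, 2], always the last annotation, whatever matched
```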
**Bottom line.** The core gate in `split_videos` is IoA ≥ 1. For annotations longer than `clip_length`, either raise `clip_length`, relax the `save_offset` test, or split the annotations beforehand; otherwise they will never enter the training set.
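For reference, relaxing the gate amounts to a one-line change inside `split_videos`'s inner loop (a sketch of the second fix, not the original behavior; it also weakens the "at least one complete event" guarantee):

```python
# Relaxed variant (sketch): keep a window as soon as half of some annotation
# falls inside it, instead of requiring full containment.
if ioa >= 0.5:        # original: if ioa >= 1.0
    save_offset = True
```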