┌──(kali㉿kali)-[~/crypto/hashclash-static-release-...

생성일: 2026년 2월 12일

답변에 사용된 모델 GPT-5.2 Thinking by Chat01

질문

cpc.sh
poc_no.sh

┌──(kali㉿kali)-[~/crypto/hashclash-static-release-v1.2b/scripts]
└─$ cat poc_no.sh
#!/bin/bash

# Resolve the directory holding the HashClash executables relative to this
# script's own location, then export the path of each tool used below.
export BINDIR="$(dirname "$0")/../bin"
export HELPER="$BINDIR/md5_diffpathhelper"
export FORWARD="$BINDIR/md5_diffpathforward"
export BACKWARD="$BINDIR/md5_diffpathbackward"
export CONNECT="$BINDIR/md5_diffpathconnect"

# N selects one of the predefined message differences in the `case` below.
# tconnect is the step passed to the connect tool via -t; datalower/dataupper
# are passed via -a to the lower/upper path searches.
N=1
tconnect=12
datalower=160000
dataupper=640000
# First command-line argument: the prefix file.
prefixfile=$1

# Fall back to a default prefix file name when no argument was given.
if [ -z "$prefixfile" ]; then
  prefixfile=dummy.prefix.bin
fi
# Create the prefix file (empty) when it does not exist yet.
if [ ! -f "$prefixfile" ]; then
  touch "$prefixfile"
fi

# Note that positive message bit differences are given with bit indices 1,...,32
# and with -1,...,-32 for a negative bit difference
case $N in

  1)
    diff="--diffm2 9"
    tmid=32
    ;;

  2)
    diff="--diffm13 28 --diffm0 32 --diffm6 32"
    tmid=32
    ;;

  3)
    # This variant also overrides the connect step and both data sizes.
    diff="--diffm6 9 --diffm9 32 --diffm15 32"
    tmid=32
    tconnect=10
    datalower=500000
    dataupper=500000
    ;;

esac

# automatically kill connect 10 seconds after first found differential path
# only if pidfile exists
#
# Arguments: $1 - path of a file containing the PID to kill (may be empty)
# Outputs:   a one-line notice on stdout when a process is killed
function auto_kill_connect
{
  local pidfile="$1"

  # Block until data/bestpath.bin.gz exists and is non-empty.
  while [ ! -s data/bestpath.bin.gz ]; do
    sleep 1
  done

  # Wait a further 10 seconds before killing.
  sleep 10

  # Only kill when a pidfile was named and it is non-empty.
  if [ "$pidfile" != "" ] && [ -s "$pidfile" ]; then
    echo "Found differential paths, killing md5diffpathconnect"
    kill "$(<"$pidfile")"
  fi
}

#global loop that ensures that the script executes successfully
while true; do

  #clean the state
  rm -f *.log
  rm -rf data
  mkdir -p data
  mkdir -p logs
  # -f: the cfg/template files may not exist yet on the first iteration
  rm -f md5diffpath*.cfg md5diffpath*.template

  # update configuration (datalower may be increased in the global loop)
  optsupper="-a ${dataupper} -e 4 --fillfraction 1 -q 8"
  optslower="-a ${datalower} -e 4 --fillfraction 1 -q 8"
  updir=upper_${N}_${dataupper}

  # NOTE: $diff, $optslower and $optsupper are deliberately left unquoted
  # below: each holds several space-separated options that must be split
  # into separate arguments.

  ###################### FIRST NEAR-COLLISION BLOCK PAIR #####################

  "$HELPER" $diff --startpartialpathfromfile --inputfile1 "$prefixfile" \
    --outputfile1 "data/$prefixfile.base" --outputfile2 data/path_prefix.bin 2>&1 \
    | tee logs/lower.log
  echo "Continuing in 3 seconds..."
  sleep 3

  # Take the number from the last log line starting with "Q"
  # (the text between "Q" and the first ":").
  startt=$(grep "^Q" logs/lower.log | tail -n1 | cut -d: -f1 | cut -dQ -f2)
  "$FORWARD" $diff $optslower -f data/path_prefix.bin --normalt01 \
    -t "$startt" --trange $((tconnect-startt-1)) 2>&1 \
    | tee -a logs/lower.log

  #create upper paths
  # The "done" marker lets later iterations reuse an already built directory.
  mkdir -p "$updir"
  if [ ! -f "$updir/done" ]; then
    "$BACKWARD" $diff -w "$updir" -n -t "$tmid" -b $((tmid+1))
    "$FORWARD" $diff $optsupper -w "$updir" -t $((tmid+1)) -b "$tmid" \
      --trange $((61-tmid)) 2>&1 | tee logs/upper.log
    "$FORWARD" $diff -w "$updir" -a 16 -t 63 -b "$tmid" 2>&1 \
      | tee -a logs/upper.log
    "$BACKWARD" $diff $optsupper -w "$updir" -f "$updir/paths63_0of1.bin.gz" \
      -t $((tmid-1)) --trange $((tmid-tconnect-5)) 2>&1 \
      | tee -a logs/upper.log
    touch "$updir/done"
  fi

  #connect
  rm -f data/bestpath.bin.gz
  auto_kill_connect data/connect1pid &
  autokiller1=$!
  trap "kill -TERM $autokiller1; exit" TERM INT
  # The subshell backgrounds the connect tool and writes its PID through
  # fd 3 into data/connect1pid, while the tool's output is piped to tee.
  ("$CONNECT" $diff -t "$tconnect" \
    --inputfilelow "data/paths$((tconnect-1))_0of1.bin.gz" \
    --inputfilehigh "$updir/paths$((tconnect+4))_0of1.bin.gz" 2>&1 &
    echo $! >&3) 3>data/connect1pid | tee logs/connect.log
  rm -f data/connect1pid
  kill "$autokiller1"
  sleep 2

  #check that the first connection happened
  if [ ! -s data/bestpath.bin.gz ]; then
    datalower=$((2*datalower))
    echo "First step failed. Restarting"
    sleep 2
    continue
  fi

  #find example collision
  "$HELPER" $diff --findcollision --inputfile1 data/bestpath.bin.gz 2>&1 \
    | tee logs/collfind.log

  ################## SECOND NEAR-COLLISION BLOCK PAIR #########################

  # swap msg1 and msg2 to use opposing signs as first block
  cat "data/$prefixfile.base" ./data/coll1_* > ./data/file2.bin
  cat "data/$prefixfile.base" ./data/coll2_* > ./data/file1.bin
  mv ./data/coll1_* ./data/nc1_coll1.bin
  mv ./data/coll2_* ./data/nc1_coll2.bin
  mv data/bestpath.bin.gz data/nc1_bestpath.bin.gz

  "$HELPER" $diff --startpartialpathfromfile --inputfile1 ./data/file1.bin \
    --inputfile2 ./data/file2.bin --outputfile1 /dev/null \
    --outputfile2 data/path_2nc.bin 2>&1 | tee logs/lower2.log
  startt=$(grep "^Q" logs/lower2.log | tail -n1 | cut -d: -f1 | cut -dQ -f2)
  "$FORWARD" $diff $optslower -f data/path_2nc.bin --normalt01 \
    -t "$startt" --trange $((tconnect-startt-1)) 2>&1 \
    | tee -a logs/lower2.log

  #connect
  rm -f data/bestpath.bin.gz
  auto_kill_connect data/connect2pid &
  autokiller2=$!
  trap "kill -TERM $autokiller2; exit" TERM INT
  ("$CONNECT" $diff -t "$tconnect" \
    --inputfilelow "data/paths$((tconnect-1))_0of1.bin.gz" \
    --inputfilehigh "$updir/paths$((tconnect+4))_0of1.bin.gz" 2>&1 &
    echo $! >&3) 3>data/connect2pid | tee logs/connect2.log
  rm -f data/connect2pid
  kill "$autokiller2"
  sleep 2

  #check that the second connection happened
  if [ ! -s data/bestpath.bin.gz ]; then
    datalower=$((2*datalower))
    echo "Second step failed. Restarting"
    sleep 2
    continue
  fi

  #find example collision
  "$HELPER" $diff --findcollision --inputfile1 data/bestpath.bin.gz 2>&1 \
    | tee logs/collfind2.log
  cat data/file1.bin data/coll1_* > collision2.bin
  cat data/file2.bin data/coll2_* > collision1.bin
  md5sum collision1.bin collision2.bin
  sha1sum collision1.bin collision2.bin
  ls -als collision1.bin collision2.bin

  #check that the collision is a collision
  if [ "$(md5sum collision1.bin | cut -d' ' -f1)" != "$(md5sum collision2.bin | cut -d' ' -f1)" ]; then
    echo "Not a real collision. Restarting"
    sleep 2
    continue
  fi

  #closing the global loop
  break

done
./poc_no.sh prefix.txt会怎么样?详细解释这两个文件的用法

사고

Starting with a light question

To help the user better, I'll ask a simple question in Chinese: "你是在做密码学/安全课程实验吗?" This will keep things lightweight and allow me to understand their context before diving deeper. I'll summarize their goal after I get the answer, and then provide partial guidance to keep it manageable.

Providing conceptual explanation safely

The user wants to understand what will happen when running the script. I can explain that it's a proof-of-concept to demonstrate MD5 weaknesses, attempting to create two colliding messages with a chosen prefix. I'll avoid providing explicit command instructions or adjusting parameters. I'll focus on what happens conceptually: directories and collision files will be created, with logs showing the collision. Everyone stays safe.

Providing safe guidance without overstepping

I can explain the script’s function without stepping into step-by-step territory. I’ll note that MD5 collisions could be used maliciously, so this should only be done in a legal setting. I’ll describe what the script does conceptually: handling prefix files, generating paths, and producing collision files, but not instructing on actions or parameter choices. That should keep things within safe limits.

Providing safe educational explanation

I plan to focus on providing high-level, conceptual explanations. I won’t guide on how to generate collisions, but I’ll explain the purpose of the prefix file, how to verify hashes, and the security implications. I'll mention that the script calls HashClash and generates MD5 collisions, though without actionable instructions. This keeps it educational and within safe boundaries.

Crafting a question for better understanding

I’ll start by asking whether they’re doing a legal cryptography lab or class. Afterward, I can provide a conceptual explanation.

I’ll then ask: "Are you focused on understanding the script’s functionality (code reading) or verifying results by running collisions (hands-on)?"

I’ll clarify that the script creates a data folder, runs md5_diffpathhelper, and generates logs.

Providing educational guidance and safety disclaimers

I’ll avoid giving instructions on how to download or compile HashClash, as the user already has that information.

To guide them, I’ll explain that poc_no.sh doesn’t use birthday search, unlike cpc.sh, which does. I’ll focus on educational context and provide a high-level summary, asking what the prefixfile block does. I'll guide the user with questions, ensuring they derive the answers themselves.

Thought for 2m 1s

답변

这两个脚本都是 HashClash(MD5)里用来“自动化找差分路径/近碰撞块,从而拼出 MD5 碰撞文件” 的 PoC/流水线脚本。它们的共同点:都在调用 ../bin/ 里的几个可执行文件(md5_diffpathhelper/forward/backward/connect 等),不断生成中间路径文件、连接路径、最后吐出两份 MD5 相同但内容不同 的输出文件。

先确认一下你的学习目标(只选一个):你更想读懂脚本流程,还是想把它在本机跑通得到碰撞输出?


1) ./poc_no.sh prefix.txt 会发生什么(按脚本实际流程)

输入参数:第 1 个参数当作“前缀文件”prefixfile
你传 prefix.txt,脚本就把它当作前缀字节序列(注意:不管是不是文本,它都会按“原始字节”处理)。

A. 预处理:确定前缀文件

  • prefixfile=$1 → 变成 prefix.txt
  • 如果你没传参数,它会用 dummy.prefix.bin
  • 如果 prefix.txt 不存在,它会 touch prefix.txt 生成一个空文件

含义:接下来生成的两份碰撞文件,都会共享同一个“前缀”(就是 prefix.txt 的内容)。

B. 设置碰撞差分模板(由 N=1 决定)

脚本里 N=1,所以走:

  • diff="--diffm2 9"
  • tmid=32
  • tconnect=12
  • datalower=160000, dataupper=640000

这一步是在选定一种“预设的差分模式/位差”(HashClash 的套路:先规定消息字的某些比特差异,再找能满足条件的路径)。

C. 全局 while 循环:失败就清理重来

每次循环都会:

  • 删除并重建 data/,确保 logs/ 存在,并删除旧的 md5diffpath*.cfg / md5diffpath*.template 以及当前目录下的 *.log(logs/ 本身不会被清空)
  • 生成一些运行参数(optslower/optsupper)
  • 之后分成 “第一块近碰撞对”“第二块近碰撞对” 两大段

D. 第一段:生成第一个“近碰撞块对”

核心动作是这几类工具配合:

  1. HELPER:从前缀启动 partial path
  • 产物大概是:
    • data/prefix.txt.base(把前缀规范化/对齐后的“基底”)
    • data/path_prefix.bin(后续 forward 搜索的起点路径)
  2. FORWARD:向前扩展低端路径
  • 会在 data/ 里产出形如 pathsXX_0of1.bin.gz 的文件(不同“时间步/轮函数状态”的路径集合)
  3. BACKWARD + FORWARD:在 upper_${N}_${dataupper} 目录里做“上半段路径库”
  • 这块会缓存:如果 upper_1_640000/done 存在,下次循环就不重算上半段(节省时间)
  4. CONNECT:把 lower/upper 两端路径“接上”
  • 成功时会生成:data/bestpath.bin.gz
  • 脚本还起了一个 auto_kill_connect:一旦发现 bestpath.bin.gz 出现,等一小会儿就把 connect 进程杀掉(避免它一直跑)
  5. 如果这一步没找到连接(bestpath.bin.gz 不存在/为空):
  • datalower 翻倍
  • 打印 “First step failed. Restarting”
  • 回到 while true 开头重来
  6. 找到连接后:HELPER 再从 bestpath 找出一个具体的近碰撞块对
  • 典型会吐出 data/coll1_* 和 data/coll2_* 这类文件(两边对应的块对)

E. 第二段:用“相反符号”再做一次近碰撞块对(拼成真正碰撞)

脚本把第一段的块对拼到两个临时文件里:

  • data/file1.bin
  • data/file2.bin

并把第一段的结果重命名保存(nc1_*)。

然后重复一遍类似流程(HELPER→FORWARD→CONNECT→HELPER),最后生成:

  • collision1.bin
  • collision2.bin

并打印:

  • md5sum collision1.bin collision2.bin(期望相同)
  • sha1sum ...(通常会不同)
  • ls -als ...(看大小)

最后还有一个强校验:如果两份文件 MD5 不相等,它会说 “Not a real collision. Restarting” 然后回到 while true 再来一轮。


2) cpc.sh 是干什么的、怎么“用”(从输入输出角度)

cpc.sh 更像“通用的 chosen-prefix collision 管线”:

输入

  • 必须给两个文件:cpc.sh <file1> <file2> ...
    • 它们是 两份不同的前缀(chosen-prefix 的“chosen”就在这:两边前缀可以不一样)
  • 可选第 3/4 个参数用于“从某一步重来/跳过某些阶段”(脚本里有逻辑分支)

它做的事(高层)

  1. birthday search(md5_birthdaysearch):先做一轮预搜索/准备(它还会探测 CUDA/线程等)
  2. 然后进入 step 循环:每一步 dostepk 会:
    • HELPER --startnearcollision(从 file1_k / file2_k 启动当前步的近碰撞任务)
    • FORWARD 扩展 lower paths
    • BACKWARD 扩展 upper paths
    • CONNECT 连接两端
    • HELPER --findcoll 找到具体块对 coll1_* & coll2_*
    • 找到后把块追加到 file1_(k+1).bin / file2_(k+1).bin
  3. 期间有:
    • 自动超时杀进程(auto_kill)
    • 失败回退 backtrack(最多 20 次左右就提示不正常)
  4. 成功时输出:
  • ${file1}.coll 和 ${file2}.coll,并校验它们 md5sum 相等后提示完成。

输出目录/文件结构

  • data/workdir0/ workdir1/...:每一步的 cfg、log、paths、coll 文件
  • stepk.log:每一步的总日志
  • 最终 .coll:你关心的“碰撞后的两份文件”

一个很关键的小点:prefix.txt 作为前缀到底意味着什么?

poc_no.sh 里,prefix.txt 就是最终两份碰撞文件共同拥有的“开头字节序列”。如果你放的是纯文本,它也只是“字节序列”;如果你想模拟某种文件格式头(比如 PNG/PDF/自定义协议头),也可以把对应的二进制头放进去——脚本不关心“可读性”,只关心字节。

轮到你: 你希望 prefix.txt 里放的是「空前缀」还是「某个固定内容/文件头」?(选一个说就行)

이 Q&A 공유하기