┌──(kali㉿kali)-[~/crypto/hashclash-static-release-...

Question

┌──(kali㉿kali)-[~/crypto/hashclash-static-release-v1.2b/scripts]
└─$ cat poc_no.sh
#!/bin/bash

# Resolve the directory holding the HashClash binaries relative to this
# script's own location, then export the path of each tool so the rest of
# the script can invoke them by variable.
BINDIR="$(dirname "$0")/../bin"
export BINDIR
export HELPER="$BINDIR/md5_diffpathhelper"
export FORWARD="$BINDIR/md5_diffpathforward"
export BACKWARD="$BINDIR/md5_diffpathbackward"
export CONNECT="$BINDIR/md5_diffpathconnect"

# Default search parameters. N selects the arm of the case statement that
# sets the message differences; the other values are defaults used by the
# path-search tools.
N=1
tconnect=12
datalower=160000
dataupper=640000
# First command-line argument: the prefix file handed to the helper tool.
prefixfile=$1

# Fall back to a dummy prefix when no argument was given.
if [ -z "$prefixfile" ]; then
    prefixfile=dummy.prefix.bin
fi
# Make sure the prefix file exists; a missing file is created empty.
if [ ! -f "$prefixfile" ]; then
    touch "$prefixfile"
fi

# Select the message differences (diff) and the step tmid for variant N.
# Variant 3 additionally overrides tconnect, datalower and dataupper.
#
# Note that positive message bit differences are given with bit indices 1,...,32
# and with -1,...,-32 for a negative bit difference
case "$N" in

    1)
    diff="--diffm2 9"
    tmid=32
    ;;

    2)
    diff="--diffm13 28 --diffm0 32 --diffm6 32"
    tmid=32
    ;;

    3)
    diff="--diffm6 9 --diffm9 32 --diffm15 32"
    tmid=32
    tconnect=10
    datalower=500000
    dataupper=500000
    ;;

esac

# automatically kill connect 10 seconds after first found differential path
# only if pidfile exists
#
# Arguments: $1 - path of the file holding the PID of the connect process
# Outputs:   prints a notice to stdout right before killing the process
#
# Polls once per second until data/bestpath.bin.gz exists and is non-empty,
# waits 10 more seconds, then kills the PID read from the pid file, provided
# a pid file name was given and that file is non-empty.
function auto_kill_connect
{
    local pidfile="$1"

    while [ ! -s data/bestpath.bin.gz ]; do
        sleep 1
    done
    sleep 10

    if [ -n "$pidfile" ] && [ -s "$pidfile" ]; then
        echo "Found differential paths, killing md5diffpathconnect"
        kill "$(<"$pidfile")"
    fi
}

#global loop that ensures that the script executes successfully
#
# Each iteration wipes the working state, builds the first and the second
# near-collision block pair and verifies the resulting files. A failed step
# restarts the iteration with `continue` (a failed connect step also doubles
# datalower first); a verified collision leaves the loop with `break`.
#
# NOTE: $diff, $optslower and $optsupper each hold several options and are
# intentionally left unquoted so that they word-split into separate arguments.
while true; do

    #clean the state
    rm -f *.log
    rm -rf data
    mkdir -p data
    mkdir -p logs
    # -f: these files do not exist on a fresh run
    rm -f md5diffpath*.cfg md5diffpath*.template

    # update configuration (datalower may be increased in the global loop)
    optsupper="-a ${dataupper} -e 4 --fillfraction 1 -q 8"
    optslower="-a ${datalower} -e 4 --fillfraction 1 -q 8"
    updir=upper_${N}_${dataupper}

    ###################### FIRST NEAR-COLLISION BLOCK PAIR #####################

    $HELPER $diff --startpartialpathfromfile --inputfile1 $prefixfile --outputfile1 data/$prefixfile.base --outputfile2 data/path_prefix.bin 2>&1 | tee logs/lower.log
    echo "Continuing in 3 seconds..."
    sleep 3
    # From the last log line starting with "Q", keep the text between the "Q"
    # and the first ":" (presumably the last step index printed by the helper).
    startt=$(grep "^Q" logs/lower.log | tail -n1 | cut -d: -f1 | cut -dQ -f2)
    $FORWARD $diff $optslower -f data/path_prefix.bin --normalt01 -t $startt --trange $(($tconnect-$startt-1)) 2>&1 | tee -a logs/lower.log

    #create upper paths
    # The upper paths are cached in $updir: the marker file "done" makes later
    # iterations skip this computation.
    mkdir -p "$updir"
    if [ ! -f "$updir/done" ]; then
        $BACKWARD $diff -w $updir -n -t $tmid -b $(($tmid+1))
        $FORWARD $diff $optsupper -w $updir -t $((tmid+1)) -b $tmid --trange $((61-$tmid)) 2>&1 | tee logs/upper.log
        $FORWARD $diff -w $updir -a 16 -t 63 -b $tmid 2>&1 | tee -a logs/upper.log

        $BACKWARD $diff $optsupper -w $updir -f $updir/paths63_0of1.bin.gz -t $((tmid-1)) --trange $(($tmid-$tconnect-5)) 2>&1 | tee -a logs/upper.log
        touch "$updir/done"
    fi

    #connect
    rm -f data/bestpath.bin.gz
    auto_kill_connect data/connect1pid &
    autokiller1=$!
    trap "kill -TERM $autokiller1; exit" TERM INT
    # The connect tool runs in the background of a subshell; its PID is written
    # through file descriptor 3 into the pid file read by auto_kill_connect.
    ($CONNECT $diff -t $tconnect --inputfilelow data/paths$((tconnect-1))_0of1.bin.gz --inputfilehigh $updir/paths$((tconnect+4))_0of1.bin.gz 2>&1 & echo $! >&3) 3>data/connect1pid | tee logs/connect.log
    rm -f data/connect1pid
    kill $autokiller1
    sleep 2

    #check that the first connection happened
    if [ ! -s data/bestpath.bin.gz ]; then
        datalower=$((2*datalower))
        echo "First step failed. Restarting"
        sleep 2
        continue
    fi


    #find example collision
    $HELPER $diff --findcollision --inputfile1 data/bestpath.bin.gz 2>&1 | tee logs/collfind.log

    ################## SECOND NEAR-COLLISION BLOCK PAIR #########################

    # swap msg1 and msg2 to use opposing signs as first block
    cat data/$prefixfile.base ./data/coll1_* > ./data/file2.bin
    cat data/$prefixfile.base ./data/coll2_* > ./data/file1.bin
    mv ./data/coll1_* ./data/nc1_coll1.bin
    mv ./data/coll2_* ./data/nc1_coll2.bin
    mv data/bestpath.bin.gz data/nc1_bestpath.bin.gz

    $HELPER $diff --startpartialpathfromfile --inputfile1 ./data/file1.bin --inputfile2 ./data/file2.bin --outputfile1 /dev/null --outputfile2 data/path_2nc.bin 2>&1 | tee logs/lower2.log
    startt=$(grep "^Q" logs/lower2.log | tail -n1 | cut -d: -f1 | cut -dQ -f2)
    $FORWARD $diff $optslower -f data/path_2nc.bin --normalt01 -t $startt --trange $(($tconnect-$startt-1)) 2>&1 | tee -a logs/lower2.log

    #connect
    rm -f data/bestpath.bin.gz
    auto_kill_connect data/connect2pid &
    autokiller2=$!
    trap "kill -TERM $autokiller2; exit" TERM INT
    ($CONNECT $diff -t $tconnect --inputfilelow data/paths$((tconnect-1))_0of1.bin.gz --inputfilehigh $updir/paths$((tconnect+4))_0of1.bin.gz 2>&1 & echo $! >&3) 3>data/connect2pid | tee logs/connect2.log
    rm -f data/connect2pid
    kill $autokiller2
    sleep 2

    #check that the second connection happened
    if [ ! -s data/bestpath.bin.gz ]; then
        datalower=$((2*datalower))
        echo "Second step failed. Restarting"
        sleep 2
        continue
    fi

    #find example collision
    $HELPER $diff --findcollision --inputfile1 data/bestpath.bin.gz 2>&1 | tee logs/collfind2.log

    cat data/file1.bin data/coll1_* > collision2.bin
    cat data/file2.bin data/coll2_* > collision1.bin
    md5sum collision1.bin collision2.bin
    sha1sum collision1.bin collision2.bin
    ls -als collision1.bin collision2.bin

    #check that the collision is a collision
    # An empty hash (md5sum failed) is treated as "not a collision" as well.
    sum1=$(md5sum collision1.bin | cut -d' ' -f1)
    sum2=$(md5sum collision2.bin | cut -d' ' -f1)
    if [ -z "$sum1" ] || [ "$sum1" != "$sum2" ]; then
        echo "Not a real collision. Restarting"
        sleep 2
        continue
    fi

    #closing the global loop
    break

done
./poc_no.sh prefix.txt会怎么样？详细解释这两个文件的用法

Answer 1

这两个脚本都是 HashClash（MD5）里用来“自动化找差分路径/近碰撞块，从而拼出 MD5 碰撞文件” 的 PoC/流水线脚本。它们的共同点：都在调用 ../bin/ 里的几个可执行文件（md5_diffpathhelper/forward/backward/connect 等），不断生成中间路径文件、连接路径、最后吐出两份 MD5 相同但内容不同 的输出文件。

先确认一下你的学习目标（只选一个）：你更想 读懂脚本流程，还是想 把它在本机跑通得到碰撞输出？

1) `./poc_no.sh prefix.txt` 会发生什么（按脚本实际流程）

输入参数：第 1 个参数当作“前缀文件”prefixfile。
你传 prefix.txt，脚本就把它当作前缀字节序列（注意：不管是不是文本，它都会按“原始字节”处理）。

A. 预处理：确定前缀文件

prefixfile=$1 → 变成 prefix.txt
如果你没传参数，它会用 dummy.prefix.bin
如果 prefix.txt 不存在，它会 touch prefix.txt 生成一个空文件

含义：接下来生成的两份碰撞文件，都会共享同一个“前缀”（就是 prefix.txt 的内容）。

B. 设置碰撞差分模板（由 `N=1` 决定）

脚本里 N=1，所以走：

diff="--diffm2 9"
tmid=32
tconnect=12（脚本开头设置的默认值，N=1 分支没有覆盖它）
datalower=160000, dataupper=640000（同样是默认值；只有 N=3 分支会改写 tconnect、datalower、dataupper）

这一步是在选定一种“预设的差分模式/位差”（HashClash 的套路：先规定消息字的某些比特差异，再找能满足条件的路径）。

C. 全局 while 循环：失败就清理重来

每次循环都会：

删除当前目录下的 *.log 和旧的 md5diffpath*.cfg / md5diffpath*.template，删除并重建 data/，并确保 logs/ 目录存在（注意：logs/ 目录本身不会被清空，里面的日志会在后续步骤中被覆盖或追加）
生成一些运行参数（optslower/optsupper）
之后分成 “第一块近碰撞对” 和 “第二块近碰撞对” 两大段

D. 第一段：生成第一个“近碰撞块对”

核心动作是这几类工具配合：

HELPER：从前缀启动 partial path

产物大概是：
- data/prefix.txt.base（把前缀规范化/对齐后的“基底”）
- data/path_prefix.bin（后续 forward 搜索的起点路径）

FORWARD：向前扩展低端路径

会在 data/ 里产出形如 pathsXX_0of1.bin.gz 的文件（不同“时间步/轮函数状态”的路径集合）

BACKWARD + FORWARD：在 upper_${N}_${dataupper} 目录里做“上半段路径库”

这块会缓存：如果 upper_1_640000/done 存在，下次循环就不重算上半段（节省时间）

CONNECT：把 lower/upper 两端路径“接上”

成功时会生成：data/bestpath.bin.gz
脚本还在后台起了一个 auto_kill_connect：它每秒检查一次 data/bestpath.bin.gz，一旦该文件出现且非空，再等 10 秒；此时如果 pid 文件（data/connect1pid）仍然存在且非空，就把 connect 进程杀掉（避免它一直跑）

如果这一步没找到连接（bestpath.bin.gz 不存在/为空）：

datalower 翻倍
打印 “First step failed. Restarting”
回到 while true 开头重来

找到连接后：HELPER 再从 bestpath 找出一个具体的近碰撞块对

典型会吐出 data/coll1_* 和 data/coll2_* 这类文件（两边对应的块对）

E. 第二段：用“相反符号”再做一次近碰撞块对（拼成真正碰撞）

脚本把前缀基底（data/prefix.txt.base）和第一段的块对拼到两个临时文件里（注意两边是对调的：coll2_* 拼进 file1.bin，coll1_* 拼进 file2.bin，这样第二段使用与第一段相反的符号）：

data/file1.bin
data/file2.bin

并把第一段的结果重命名保存（data/nc1_coll1.bin、data/nc1_coll2.bin、data/nc1_bestpath.bin.gz）

然后重复一遍类似流程（HELPER→FORWARD→CONNECT→HELPER），最后生成：

collision1.bin
collision2.bin

并打印：

md5sum collision1.bin collision2.bin（期望相同）
sha1sum ...（通常会不同）
ls -als ...（看大小）

最后还有一个强校验：如果两份文件 MD5 不相等，它会说 “Not a real collision. Restarting” 然后回到 while true 再来一轮。

2) `cpc.sh` 是干什么的、怎么“用”（从输入输出角度）

cpc.sh 更像“通用的 chosen-prefix collision 管线”：

输入

必须给两个文件：cpc.sh <file1> <file2> ...
- 它们是 两份不同的前缀（chosen-prefix 的“chosen”就在这：两边前缀可以不一样）
可选第 3/4 个参数用于“从某一步重来/跳过某些阶段”（脚本里有逻辑分支）

它做的事（高层）

birthday search（md5_birthdaysearch）：先做一轮预搜索/准备（它还会探测 CUDA/线程等）
然后进入 step 循环：每一步 dostepk 会：
- HELPER --startnearcollision（从 file1_k / file2_k 启动当前步的近碰撞任务）
- FORWARD 扩展 lower paths
- BACKWARD 扩展 upper paths
- CONNECT 连接两端
- HELPER --findcoll 找到具体块对 coll1_* & coll2_*
- 找到后把块追加到 file1_(k+1).bin / file2_(k+1).bin
期间有：
- 自动超时杀进程（auto_kill）
- 失败回退 backtrack（最多 20 次左右就提示不正常）
成功时输出：

${file1}.coll 和 ${file2}.coll 并校验它们 md5sum 相等后提示完成。

输出目录/文件结构

data/、workdir0/ workdir1/...：每一步的 cfg、log、paths、coll 文件
stepk.log：每一步的总日志
最终 .coll：你关心的“碰撞后的两份文件”

一个很关键的小点：`prefix.txt` 作为前缀到底意味着什么？

在 poc_no.sh 里，prefix.txt 就是最终两份碰撞文件共同拥有的“开头字节序列”。如果你放的是纯文本，它也只是“字节序列”；如果你想模拟某种文件格式头（比如 PNG/PDF/自定义协议头），也可以把对应的二进制头放进去——脚本不关心“可读性”，只关心字节。

轮到你： 你希望 prefix.txt 里放的是「空前缀」还是「某个固定内容/文件头」？（选一个说就行）

┌──(kali㉿kali)-[~/crypto/hashclash-static-release-...

질문

Note that positive message bit differences are given with bit indices 1,...,32

and with -1,...,-32 for a negative bit difference

automatically kill connect 10 seconds after first found differential path

only if pidfile exists

사고

Starting with a light question

Providing conceptual explanation safely

Providing safe guidance without overstepping

Providing safe educational explanation

Crafting a question for better understanding

Providing educational guidance and safety disclaimers

답변

1) `./poc_no.sh prefix.txt` 会发生什么（按脚本实际流程）

A. 预处理：确定前缀文件

B. 设置碰撞差分模板（由 `N=1` 决定）

C. 全局 while 循环：失败就清理重来

D. 第一段：生成第一个“近碰撞块对”

E. 第二段：用“相反符号”再做一次近碰撞块对（拼成真正碰撞）

2) `cpc.sh` 是干什么的、怎么“用”（从输入输出角度）

输入

它做的事（高层）

输出目录/文件结构

一个很关键的小点：`prefix.txt` 作为前缀到底意味着什么？

이 Q&A 공유하기

질문

Note that positive message bit differences are given with bit indices 1,...,32

and with -1,...,-32 for a negative bit difference

automatically kill connect 10 seconds after first found differential path

only if pidfile exists

사고

Starting with a light question

Providing conceptual explanation safely

Providing safe guidance without overstepping

Providing safe educational explanation

Crafting a question for better understanding

Providing educational guidance and safety disclaimers

답변

1) ./poc_no.sh prefix.txt 会发生什么（按脚本实际流程）

A. 预处理：确定前缀文件

B. 设置碰撞差分模板（由 N=1 决定）

C. 全局 while 循环：失败就清理重来

D. 第一段：生成第一个“近碰撞块对”

E. 第二段：用“相反符号”再做一次近碰撞块对（拼成真正碰撞）

2) cpc.sh 是干什么的、怎么“用”（从输入输出角度）

输入

它做的事（高层）

输出目录/文件结构

一个很关键的小点：prefix.txt 作为前缀到底意味着什么？

이 Q&A 공유하기

1) `./poc_no.sh prefix.txt` 会发生什么（按脚本实际流程）

B. 设置碰撞差分模板（由 `N=1` 决定）

2) `cpc.sh` 是干什么的、怎么“用”（从输入输出角度）

一个很关键的小点：`prefix.txt` 作为前缀到底意味着什么？