%=================================================...

% 创建于:2026年2月23日
% 使用 GPT-5.2 Thinking 回答 Chat01
% 问题

%======================================================================
% GLOBAL CONSISTENCY CLEAN VERSION (Five-stand, thickness+tension only,
% actuators: roll gap + stand speed; Δx = deviation, Δu = increment)
%======================================================================

%========================
\section{Construction of Dataset}
%========================

\subsection{Five-stand tandem mill setting and unified notation}
\label{subsec:notation_clean}

Consider a five-stand tandem cold rolling mill indexed by $i\in\{1,2,3,4,5\}$.
Let $t_n$ denote the $n$-th sampling instant and $\delta_n=t_{n+1}-t_n$ the corresponding sampling interval length.
Define the local within-interval time variable $\tau=t-t_n\in[0,\delta_n]$.

\paragraph{Thickness and tension signals (what is controlled).}
Let $h_i(t)$ denote the exit thickness of stand $i$ ($i=1,\dots,5$),
and let $T_i(t)$ denote the inter-stand strip tension between stand $i$ and $i+1$ ($i=1,\dots,4$).
The system is strongly coupled because the inter-stand tensions propagate along the mill line and are affected by neighboring stands' actions.

\paragraph{Reference trajectories and deviation-state definition (meaning of $\Delta x$).}
Let $h_i^{\mathrm{ref}}(t)$ and $T_i^{\mathrm{ref}}(t)$ be the desired references (setpoints) given by process requirements
(e.g., schedule-based references or constant setpoints).
We define deviation (tracking-error) variables
\begin{equation}
\Delta h_i(t)\triangleq h_i(t)-h_i^{\mathrm{ref}}(t),\qquad
\Delta T_i(t)\triangleq T_i(t)-T_i^{\mathrm{ref}}(t).
\label{eq:dev_def}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to \emph{states} always means \emph{deviation from reference}.

\paragraph{Local state vector (where thickness and tension appear).}
For each stand ii, we choose the local deviation state as
\begin{equation}
\Delta x_i(t)\triangleq
\begin{bmatrix}
\Delta h_i(t)\\
\Delta T_{i-1}(t)\\
\Delta T_i(t)
\end{bmatrix}\in\mathbb{R}^{d},\qquad d=3,
\label{eq:xi_def_clean}
\end{equation}
with the boundary convention $\Delta T_0(t)\equiv 0$ and $\Delta T_5(t)\equiv 0$ to keep a unified dimension $d=3$ for all stands.
(Equivalently, one may remove nonexistent boundary tensions from the state and use varying dimensions; here we keep a unified form.)

\paragraph{Neighbor sets and coupling representation (five-stand chain).}
For a five-stand tandem mill, the dominant coupling is between adjacent stands, hence we define
\begin{equation}
Z_1=\{2\},\quad
Z_i=\{i-1,i+1\}\ (i=2,3,4),\quad
Z_5=\{4\}.
\label{eq:Zi_clean}
\end{equation}
Define the neighbor-state stack
\begin{equation}
\Delta x_{Z_i}(t_n)=\mathrm{col}\{\Delta x_k(t_n)\,|\,k\in Z_i\}.
\label{eq:xZi_clean}
\end{equation}

\paragraph{Actuators and increment inputs (meaning of $u$ and $\Delta u$).}
Each stand $i$ is manipulated by \emph{roll gap} (screw-down/hydraulic gap) $s_i(t)$ and \emph{stand speed} $v_i(t)$:
\begin{equation}
u_i(t)=
\begin{bmatrix}
s_i(t)\\
v_i(t)
\end{bmatrix}\in\mathbb{R}^{n_u},\qquad n_u=2.
\label{eq:ui_clean}
\end{equation}
To ensure smooth actuation and match industrial practice, we optimize \emph{discrete input increments}:
\begin{equation}
\Delta u_i(t_n)\triangleq u_i(t_n)-u_i(t_{n-1})
=
\begin{bmatrix}
\Delta s_i(t_n)\\
\Delta v_i(t_n)
\end{bmatrix}.
\label{eq:du_discrete_clean}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to \emph{inputs} $\Delta u_i(t_n)$ means \emph{sample-to-sample increment}.
Thus, $\Delta x$ (deviation state) and $\Delta u$ (input increment) are conceptually different, and this is fixed by definition.

\paragraph{Disturbance.}
Let $d_i(t)$ denote exogenous disturbances (e.g., entry thickness fluctuation, friction variation, material parameter drift, etc.).
We denote the interval-level equivalent disturbance by $\Delta d_i(t_n)$ (defined via interval averaging below).

\paragraph{Basic matrix notation.}
$I_d$ denotes the $d\times d$ identity matrix; $0_{a\times b}$ denotes the $a\times b$ zero matrix.

\subsection{Discrete interval mapping and data-driven learning objective}
\label{subsec:mapping_clean}

The stand-wise deviation-state evolution over $[t_n,t_{n+1}]$ can be expressed by a discrete-time mapping
\begin{equation}
\Delta x_i(t_{n+1})
=
\Phi_i\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Delta u_i([t_n,t_{n+1}]),\,\Delta d_i([t_n,t_{n+1}])\Big),
\label{eq:true_mapping_clean}
\end{equation}
where $\Phi_i(\cdot)$ is generally nonlinear and coupled due to rolling deformation and tension propagation.
A commonly used \emph{conceptual} equivalent discrete linear form is
\begin{equation}
\Delta x_i(t_{n+1})
=
M_d\,\Delta x_i(t_n)
+
N_d\,\Delta u_i(t_n)
+
F_d\,\Delta d_i(t_n),
\label{eq:linear_form_concept}
\end{equation}
where $M_d,N_d,F_d$ represent equivalent discrete-time matrices around operating conditions.
In a practical five-stand cold rolling mill, accurately deriving/identifying these matrices and disturbance models from first principles is difficult,
due to strong coupling, unmodeled nonlinearities, and time-varying operating regimes.
Therefore, this paper aims to learn a high-fidelity approximation of the interval evolution from data and then embed it into distributed MPC.

\begin{remark}
In fact, due to the existence of complex coupling relationships, it is difficult to directly and accurately establish \eqref{eq:linear_form_concept}
based on first principles. Therefore, in this paper, we learn an approximate mapping of \eqref{eq:true_mapping_clean} from data.
\end{remark}

%========================
\subsection{Interval-level parameterization and one-step dataset}
%========================

\paragraph{Why interval-level parameterization is reasonable in the five-stand setting.}
Although decisions are updated at discrete instants tnt_n, the hydraulic gap and drive systems evolve continuously inside each interval,
and abrupt within-interval changes may excite tension oscillations and deteriorate thickness stability.
Thus, parameterizing the within-interval increment trajectory by a low-order polynomial:
(i) yields a compact finite-dimensional decision representation;
(ii) enforces smooth profiles inside the interval;
(iii) enables enforcing increment constraints for all $\tau\in[0,\delta_n]$.
This is appropriate when $\delta_n$ is not excessively large relative to actuator bandwidth and the within-interval evolution is well approximated by a low-order basis.

\paragraph{Vector quadratic polynomial parameterization (two inputs).}
On the interval $[t_n,t_{n+1}]$, parameterize the control increment trajectory as
\begin{equation}
\Delta u_{i,n}(\tau;\Gamma_{i,n})
=
\Gamma_{i,n0}+\Gamma_{i,n1}\tau+\Gamma_{i,n2}\tau^2,
\qquad \tau\in[0,\delta_n],
\label{eq:du_poly_vec_clean}
\end{equation}
where $\Gamma_{i,n0},\Gamma_{i,n1},\Gamma_{i,n2}\in\mathbb{R}^{n_u}$ are coefficient vectors ($n_u=2$).
Component-wise, \eqref{eq:du_poly_vec_clean} corresponds to
\begin{equation}
\begin{aligned}
\Delta s_{i,n}(\tau) &= \gamma^{(s)}_{i,n0}+\gamma^{(s)}_{i,n1}\tau+\gamma^{(s)}_{i,n2}\tau^2,\\
\Delta v_{i,n}(\tau) &= \gamma^{(v)}_{i,n0}+\gamma^{(v)}_{i,n1}\tau+\gamma^{(v)}_{i,n2}\tau^2.
\end{aligned}
\label{eq:du_components_clean}
\end{equation}
Define the stacked parameter vector
\begin{equation}
\Gamma_{i,n}\triangleq
\big[
(\Gamma_{i,n0})^\top,\,
(\Gamma_{i,n1})^\top,\,
(\Gamma_{i,n2})^\top
\big]^\top
\in\mathbb{R}^{p},
\qquad
p=3n_u=6.
\label{eq:Gamma_clean}
\end{equation}
Here, $\Gamma_{i,n0}$ is the baseline increment at $\tau=0$, while $\Gamma_{i,n1}$ and $\Gamma_{i,n2}$ describe the linear and quadratic variation rates.

\paragraph{Equivalent discrete-time (interval-averaged) increments.}
Define the interval-averaged equivalent increments as
\begin{equation}
\begin{aligned}
\Delta u_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta u_{i,n}(\tau)\,d\tau,\\
\Delta d_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta d_i(\tau)\,d\tau.
\end{aligned}
\label{eq:avg_def_clean}
\end{equation}
With \eqref{eq:du_poly_vec_clean}, the input average has a closed form:
\begin{equation}
\Delta u_i(t_n)=
\Gamma_{i,n0}
+\Gamma_{i,n1}\frac{\delta_n}{2}
+\Gamma_{i,n2}\frac{\delta_n^2}{3}.
\label{eq:avg_closed_clean}
\end{equation}

\paragraph{Sampling domains for offline data generation.}
Let $\mathcal{I}_x$ denote the sampling domain (ranges) of deviation states $\Delta x_i(t_n)$ and neighbor stacks $\Delta x_{Z_i}(t_n)$,
and let $\mathcal{I}_\Gamma$ denote the sampling domain of polynomial parameters $\Gamma_{i,n}$ (covering both gap and speed channels).
These domains specify the operating envelope used to generate supervised training data.

\paragraph{One-step sample generation (five-stand coupled simulation).}
Given the above parameterization, one training sample is generated on each interval $[t_n,t_{n+1}]$.
In addition to the local deviation state, the neighbor deviation states are included to represent inter-stand coupling.
The process is summarized in Table~\ref{tab:interval_sample_generation_en}.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.15}
\caption{Procedure for generating one interval-level sample on $[t_n,t_{n+1}]$ (five-stand coupled mill).}
\label{tab:interval_sample_generation_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.09\linewidth} X}
\toprule
\textbf{Step} & \textbf{Operation} \\
\midrule
1 & \textbf{State sampling:} sample $\Delta x_i(t_n)$ and $\Delta x_{Z_i}(t_n)$ from $\mathcal{I}_x$. \\
2 & \textbf{Parameter sampling:} draw $\Gamma_{i,n}\sim\mathcal{I}_\Gamma$ (coefficients for both $\Delta s_{i,n}(\tau)$ and $\Delta v_{i,n}(\tau)$). \\
3 & \textbf{Control construction:} compute $\Delta u_{i,n}(\tau)$ via \eqref{eq:du_poly_vec_clean}. \\
4 & \textbf{State propagation:} integrate the \emph{five-stand coupled} mill model on $[t_n,t_{n+1}]$ (e.g., RK4) using the within-interval control trajectory, and record $\Delta x_i(t_{n+1})$. \\
\bottomrule
\end{tabularx}
\end{table}

Accordingly, an interval sample for subsystem $i$ can be represented as
\begin{equation}
\mathcal{D}_{i,n}=\big\{\Delta x_i(t_n),\ \Delta x_{Z_i}(t_n),\ \Delta u_{i,n}(\tau),\ \Delta x_i(t_{n+1})\big\}.
\label{eq:interval_sample_clean}
\end{equation}
Note that $\Delta u_{i,n}(\tau)$ is fully determined by $(\Gamma_{i,n},\delta_n)$ via \eqref{eq:du_poly_vec_clean},
therefore it is sufficient to store $(\Gamma_{i,n},\delta_n)$ as the learning input.

For each subsystem $i$, by repeating the above procedure across multiple intervals and randomized draws,
the local one-step training dataset is formed as
\begin{equation}
\begin{split}
S_i=\Big\{&
\big(\Delta x_i^{(j)}(t_n),\,\Delta x_{Z_i}^{(j)}(t_n),\,\Delta x_i^{(j)}(t_{n+1});\,
\Gamma_{i,n}^{(j)},\,\delta_n^{(j)}\big)
\ \Big|\ j=1,\ldots,J
\Big\}.
\end{split}
\label{eq:S_i_clean}
\end{equation}
Here $J$ is the number of one-step samples for subsystem $i$.
The overall dataset for the five-stand mill is denoted by $\{S_i\}_{i=1}^{5}$.
The point-cloud visualization of the training dataset is shown in Figure~\ref{2}.

\begin{figure*}[htbp]
\centering
\includegraphics[scale=0.5]{picture/Fig2.pdf}
\caption{Point cloud map of the training dataset.}\label{2}
\end{figure*}

%========================
\subsection{Multi-step rollout segment dataset}
%========================

The one-step set $S_i$ is sufficient for one-step regression, but it is not sufficient for training with multi-step rollout loss
and reciprocal-consistency regularization, because these objectives require ground-truth deviation-state trajectories over a horizon of $K$ consecutive intervals.
Therefore, without changing the single-interval sampling mechanism above, we additionally organize the offline-simulated samples
into $K$-step trajectory segments.

Specifically, during offline simulation, for each starting time $t_n$ we generate a segment of length $K$ by consecutively sampling
$\{\Gamma_{i,n+s},\delta_{n+s}\}_{s=0}^{K-1}$ (and the corresponding inputs/disturbances),
and integrating the five-stand coupled mill model over $[t_{n+s},t_{n+s+1}]$ for $s=0,\ldots,K-1$.
Hence, we obtain the deviation-state sequence $\{\Delta x_i(t_{n+s})\}_{s=0}^{K}$ as well as the neighbor stacks
$\{\Delta x_{Z_i}(t_{n+s})\}_{s=0}^{K}$.

Define a $K$-step segment sample for subsystem $i$ as
\begin{equation}
\begin{aligned}
\mathcal{W}_{i,n}=
\Big\{&
\big(\Delta x_i(t_{n+s}),\,\Delta x_{Z_i}(t_{n+s}),\,\Gamma_{i,n+s},\,\delta_{n+s}\big)_{s=0}^{K-1}; \\
&\big(\Delta x_i(t_{n+s+1})\big)_{s=0}^{K-1}
\Big\}.
\end{aligned}
\label{eq:segment_clean}
\end{equation}
By repeating the above segment generation, we form the multi-step training set
\begin{equation}
S_i^{(K)}=\Big\{\mathcal{W}_{i,n}^{(j)}\ \Big|\ j=1,\ldots,J_K\Big\},
\label{eq:S_i_K_clean}
\end{equation}
where $J_K$ is the number of $K$-step segment samples.
Note that $S_i$ can be viewed as the marginal one-step projection of $S_i^{(K)}$ (keeping only $s=0$),
thus the original dataset design is preserved, and only an additional \emph{segment organization} is introduced for multi-step training.

%========================
\section{Construction of Residual Neural Network}
%========================

\subsection{Network architecture (what is learned, why residual, why include control)}
\label{subsec:net_clean}

\paragraph{Learning target (one-step controlled deviation-state evolution).}
Given the dataset, the neural network model is trained to learn a stand-wise, control-dependent one-step evolution law of deviation states:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\,\Theta_i\Big),
\label{eq:learned_dyn_clean}
\end{equation}
where $\mathcal{N}_i(\cdot)$ outputs the one-step deviation-state change and $\Theta_i$ are trainable parameters.

\paragraph{Why the model must include $u$ (difference between ``with $u$'' and ``without $u$'').}
If $\mathcal{N}_i$ does not take control information as input (here $\Gamma_{i,n}$ and $\delta_n$),
the predictor becomes an autoregressive model that only reproduces trajectories under the training input patterns
and cannot answer the counterfactual question: ``what will happen if we choose a different roll gap/speed trajectory?''
Since MPC optimizes over candidate decisions, a control-dependent predictor \eqref{eq:learned_dyn_clean} is necessary
to evaluate the predicted thickness/tension behavior under different candidate actuator trajectories.

\paragraph{Input/output dimensions.}
Let $d=3$ (state dimension), $|Z_i|$ be the number of neighbors of stand $i$ in \eqref{eq:Zi_clean}, and $p=6$ in \eqref{eq:Gamma_clean}.
Define the input vector
\begin{equation}
X_{i,\text{in}} \triangleq
\big[
\Delta x_i(t_n)^\top,\,
\Delta x_{Z_i}(t_n)^\top,\,
\Gamma_{i,n}^\top,\,
\delta_n
\big]^\top
\in \mathbb{R}^{d(1+|Z_i|)+p+1}.
\label{eq:X_in_clean}
\end{equation}
The network mapping is
\begin{equation}
\mathcal{N}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d}.
\end{equation}

\paragraph{Residual (shortcut) structure.}
To improve training stability and long-horizon rollout robustness, we use a residual form.
Let $\hat{I}_i\in\mathbb{R}^{d\times(d(1+|Z_i|)+p+1)}$ be a selection matrix extracting the local state block:
\begin{equation}
\hat{I}_i = [I_d,\ 0_{d\times(d|Z_i|+p+1)}].
\label{eq:Ihat_clean}
\end{equation}
Then the one-step predictor is written as
\begin{equation}
X_{i,\text{out}} = \hat{I}_i X_{i,\text{in}} + \mathcal{N}_i(X_{i,\text{in}}; \Theta_i),
\label{eq:res_predict_clean}
\end{equation}
where $X_{i,\text{out}}$ represents the predicted $\Delta x_i(t_{n+1})$.
This structure implements a baseline-plus-correction interpretation:
the shortcut propagates the current deviation state $\Delta x_i(t_n)$, while the network learns the correction capturing
unmodeled nonlinearities and inter-stand coupling (via $\Delta x_{Z_i}$) under varying operating conditions.

\paragraph{Auxiliary branch for variable interval length (avoid symbol conflict).}
To improve robustness when $\delta_n$ varies, we introduce an auxiliary branch inside $\mathcal{N}_i$:
\begin{equation}
\mathcal{N}_i(X_{i,\text{in}};\Theta_i)\triangleq
\psi_i(X_{i,\text{in}};\Theta_{\psi_i}) + \rho_i(X_{i,\text{in}};\theta_i),
\label{eq:aux_clean}
\end{equation}
where $\psi_i(\cdot)$ is a lightweight feedforward branch that captures low-frequency/scale effects strongly related to $\delta_n$,
and $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections.
When $\psi_i(\cdot)\equiv 0$, the model reduces to a standard residual network.
(We use $\psi_i$ and $\rho_i$ to avoid notation conflicts with the sampling sets $\mathcal{I}_x,\mathcal{I}_\Gamma$ and the optimizer learning rate.)

\paragraph{One-step supervised target.}
For the $j$-th sample in \eqref{eq:S_i_clean}, define
\begin{equation}
X_{i,\text{in}}^{(j)} =
\big[
\Delta x_i^{(j)}(t_n),\ \Delta x_{Z_i}^{(j)}(t_n),\
\Gamma_{i,n}^{(j)},\ \delta_n^{(j)}
\big]^{\top},
\end{equation}
and the supervised residual target
\begin{equation}
\Delta r_i^{(j)}=\Delta x_i^{(j)}(t_{n+1})-\Delta x_i^{(j)}(t_n).
\label{eq:target_clean}
\end{equation}

\subsection{Training, learned model, and system prediction (multi-step stability and control usage)}
\label{subsec:train_clean}

To suppress accumulation drift induced by long-horizon recursion and to improve long-term predictive stability,
we train the forward predictor jointly with an auxiliary backward residual model
and impose a multi-step reciprocal-consistency regularization over a $K$-step segment from $S_i^{(K)}$.

\paragraph{Backward residual model.}
Construct a backward residual network
\begin{equation}
\mathcal{B}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d},
\end{equation}
parameterized by $\bar{\Theta}_i$. For the backward step associated with interval $[t_n,t_{n+1}]$, define
\begin{equation}
\begin{aligned}
X_{i,\mathrm{in}}^{b}
&=
\big[
\Delta x_i(t_{n+1}),\ \Delta x_{Z_i}(t_{n+1}),\
\Gamma_{i,n},\ \delta_n
\big]^{\top},\\
X_{i,\mathrm{out}}^{b}
&=
\hat{I}_i X_{i,\mathrm{in}}^{b} + \mathcal{B}_i(X_{i,\mathrm{in}}^{b};\bar{\Theta}_i),
\end{aligned}
\label{eq:back_clean}
\end{equation}
where $X_{i,\mathrm{out}}^{b}$ represents the backward estimate of $\Delta x_i(t_n)$.
The supervised backward residual target is
\begin{equation}
\Delta r_i^{b}=\Delta x_i(t_n)-\Delta x_i(t_{n+1}).
\end{equation}

\paragraph{Forward rollout on a $K$-step segment.}
Given a segment sample $\mathcal{W}_{i,n}\in S_i^{(K)}$, initialize
\begin{equation}
\Delta \hat{x}_i(t_n)=\Delta x_i(t_n),
\end{equation}
and recursively apply the forward predictor for $K$ steps:
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,\delta_{n+s};\,\Theta_i
\Big),\\
&\qquad s=0,\ldots,K-1.
\end{aligned}
\label{eq:fwd_roll_clean}
\end{equation}

\paragraph{Backward rollout and reciprocal consistency.}
Set the terminal condition
\begin{equation}
\Delta \bar{x}_i(t_{n+K})=\Delta \hat{x}_i(t_{n+K}),
\end{equation}
and roll back using $\mathcal{B}_i$:
\begin{equation}
\begin{aligned}
\Delta \bar{x}_i(t_{n+s})
&=
\hat{I}_i X_{i,\mathrm{in}}^{b}(t_{n+s})
+
\mathcal{B}_i\!\Big(X_{i,\mathrm{in}}^{b}(t_{n+s});\,\bar{\Theta}_i\Big),
\quad s=K-1,\ldots,0,
\end{aligned}
\label{eq:bwd_roll_clean}
\end{equation}
where
\begin{equation}
X_{i,\mathrm{in}}^{b}(t_{n+s})=
\big[
\Delta \bar{x}_i(t_{n+s+1}),\ \Delta \hat{x}_{Z_i}(t_{n+s+1}),\
\Gamma_{i,n+s},\ \delta_{n+s}
\big]^{\top}.
\end{equation}

Define the multi-step reciprocal prediction error
\begin{equation}
E_i(t_n)
=
\sum_{s=0}^{K}
\left\|
\Delta \hat{x}_i(t_{n+s})-\Delta \bar{x}_i(t_{n+s})
\right\|^2.
\end{equation}

\paragraph{Training objectives (meaning of each loss).}
We jointly minimize:
\begin{equation}
\begin{aligned}
L_{\mathrm{1step}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s+1})-\Delta x_i^{(j)}(t_{n+s})\big)
-\mathcal{N}_i\!\left(
X_{i,\mathrm{in}}^{(j)}(t_{n+s});\Theta_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{bwd}}(\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s})-\Delta x_i^{(j)}(t_{n+s+1})\big)
-\mathcal{B}_i\!\left(
X_{i,\mathrm{in}}^{b,(j)}(t_{n+s});\bar{\Theta}_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{msrp}}(\Theta_i,\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K} E_i^{(j)}(t_n),\\[2mm]
L_{\mathrm{roll}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\sum_{s=1}^{K}
\Big\|
\Delta x_i^{(j)}(t_{n+s})-\Delta \hat{x}_i^{(j)}(t_{n+s})
\Big\|^2.
\end{aligned}
\label{eq:loss_clean}
\end{equation}
Here, $L_{\mathrm{1step}}$ enforces one-step accuracy; $L_{\mathrm{roll}}$ explicitly suppresses long-horizon drift under recursion;
$L_{\mathrm{msrp}}$ regularizes the learned dynamics by enforcing reciprocal consistency between forward and backward rollouts;
and $L_{\mathrm{bwd}}$ trains the backward model for the consistency regularization.
In implementation, these terms are combined as
\begin{equation}
L_{\mathrm{total}}=\lambda_1 L_{\mathrm{1step}}+\lambda_2 L_{\mathrm{roll}}+\lambda_3 L_{\mathrm{msrp}}+\lambda_4 L_{\mathrm{bwd}},
\end{equation}
where $\lambda_1,\lambda_2,\lambda_3,\lambda_4>0$ are tuned on a validation set.

\paragraph{Learned forward predictor used in control.}
After training, the forward predictor is
\begin{equation}
\Delta \hat{x}_i(t_{n+1})
=
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(
\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,
\Gamma_{i,n},\,\delta_n;\,\Theta_i^*
\Big),
\label{eq:pred_clean}
\end{equation}
and multi-step prediction is obtained by recursive rollout of \eqref{eq:pred_clean}.
This learned predictor is the internal model used by the MPC optimizer in the next section.

Finally, network parameters are optimized using Adam:
\begin{equation}
\Theta_{i,t+1} = \Theta_{i,t} - \alpha \frac{\hat{m}_{i,t}}{\sqrt{\hat{v}_{i,t}} + \varepsilon},
\end{equation}
where $\alpha$ is the learning rate (we use $\alpha$ to avoid conflict with other symbols),
$\hat{m}_{i,t}$ and $\hat{v}_{i,t}$ are bias-corrected moment estimates, and $\varepsilon>0$ is a small constant for numerical stability.
Figure~\ref{fig:rnn_logic} illustrates the overall structure.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.85]{picture/x6.pdf}
\caption{Logic diagram of the residual neural network.}
\label{fig:rnn_logic}
\end{figure}

%========================
\section{Nash Equilibrium-Based RNE-DMPC}
%========================

The five-stand tandem cold rolling system is strongly coupled through inter-stand tension propagation.
As a result, changes in control actions (roll gap and stand speed) at one stand can affect both upstream and downstream stands,
making centralized online optimization over all stands' decision variables computationally demanding.

To mitigate this issue, we decompose the global predictive-control problem into $N=5$ local subproblems associated with individual stands.
Each local controller optimizes its own decision variables while accounting for coupling via limited information exchange with neighboring controllers.
Motivated by game-theoretic coordination \citep{rawlings2008coordinating}, we formulate distributed coordination as a Nash-equilibrium-seeking iteration.
Based on the trained residual neural network surrogate model, we construct a Nash-equilibrium-based distributed MPC method (RNE-DMPC)
for coordinated thickness--tension regulation/tracking. The overall control structure is shown in Figure~\ref{4}.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x2.pdf}
\caption{Schematic diagram of the control architecture for a tandem cold rolling mill.}\label{4}
\end{figure*}

\subsection{Prediction model used in MPC and the prediction--control interface}
\label{subsec:interface_clean}

\paragraph{Key idea: prediction serves control through model constraints.}
At each sampling time $t_n$, MPC evaluates candidate actuator trajectories (encoded by $\Gamma_{i,n+s}$) by rolling out predictions of
thickness/tension deviations using the learned surrogate \eqref{eq:pred_clean}.
Therefore, the learned predictor directly provides the multi-step forecasts required to compute the MPC objective and enforce constraints.

\paragraph{Local polynomial parameterization over the horizon.}
Over each interval $[t_{n+s},t_{n+s+1}]$ with length $\delta_{n+s}$, the control increment trajectory of stand $i$ is
\begin{equation}
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\tau
+\Gamma_{i,n+s,2}\tau^2,\qquad \tau \in [0,\delta_{n+s}],
\label{eq:du_poly_mpc_clean}
\end{equation}
where $\Gamma_{i,n+s}\in\mathbb{R}^{p}$ with $p=6$.

\paragraph{Neural-network-based multi-step prediction inside MPC.}
Define the prediction horizon $N_p$ (number of future intervals predicted) and the control horizon $N_c$ (number of future intervals optimized),
with $N_c\le N_p$.
Given the current measured/estimated deviation states $\Delta x_i(t_n)$ and a candidate decision sequence
$\mathbf{\Gamma}_i(t_n)=\big[\Gamma_{i,n}^\top,\ldots,\Gamma_{i,n+N_c-1}^\top\big]^\top$,
stand $i$ predicts its deviation-state evolution by
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,
\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,
\delta_{n+s};\,
\Theta_i^*
\Big), \\
&\qquad s=0,\ldots,N_p-1,
\end{aligned}
\label{eq:rollout_mpc_clean}
\end{equation}
with initialization $\Delta \hat{x}_i(t_n)=\Delta x_i(t_n)$.
Here $\Delta \hat{x}_{Z_i}(t_{n+s})$ is obtained from the latest communication with neighbors during the Nash iteration.
Equation \eqref{eq:rollout_mpc_clean} is the explicit mathematical interface: \textbf{control decisions} $(\Gamma_{i,n+s})$
$\rightarrow$ \textbf{predicted thickness/tension deviations} $(\Delta \hat{x}_i)$ $\rightarrow$ \textbf{objective/constraints evaluation}.

\subsection{Local optimization problem (objective, reference meaning, constraints, and numerical solution)}
\label{subsec:local_opt_clean}

\paragraph{Decision variables.}
At time $t_n$, the local decision vector for stand $i$ is
\begin{equation}
\mathbf{\Gamma}_i(t_n)
\triangleq
\big[
\Gamma_{i,n}^\top,\,
\Gamma_{i,n+1}^\top,\,
\ldots,\,
\Gamma_{i,n+N_c-1}^\top
\big]^\top
\in \mathbb{R}^{pN_c}.
\label{eq:Gamma_seq_clean}
\end{equation}

\paragraph{Reference meaning (remove ambiguity of $\Delta x_{\mathrm{ref}}$).}
Because the deviation state is defined as $\Delta x_i(t)=x_i(t)-x_i^{\mathrm{ref}}(t)$ in \eqref{eq:dev_def} and \eqref{eq:xi_def_clean},
the desired regulation/tracking objective in deviation coordinates is always
\begin{equation}
\Delta x_i(t)\rightarrow 0.
\end{equation}
Therefore, the reference in deviation form is simply the zero vector, i.e.,
\begin{equation}
\Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d}.
\label{eq:dxref_zero_clean}
\end{equation}
Equivalently, one can view the cost as penalizing $(\hat x_i-x_i^{\mathrm{ref}})$ in absolute coordinates; in this paper we keep the deviation formulation.

\paragraph{Local objective (explicitly thickness+tension).}
Let $\Delta \hat{x}_i(t_{n+s})=[\Delta \hat h_i(t_{n+s}),\,\Delta \widehat T_{i-1}(t_{n+s}),\,\Delta \widehat T_i(t_{n+s})]^\top$.
The local cost is
\begin{equation}
\begin{aligned}
J_i
&=
\sum_{s=1}^{N_p}
\big\|
\Delta \hat{x}_i(t_{n+s}) - \Delta x_{i,\mathrm{ref}}(t_{n+s})
\big\|_{Q_i}^2
+
\sum_{s=0}^{N_c-1}
\big\|
\Gamma_{i,n+s}
\big\|_{R_i}^2,
\end{aligned}
\label{eq:Ji_clean}
\end{equation}
where $Q_i\in\mathbb{R}^{d\times d}$ weights thickness and tension deviations,
and $R_i\in\mathbb{R}^{p\times p}$ penalizes the polynomial-parameter magnitude to encourage smooth increments.
Using \eqref{eq:dxref_zero_clean}, the tracking term reduces to penalizing predicted deviation states directly.

\paragraph{Constraints.}
We enforce both absolute-input bounds and increment-trajectory bounds.

\emph{Absolute input bounds:}
\begin{equation}
u_{i,\min} \le u_i(t_{n+s}) \le u_{i,\max},
\qquad s=0,\ldots,N_p-1,
\label{eq:u_abs_clean}
\end{equation}
where $u_{i,\min},u_{i,\max}\in\mathbb{R}^{2}$ provide component-wise bounds for $(s_i,v_i)$.

\emph{Increment trajectory bounds for all $\tau$:}
\begin{equation}
\Delta u_{i,\min}
\le
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
\le
\Delta u_{i,\max},
\qquad \forall\tau\in[0,\delta_{n+s}],
\label{eq:du_traj_clean}
\end{equation}
where $\Delta u_{i,\min},\Delta u_{i,\max}\in\mathbb{R}^{2}$ are component-wise bounds for $(\Delta s,\Delta v)$.

\paragraph{Practical enforcement of \eqref{eq:du_traj_clean} (no nonstandard symbols).}
For a scalar quadratic $q(\tau)=a+b\tau+c\tau^2$ on $\tau\in[0,\delta]$, its extrema over the interval occur at
$\tau=0$, $\tau=\delta$, and possibly at the stationary point $\tau^\star=-b/(2c)$ if $c\neq 0$ and $\tau^\star\in[0,\delta]$.
Therefore, to enforce \eqref{eq:du_traj_clean} for the two-channel vector $\Delta u_{i,n+s}(\tau)=[\Delta s_{i,n+s}(\tau),\,\Delta v_{i,n+s}(\tau)]^\top$,
we check the above candidate points \emph{separately for each channel} using the corresponding coefficients in \eqref{eq:du_components_clean}.

\paragraph{Consistency between within-interval trajectory and discrete execution (interval average).}
To update the discrete absolute input and enforce \eqref{eq:u_abs_clean} consistently, we use the interval-averaged increment:
\begin{equation}
\Delta u_i(t_{n+s})
=
\frac{1}{\delta_{n+s}}\int_0^{\delta_{n+s}}\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\,d\tau
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}.
\label{eq:du_avg_clean}
\end{equation}
Then propagate the absolute input sequence:
\begin{equation}
\begin{aligned}
u_i(t_n) &= u_i(t_{n-1}) + \Delta u_i(t_n), \\
u_i(t_{n+s}) &= u_i(t_{n+s-1}) + \Delta u_i(t_{n+s}), \qquad s=1,\ldots,N_p-1.
\end{aligned}
\label{eq:u_prop_clean}
\end{equation}

\paragraph{Local optimization problem (solved at each Nash iteration).}
At Nash-iteration index $l$, subsystem $i$ solves
\begin{equation}
\mathbf{\Gamma}_i^{(l)}=\arg\min_{\mathbf{\Gamma}_i}\; J_i
\quad \text{s.t.}\quad
\eqref{eq:rollout_mpc_clean},\ \eqref{eq:u_abs_clean},\ \eqref{eq:du_traj_clean},\ \eqref{eq:u_prop_clean}.
\label{eq:local_prob_clean}
\end{equation}
Because the learned surrogate $\mathcal{N}_i(\cdot)$ is differentiable, \eqref{eq:local_prob_clean} is a differentiable nonlinear program (NLP),
which can be solved by standard gradient-based NLP solvers (e.g., SQP or interior-point methods) using automatic differentiation to compute gradients.

\subsection{Nash equilibrium coordination (where conflict comes from, how it is resolved, and how the solution is obtained)}
\label{subsec:nash_clean}

\paragraph{Why a Nash iteration is needed (explicit conflict explanation).}
Inter-stand tensions are shared coupling variables: the tension $T_i$ is influenced by both stand $i$ and stand $i+1$ (notably through their speed actions),
and changes in roll gap can also indirectly affect tensions via strip deformation and transport.
Therefore, purely independent local optimization can lead to conflicting actions: improving local thickness may worsen neighbor tensions, and vice versa.
To resolve this coupling conflict with limited communication, we adopt a Nash-equilibrium-seeking distributed best-response iteration.

\paragraph{Distributed best-response iteration.}
At iteration $l$, each stand computes its best response to the latest neighbor strategies and predictions.
The procedure is summarized in Table~\ref{tab:nash_iter_en}.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.12}
\setlength{\tabcolsep}{3.5pt}
\caption{Distributed Nash best-response iteration for RNE-DMPC (five-stand).}
\label{tab:nash_iter_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X}
\toprule
\textbf{Step} & \textbf{Description} \\
\midrule
A &
Initialize $l=1$ and initialize $\mathbf{\Gamma}_i^{(0)}$ for all subsystems (e.g., warm-start from previous time step). \\
B &
Using \eqref{eq:rollout_mpc_clean}, compute $\Delta \hat{x}_i^{(l)}(t_{n+s})$ for $s=1,\ldots,N_p$,
given $\mathbf{\Gamma}_i^{(l-1)}$ and the latest neighbor predictions
$\Delta \hat{x}_{Z_i}^{(l-1)}(t_{n+s})$. \\
C & Solve the local NLP \eqref{eq:local_prob_clean} to update $\mathbf{\Gamma}_i^{(l)}$ (best response). \\
D &
Broadcast $\mathbf{\Gamma}_i^{(l)}$ and predicted trajectories
$\Delta \hat{x}_i^{(l)}(t_{n+s})$ to the communication system. \\
E &
Update neighbor predictions $\Delta \hat{x}_{Z_i}^{(l)}(t_{n+s})$ using received information; re-generate predictions if needed. \\
F & Compute the maximum relative change $\varsigma^{(l)}$ as the convergence metric. \\
G &
If $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$;
otherwise set $l\leftarrow l+1$ and repeat Steps B--F. \\
\bottomrule
\end{tabularx}
\end{table}

\paragraph{Convergence metric.}
Define
\begin{equation}
\varsigma^{(l)}
\triangleq
\max_i
\frac{\left\|
\mathbf{\Gamma}_i^{(l)}-\mathbf{\Gamma}_i^{(l-1)}
\right\|_2}{
\left\|
\mathbf{\Gamma}_i^{(l-1)}
\right\|_2+\epsilon},
\label{eq:nash_metric_clean}
\end{equation}
where $\epsilon>0$ is a small constant to avoid division by zero.

\paragraph{Receding-horizon implementation (how the final control is applied).}
Only the first-interval parameters $\Gamma_{i,n}^*$ are applied.
The increment trajectory over $[t_n,t_{n+1}]$ is
\begin{equation}
\Delta u_{i,n}(\tau)=\Delta u_{i,n}(\tau;\Gamma_{i,n}^*),
\quad \tau\in[0,\delta_n].
\end{equation}
The discrete input increment applied for updating the absolute input is the interval average:
\begin{equation}
\Delta u_i(t_n)
=
\frac{1}{\delta_n}\int_0^{\delta_n}\Delta u_{i,n}(\tau)\,d\tau
=
\Gamma_{i,n,0}^*
+
\Gamma_{i,n,1}^*\frac{\delta_n}{2}
+
\Gamma_{i,n,2}^*
\frac{\delta_n^2}{3}.
\label{eq:apply_avg_clean}
\end{equation}
Then the absolute input is updated by
\begin{equation}
u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n),
\label{eq:apply_u_clean}
\end{equation}
which ensures smooth evolution of both roll gap and stand speed and avoids abrupt actuator changes.

\paragraph{Closed-loop prediction--control connection (complete loop).}
At each sampling instant $t_n$:
(i) measure/estimate the current deviation states $\Delta x_i(t_n)$ (thickness and tensions) and initialize neighbor information;
(ii) perform Nash best-response iterations; in each iteration solve \eqref{eq:local_prob_clean} using the learned predictor \eqref{eq:rollout_mpc_clean};
(iii) after convergence, apply $\Gamma_{i,n}^*$ by generating the within-interval increment trajectory and updating $u_i(t_n)$ via \eqref{eq:apply_avg_clean}--\eqref{eq:apply_u_clean};
(iv) move to $t_{n+1}$ and repeat.
In this way, the learned neural predictor supplies the multi-step forecasts needed by MPC, and the distributed optimization computes coordinated
gap/speed increment trajectories that regulate thickness and tensions while resolving coupling conflicts via Nash equilibrium.

The overall control flow chart is shown in Fig.~\ref{liu}.

\begin{figure}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x5.pdf}
\caption{The overall system control flow chart}\label{liu}
\end{figure}

%========================
% Optional: a compact symbol paragraph you can keep or move to Appendix
%========================
\paragraph{Summary of key symbols (for reviewer clarity).}
$i\in\{1,\dots,5\}$: stand index; $t_n$: sampling instant; $\delta_n$: sampling interval; $\tau$: within-interval time.
$h_i,T_i$: thickness and inter-stand tension; $h_i^{\mathrm{ref}},T_i^{\mathrm{ref}}$: references.
$\Delta h_i,\Delta T_i$: deviations; $\Delta x_i$: local deviation state in \eqref{eq:xi_def_clean}.
$u_i=[s_i,v_i]^\top$: actuators (gap and speed); $\Delta u_i(t_n)=u_i(t_n)-u_i(t_{n-1})$: discrete input increment.
$\Delta u_{i,n}(\tau;\Gamma_{i,n})$: within-interval increment trajectory; $\Gamma_{i,n}\in\mathbb{R}^{p}$: polynomial parameters ($p=6$).
$Z_i$: neighbor set; $\Delta x_{Z_i}$: neighbor stack.
$\mathcal{N}_i(\cdot)$: learned forward residual model; $\mathcal{B}_i(\cdot)$: backward model.
$K$: segment length; $J,J_K$: numbers of one-step and segment samples.
$N_p,N_c$: prediction and control horizons; $Q_i,R_i$: weights; $\epsilon,\varepsilon$: small constants.
这是我的论文第二到第四部分。老师说优化目标函数看不出来多个机架博弈;建立了优化目标求解方案是不是得改变。

思考

Offering revisions for multi-stand game clarity

The user is working on a technical manuscript and seeking advice. To clarify the game theory and Nash equilibrium aspects: I could suggest showing each stand as a player with strategies and costs depending on their own and neighboring states. Including coupling terms, consensus, or Nash-seeking methods might help refine the objective function.

Clarifying the game-theoretic nature of the model

The reviewer might not find the game aspect clear because the cost function, J_i, currently only includes local deviations and each stand's own strategy. To fix this, I could redefine J_i to include neighbor strategies, clearly showing the influence of neighbor states. I’d also make the shared inter-stand tension ΔT_i explicit.

Enhancing clarity on game structure and solution approach

The teacher might not find the game structure clear yet. To clarify, I could define global coupling weights or include cross-terms in the cost functions, like the differences between predicted tensions. I could also add coordination constraints or implement a Nash best response using methods like Gauss-Seidel.

Refining the optimization objective and game formulation

To highlight the multi-stand game, rather than change the solution itself, I could refine the formulation. I’d keep the objective but show dependencies between stands and include shared variable consistency. For clarity, I might add coupling terms, like penalties on tension mismatches between adjacent stands, using LaTeX for these modifications.

Clarifying cross-stand tensions and coordination

To handle the tension between stand i and i+1, I’d add a consensus penalty to ensure consistency, like including a term for the mismatch of predicted tensions. I could also consider treating this as a potential game or generalized Nash equilibrium. Two main options: A) local noncooperative NE or B) adding explicit coupling for a cooperative game element.

Clarifying NE and game formulation

To emphasize the game aspect, I should rewrite the optimization as a parametric game where each player's payoff depends on others through predictions and shared variables. Defining NE and showing the functional dependence of J_i on neighbors will highlight this. The algorithm will include best-response iterations. Also, add terms to penalize shared tensions or treat as a generalized Nash.

Thought for 56s

回答

你老师这句话的意思通常是:你现在写出来的 JiJ_i 看起来像“各机架各自做局部 MPC”,但“博弈”这一层(每个玩家的收益依赖别人的策略)在公式里没有被显式写出来
这不一定要求你推翻求解框架(Nash best-response 仍然可以用),但需要把“目标函数对他人决策的依赖”写得更明确,必要时再加一两个“共享变量一致性/耦合惩罚”项,让博弈冲突一眼可见。

下面给你两种改法(从“只改写法”到“改目标更明显”),按你论文工作量选。


1)为什么现在“看不出博弈”

你目前的目标

Ji=s=1NpΔx^i(tn+s)Qi2+s=0Nc1Γi,n+sRi2J_i=\sum_{s=1}^{N_p}\|\Delta \hat x_i(t_{n+s})\|_{Q_i}^2+\sum_{s=0}^{N_c-1}\|\Gamma_{i,n+s}\|_{R_i}^2

从表面看只含“自己的预测状态、自己的控制参数”。虽然你在预测模型里用了邻居预测 Δx^Zi\Delta \hat x_{Z_i},但你没有把它写成“由邻居的决策 ΓZi\mathbf\Gamma_{Z_i} 产生”,于是读者容易理解成:邻居轨迹是外部已知信号,而不是“其他玩家的策略”。

关键修正点:把 Δx^i()\Delta \hat x_i(\cdot) 明确写成 Δx^i(;Γi,Γi)\Delta \hat x_i(\cdot;\mathbf\Gamma_i,\mathbf\Gamma_{-i}) 或至少 Δx^i(;Γi,ΓZi)\Delta \hat x_i(\cdot;\mathbf\Gamma_i,\mathbf\Gamma_{Z_i})


2)最省事的改法:不改求解流程,只把“博弈依赖”写进定义(强烈建议先做这个)

(a)把预测显式写成“策略的函数”

把你 4.1 节的滚动预测式改成这种写法(含义不变,但读者立刻懂“互相影响”):

Δx^i(tn+s+1;Γi,ΓZi)=Δx^i(tn+s;Γi,ΓZi)+Ni ⁣(Δx^i(tn+s;),Δx^Zi(tn+s;ΓZi),Γi,n+s,δn+s).\Delta \hat x_i(t_{n+s+1};\mathbf\Gamma_i,\mathbf\Gamma_{Z_i}) = \Delta \hat x_i(t_{n+s};\mathbf\Gamma_i,\mathbf\Gamma_{Z_i}) + \mathcal N_i\!\Big( \Delta \hat x_i(t_{n+s};\cdot), \Delta \hat x_{Z_i}(t_{n+s};\mathbf\Gamma_{Z_i}), \Gamma_{i,n+s},\delta_{n+s} \Big).

并补一句定义邻居轨迹来自邻居策略:

Δx^Zi(tn+s;ΓZi)=col{Δx^k(tn+s;Γk)kZi}.\Delta \hat x_{Z_i}(t_{n+s};\mathbf\Gamma_{Z_i}) =\mathrm{col}\big\{\Delta \hat x_k(t_{n+s};\mathbf\Gamma_k)\mid k\in Z_i\big\}.

(b)把目标函数写成 payoff(收益)形式:Ji(Γi;Γi)J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i})

Ji(Γi;Γi)=s=1NpΔx^i(tn+s;Γi,Γi)Qi2+s=0Nc1Γi,n+sRi2.J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i}) = \sum_{s=1}^{N_p} \big\|\Delta \hat x_i(t_{n+s};\mathbf\Gamma_i,\mathbf\Gamma_{-i})\big\|_{Q_i}^2 + \sum_{s=0}^{N_c-1}\|\Gamma_{i,n+s}\|_{R_i}^2.

(c)给出 Nash 均衡的标准定义(这是“博弈”最关键的一行)

在 4.3 节加:

Γ={Γ1,,Γ5} 是 Nash 均衡当且仅当 i, ΓiargminΓiΩiJi(Γi;Γi).\mathbf\Gamma^*=\{\mathbf\Gamma_1^*,\ldots,\mathbf\Gamma_5^*\}\ \text{是 Nash 均衡当且仅当}\ \forall i,\ \mathbf\Gamma_i^*\in \arg\min_{\mathbf\Gamma_i\in\Omega_i} J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i}^*).

其中 Ωi\Omega_i 是你那些约束(输入上下界、增量轨迹约束、传播约束等)定义出来的可行域。

做完这三步,很多老师/审稿人就不会再说“看不出博弈”了,因为你把“他人的策略进入我的代价”写成了数学对象。


3)让“冲突/博弈”更一眼可见:加一个“共享张力一致性/耦合惩罚”项(推荐)

你系统里最典型的共享耦合量就是机架间张力 TiT_i
它同时出现在 stand ii 的状态里(ΔTi\Delta T_i)也出现在 stand i+1i+1 的状态里(ΔT(i+1)1=ΔTi\Delta T_{(i+1)-1}=\Delta T_i)。
这天然就是“两个玩家共同影响同一个量”,但你现在的写法没有把“共享”强调出来。

做法 A:一致性惩罚(consensus-like)

让每个机架在成本里额外惩罚“我预测的共享张力”和“邻居预测的共享张力”的差:

Jinew=Ji+μis=1NpΔT^i(i)(tn+s;Γi,Γi)ΔT^i(i+1)(tn+s;Γi+1,Γ(i+1))2.J_i^{\text{new}} = J_i + \mu_i \sum_{s=1}^{N_p} \big\| \Delta \widehat T_i^{(i)}(t_{n+s};\mathbf\Gamma_i,\mathbf\Gamma_{-i}) - \Delta \widehat T_i^{(i+1)}(t_{n+s};\mathbf\Gamma_{i+1},\mathbf\Gamma_{-(i+1)}) \big\|^2.
  • ΔT^i(i)\Delta \widehat T_i^{(i)}:由 stand ii 的预测轨迹读出来的 TiT_i
  • ΔT^i(i+1)\Delta \widehat T_i^{(i+1)}:由 stand i+1i+1 的预测轨迹读出来的同一个 TiT_i

这项一加,读者会立刻明白:同一根张力在两个玩家之间必须协调,否则成本上升 —— 博弈/协调味道非常强。

做法 B:把博弈升级成“广义 Nash”(GNE)也很加分

如果你把“共享张力一致性”不写成惩罚而写成约束(例如两边对 TiT_i 的预测必须一致/在通信一致变量上满足等式),那就是典型的 generalized Nash equilibrium(可行域与他人策略耦合)。这会更“理论味”,但写起来稍重。


4)“是不是得改变求解方案?”

多数情况下:不用推倒重来,你现在的 best-response Nash 迭代仍然适配上面两种改法。

但我建议你补一个非常实用的小改动,让“博弈求解”更像样、也更稳:

在 best-response 更新里加松弛(relaxation)

很多耦合系统直接用纯 best-response 会振荡。写成:

Γi(l)(1ω)Γi(l1)+ωBRi(Γi(l1)),ω(0,1].\mathbf\Gamma_i^{(l)} \leftarrow (1-\omega)\mathbf\Gamma_i^{(l-1)} + \omega\,\mathrm{BR}_i(\mathbf\Gamma_{-i}^{(l-1)}), \quad \omega\in(0,1].

并在文字里说:ω\omega 用于增强收敛性(工程上很常见)。


你可以直接照搬到论文里的“最小改动清单”

  1. Δx^i\Delta \hat x_i 写成 Δx^i(;Γi,ΓZi)\Delta \hat x_i(\cdot;\mathbf\Gamma_i,\mathbf\Gamma_{Z_i})
  2. JiJ_i 写成 Ji(Γi;Γi)J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i})
  3. 加一条 Nash 均衡定义式。
    4.(推荐)加一个共享张力一致性惩罚项,突出冲突来自 TiT_i 被两机架共同影响。
    5.(推荐)best-response 加松弛系数 ω\omega

如果你愿意,我也可以按你这段 LaTeX 的风格,把第 4 章(尤其 4.2–4.3)直接改写成“Game Formulation + NE Definition + Relaxed Best-response Algorithm”的可投版本,保证老师一眼看到“多个机架在博弈”。

问题

可以,帮我改吧

思考

Thought for 1m 21s

回答

下面给你一份可直接粘贴替换的 LaTeX(建议整体替换你第 4 章里 4.1–4.3 三个 \subsection)。改动目标是让审稿/老师一眼看到:

  1. 每个机架是一个玩家
  2. 每个玩家的代价显式依赖邻居策略Ji(Γi;Γi)J_i(\Gamma_i;\Gamma_{-i}));
  3. “冲突/耦合”通过共享张力 TiT_i 的“双边一致性(consensus)”项体现出来;
  4. 求解方案仍是 Nash best-response,但加了常见的松弛更新 ω\omega,理论和工程上都更稳,也更像“博弈求解”。

你不需要改你前面 RNN 预测器的结构;这里只是把“控制—预测—博弈—求解”在数学上写清楚、写得更像 game。


latex
%======================== \subsection{Prediction model used in MPC and the prediction--control interface (game-coupled form)} \label{subsec:interface_clean} \paragraph{Players and strategies.} At each sampling instant $t_n$, the five-stand mill is viewed as a finite-horizon dynamic game with player set $\mathcal{I}=\{1,2,3,4,5\}$. Player $i$ chooses its local polynomial-parameter sequence (strategy) \begin{equation} \mathbf{\Gamma}_i(t_n) = \big[ \Gamma_{i,n}^\top,\, \Gamma_{i,n+1}^\top,\, \ldots,\, \Gamma_{i,n+N_c-1}^\top \big]^\top\in\mathbb{R}^{pN_c}. \end{equation} Let $\mathbf{\Gamma}(t_n)=\mathrm{col}\{\mathbf{\Gamma}_i(t_n)\}_{i=1}^{5}$ denote the joint strategy, $\mathbf{\Gamma}_{-i}$ the strategies of all players except $i$, and $\mathbf{\Gamma}_{Z_i}$ the strategies of neighboring players in $Z_i$. \paragraph{Key idea: prediction depends on both own and neighbors' strategies.} The learned predictor provides multi-step forecasts used by each player to evaluate its objective and constraints. Crucially, due to tension propagation and transport coupling, the predicted trajectory of player $i$ is not only a function of $\mathbf{\Gamma}_i$ but also of neighbors' strategies $\mathbf{\Gamma}_{Z_i}$ through the neighbor states. 
\paragraph{Neural-network-based multi-step prediction inside MPC (explicit strategy dependence).} Given the current measured/estimated deviation states $\Delta x_i(t_n)$ and a candidate strategy profile $(\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})$, player $i$ predicts \begin{equation} \begin{aligned} \Delta \hat{x}_i(t_{n+s+1};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) &= \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) + \mathcal{N}_i\!\Big( \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}),\, \Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}),\\ &\qquad\qquad\qquad \Gamma_{i,n+s},\, \delta_{n+s};\, \Theta_i^* \Big),\qquad s=0,\ldots,N_p-1, \end{aligned} \label{eq:rollout_mpc_game} \end{equation} with initialization $\Delta \hat{x}_i(t_n;\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})=\Delta x_i(t_n)$. Here \begin{equation} \Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}) = \mathrm{col}\big\{ \Delta \hat{x}_k(t_{n+s};\mathbf{\Gamma}_k)\ \big|\ k\in Z_i \big\} \end{equation} is obtained from the latest communicated neighbor predictions during the Nash iteration. \paragraph{Interface-tension extraction (to expose coupling explicitly).} Recall $\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top$. Define two selection vectors \begin{equation} e^- \triangleq [0\ \ 1\ \ 0],\qquad e^+ \triangleq [0\ \ 0\ \ 1], \end{equation} so that $e^- \Delta x_i=\Delta T_{i-1}$ (upstream tension) and $e^+ \Delta x_i=\Delta T_i$ (downstream tension). The shared inter-stand tension between stand $i$ and $i{+}1$ is $\Delta T_i$, which appears as $e^+\Delta x_i$ in player $i$ and as $e^-\Delta x_{i+1}$ in player $i{+}1$. 
%======================== \subsection{Local optimization problem (payoff with explicit game coupling)} \label{subsec:local_opt_clean} \paragraph{Reference meaning.} Because $\Delta x_i(t)=x_i(t)-x_i^{\mathrm{ref}}(t)$, the deviation-coordinate target is \begin{equation} \Delta x_i(t)\rightarrow 0,\qquad \Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d}. \end{equation} \paragraph{Coupling-aware local payoff (game form).} Player $i$ minimizes a payoff that explicitly depends on other players' strategies: \begin{equation} J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}) = J_i^{\mathrm{trk}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}) + J_i^{\mathrm{act}}(\mathbf{\Gamma}_i) + J_i^{\mathrm{ten}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}), \label{eq:Ji_game_def} \end{equation} where \begin{equation} J_i^{\mathrm{trk}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}) = \sum_{s=1}^{N_p} \big\| \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) \big\|_{Q_i}^2, \qquad J_i^{\mathrm{act}}(\mathbf{\Gamma}_i) = \sum_{s=0}^{N_c-1} \big\| \Gamma_{i,n+s} \big\|_{R_i}^2. \label{eq:Ji_trk_act} \end{equation} \paragraph{Explicit tension-consensus (conflict) term.} To make the inter-stand game coupling visible and to prevent conflicting actions on shared tensions, we add a consensus penalty on the shared tension forecasts across neighboring players: \begin{equation} \begin{aligned} J_i^{\mathrm{ten}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}) &= \mu_i^-\sum_{s=1}^{N_p} \Big\| e^- \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) - e^+ \Delta \hat{x}_{i-1}(t_{n+s};\mathbf{\Gamma}_{i-1}) \Big\|^2 \\ &\quad + \mu_i^+\sum_{s=1}^{N_p} \Big\| e^+ \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) - e^- \Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma}_{i+1}) \Big\|^2, \end{aligned} \label{eq:tension_consensus_cost} \end{equation} with boundary convention $\Delta \hat{x}_0\equiv 0$ and $\Delta \hat{x}_6\equiv 0$ so the formula holds for all $i$. 
The weights $\mu_i^-,\mu_i^+\ge 0$ tune the coupling strength; when $\mu_i^\pm=0$, the formulation reduces to the uncoupled local MPC cost. Importantly, \eqref{eq:tension_consensus_cost} makes the ``conflict'' explicit: the same shared tension $\Delta T_i$ is simultaneously influenced by the decisions of players $i$ and $i{+}1$. \paragraph{Constraints and feasible set.} Let $\Omega_i(t_n)$ denote the local feasible set induced by the prediction model and constraints: \begin{equation} \Omega_i(t_n)=\Big\{\mathbf{\Gamma}_i\ \Big|\ \eqref{eq:rollout_mpc_game},\ \eqref{eq:u_abs_clean},\ \eqref{eq:du_traj_clean},\ \eqref{eq:u_prop_clean}\ \text{hold over the horizon} \Big\}. \end{equation} \paragraph{Local best-response problem (solved per Nash iteration).} At Nash-iteration index $l$, player $i$ solves the best-response NLP: \begin{equation} \mathrm{BR}_i\big(\mathbf{\Gamma}_{-i}^{(l-1)}\big) = \arg\min_{\mathbf{\Gamma}_i\in\Omega_i(t_n)}\ J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^{(l-1)}), \label{eq:best_response_problem} \end{equation} where the neighbor predictions in \eqref{eq:rollout_mpc_game} and \eqref{eq:tension_consensus_cost} are treated as fixed signals generated by $\mathbf{\Gamma}_{Z_i}^{(l-1)}$. %======================== \subsection{Nash equilibrium coordination and relaxed best-response iteration} \label{subsec:nash_clean} \paragraph{Game definition and Nash equilibrium.} At time $t_n$, the distributed MPC coordination defines a finite-horizon game $\mathcal{G}(t_n)=\langle \mathcal{I},\{\Omega_i(t_n)\}_{i\in\mathcal{I}},\{J_i(\cdot;\cdot)\}_{i\in\mathcal{I}}\rangle$. A joint strategy $\mathbf{\Gamma}^*(t_n)=\mathrm{col}\{\mathbf{\Gamma}_i^*(t_n)\}$ is a Nash equilibrium if \begin{equation} \forall i\in\mathcal{I},\qquad \mathbf{\Gamma}_i^*(t_n)\in \arg\min_{\mathbf{\Gamma}_i\in\Omega_i(t_n)} J_i\big(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^*(t_n)\big). 
\label{eq:NE_definition} \end{equation} Because $J_i$ depends on neighbors through \eqref{eq:rollout_mpc_game} and \eqref{eq:tension_consensus_cost}, \eqref{eq:NE_definition} explicitly represents the multi-stand game coupling. \paragraph{Relaxed best-response (to improve convergence).} Pure best-response updates may oscillate in strongly coupled mills. We therefore use a relaxed update with $\omega\in(0,1]$: \begin{equation} \mathbf{\Gamma}_i^{(l)} = (1-\omega)\mathbf{\Gamma}_i^{(l-1)} + \omega\, \mathrm{BR}_i\big(\mathbf{\Gamma}_{-i}^{(l-1)}\big). \label{eq:relaxed_BR} \end{equation} \begin{table}[t] \centering \small \renewcommand{\arraystretch}{1.12} \setlength{\tabcolsep}{3.5pt} \caption{Relaxed distributed Nash best-response iteration for RNE-DMPC (five-stand).} \label{tab:nash_iter_en} \begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X} \toprule \textbf{Step} & \textbf{Description} \\ \midrule A & Initialize $l=1$ and initialize $\mathbf{\Gamma}_i^{(0)}$ for all players (warm-start from the previous sampling time). \\ B & Given $\mathbf{\Gamma}^{(l-1)}$, each player $i$ performs forward rollout \eqref{eq:rollout_mpc_game} to obtain $\Delta \hat{x}_i^{(l-1)}(t_{n+s})$ for $s=1,\ldots,N_p$. \\ C & Each player $i$ solves the best-response NLP \eqref{eq:best_response_problem} to obtain $\widetilde{\mathbf{\Gamma}}_i^{(l)}=\mathrm{BR}_i(\mathbf{\Gamma}_{-i}^{(l-1)})$. \\ D & Apply relaxation: $\mathbf{\Gamma}_i^{(l)}=(1-\omega)\mathbf{\Gamma}_i^{(l-1)}+\omega\,\widetilde{\mathbf{\Gamma}}_i^{(l)}$. \\ E & Broadcast $\mathbf{\Gamma}_i^{(l)}$ and updated predictions $\Delta \hat{x}_i^{(l)}(t_{n+s})$ to neighbors. \\ F & Compute the maximum relative change $\varsigma^{(l)}$ using \eqref{eq:nash_metric_clean}. \\ G & If $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$; otherwise set $l\leftarrow l+1$ and repeat Steps B--F. 
\\ \bottomrule \end{tabularx} \end{table} \paragraph{Receding-horizon implementation.} After convergence, only the first-interval parameters $\Gamma_{i,n}^*$ are applied. The within-interval increment trajectory and the interval-averaged increment are computed by \eqref{eq:du_poly_mpc_clean} and \eqref{eq:apply_avg_clean}, and the absolute input is updated by \eqref{eq:apply_u_clean}. This preserves smooth actuator evolution while coordinating shared tensions through the Nash game \eqref{eq:NE_definition}.

你改完后,老师那句“看不出来多个机架博弈”基本就能对上了

因为现在你有三处“铁证”:

  • Ji(Γi;Γi)J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i}):目标函数显式依赖他人策略;
  • 共享张力一致性项 \eqref{eq:tension_consensus_cost}:冲突来自同一 ΔTi\Delta T_i 被两边共同作用;
  • Nash 定义 \eqref{eq:NE_definition} + relaxed BR 更新 \eqref{eq:relaxed_BR}:求解方案与博弈目标严格一致。

如果你还想再“更博弈一点”,我也可以把 4.2 里“约束集合”升级成**广义 Nash(GNE)**版本(共享张力做等式耦合约束而不是惩罚项),但那会更重、篇幅更长;按你老师反馈,先用上面这个版本通常已经够用了。

问题

d为什么等于3

思考

Thought for 5s

回答

因为你在文中把每个机架 ii 的“局部偏差状态”定义成了 3 个量的列向量

Δxi(t)[Δhi(t)ΔTi1(t)ΔTi(t)]\Delta x_i(t)\triangleq \begin{bmatrix} \Delta h_i(t)\\ \Delta T_{i-1}(t)\\ \Delta T_i(t) \end{bmatrix}

所以它的维度自然就是

d=dim(Δxi)=3.d=\dim(\Delta x_i)=3.

这 3 个量分别代表什么?

  • Δhi\Delta h_i:机架 ii 出口厚度偏差(本机架最核心的质量指标)
  • ΔTi1\Delta T_{i-1}:机架 ii 上游张力偏差(来自 i1i-1ii 之间的张力)
  • ΔTi\Delta T_i:机架 ii 下游张力偏差(来自 iii+1i+1 之间的张力)

在五机架串联系统里,耦合主要就是通过相邻机架间的张力传播,所以“本机架厚度 + 上下游两侧张力”是一个很典型、也很直接的局部建模选取。

那机架 1 和 5 为什么也能用 d=3d=3

因为你采用了边界统一约定:

ΔT0(t)0,ΔT5(t)0\Delta T_0(t)\equiv 0,\qquad \Delta T_5(t)\equiv 0
  • 对机架 1:Δx1=[Δh1, ΔT0(=0), ΔT1]\Delta x_1=[\Delta h_1,\ \Delta T_0(=0),\ \Delta T_1]^\top
  • 对机架 5:Δx5=[Δh5, ΔT4, ΔT5(=0)]\Delta x_5=[\Delta h_5,\ \Delta T_4,\ \Delta T_5(=0)]^\top

这样做的目的就是:所有机架的状态维数一致,方便

  • 数据集结构统一(网络输入输出维数固定)
  • 网络结构统一(不用为边界机架单独设计)
  • MPC/DMPC公式更整齐

可以不是 3 吗?

完全可以。d=3d=3 是你“状态选取”的设计选择,不是物理定律要求。
如果你希望更高保真,可以把局部状态扩成比如:

  • 加入口厚度、压下量/轧制力、摩擦系数估计、机架速度偏差、积分误差等
    dd 就会变大,但训练数据需求和优化复杂度也会上去。

如果你把你论文里“为什么选这三个状态”的那段(或老师具体意见)也贴出来,我可以帮你补一段论文风格的解释文字,放在符号定义后面,专门回答“为何 d=3d=3、为何足够描述主要耦合”。

问题

有没有用多项式参数化应用在五机架冷轧的论文呢

问题

有没有用多项式参数化应用在五机架冷轧的论文呢

思考

Explaining basis-function parameterization and polynomial relation

I didn’t find direct references to polynomial parameterization, but here's a summary:

  • For cold rolling, basis-function parameterization like Laguerre functions are used to reduce decision variables. It’s a form of parameterization similar to polynomials in method.

  • For exact polynomial use, PFC frameworks often use polynomial basis functions, as seen in the references to Haber et al and Friebel et al.

Citing sources and addressing the five-stand question

I’ll mention CAISA publications or dblp listings and cite. A relevant example is "Predictive Functional Control for Tension of Cold Continuous Rolling Mill" (2013) which applies PFC—cited. For five-stand specifics, I’ll note the difficulty confirming the exact number but can mention Nash equilibrium and data-driven MPC strategies, though these may or may not involve polynomial parameterization.

Crafting a response in Chinese and citing references

I’ll suggest citing Hu et al. as the closest match to five-stand cold rolling. If an exact reference isn't found, I can use examples like Laguerre-function parameterization to support the idea of functional parameterization. I'll recommend citing the key works and propose a written paragraph to blend with their paper.

Summing up sources and citations

I’ll recommend using the J Process Control 2021 paper that discusses the use of Laguerre-function in DMPC and basis function parameterization. Then, I can suggest referencing works on both polynomial and basis function parameterization, including PFC and its application to tandem cold rolling, with relevant citations like Friebel et al., Haber-Rossiter-Zabet, and Zhi 2013.

Thought for 3m 34s

回答

有,但在冷轧五机架/连轧文献里,“把未来控制轨迹用多项式(如二次/三次)参数化”这种表述并不算主流;更常见的写法是把未来控制序列用**基函数展开(basis-function parameterization)**来降维——其中 Laguerre 基函数在“连轧厚度–张力 DMPC/博弈”方向里出现得比较明确,你的二次多项式其实可以被看作“基函数参数化”的一种特例(基函数选 {1,τ,τ2}\{1,\tau,\tau^2\})。

下面给你几篇可以拿来支撑“参数化控制轨迹(而不是逐点优化)”、且与“连轧/多机架/厚度张力控制”贴得比较近的论文/资料(都能作为你论文里“方法依据/相关工作”的引用):


1) 连轧(厚度–张力)DMPC/博弈里用“基函数参数化”(Laguerre)

  • Hu Y., Sun J., Peng W., Zhang D. 等:Nash equilibrium-based distributed predictive control strategy for thickness and tension control on tandem cold rolling system,Journal of Process Control,97:92–102,2021。该文明确是“连轧厚度+张力、Nash/DMPC”,并提到为降低计算复杂度引入 Laguerre 函数做控制参数化。 (J-GLOBAL)

你可以在文中这样用它来“背书”你的二次多项式:
“类似于连轧 DMPC 中常用的基函数参数化(如 Laguerre 展开)以降低在线优化维数,我们采用低阶多项式基 {1,τ,τ2}\{1,\tau,\tau^2\} 对区间内增量轨迹进行参数化……”。


2) 连轧(厚度–张力)DMPC 方向的对照文献(不一定多项式,但同一问题域)

  • Hu Y., Sun J. 等:Distributed model predictive control based on neighborhood optimization for thickness and tension control system in tandem cold rolling mill(ISA Transactions,DOI: 10.1016/j.isatra.2021.12.030)。这是同一类“连轧厚度–张力 + 分布式 MPC”问题域的代表文献,可作为你方法章节的背景对照。 (Europe PMC)

3) 若你想**正面引用“多项式/基函数参数化”**这件事:PFC(Predictive Functional Control)

PFC 的经典表述就是:操纵量(MV)用若干“基函数”加权求和表示;并且当参考/扰动用多项式叠加表示时,MV 也用类似的“基本函数”(可以是多项式)来表示——这正好能给你“用多项式参数化轨迹”提供方法论支撑:

  • Haber, Rossiter, Zabet 的 PFC 教程稿里明确写到:MV 是“weighted basis functions”的和;且当参考是“sum of polynomial functions”时,MV 由类似基本函数构成,需要最小化二次型误差。 (White Rose Research Online)

PFC 在“连轧/多机架冷轧”上的直接应用

  • Friebel, Zabet, Haber, Jelali (2015)Predictive functional control of tandem cold metal rolling,IEEE CCA 2015(会议论文,DOI: 10.1109/CCA.2015.7320649)。这是“tandem cold rolling”场景下用 PFC(本质是基函数/参数化)的一篇直接应用文献。 (DBLP)
  • Zhi (2013)Predictive Functional Control for Tension of Cold Continuous Rolling Mill(Springer 会议论文),是冷连轧张力控制里用 PFC 的例子。 (Springer Nature)

我这次检索到的“空缺点”(避免你引用时踩坑)

我没有检索到一篇公开可直接读到全文、并明确写成“five-stand tandem cold rolling + MPC + within-interval quadratic/cubic polynomial parameterization of roll gap/speed”这种一模一样表述的论文。现有更常见的是:

  • Laguerre/正交基函数给控制序列降维(连轧 DMPC/博弈里有明确例子);(ScienceDirect)
  • 或者直接用分段常值/分段线性(本质上也是“0/1 次多项式”)。

如果你愿意,我也可以顺手把你论文里“多项式参数化”的动机段落改成更像“站得住脚的引用链”写法:
“连轧在线优化通常采用基函数参数化降维(Laguerre/PFC),本文选用低阶多项式基函数实现区间内平滑与约束可检验”——把上面这些引用自然嵌进去即可。

问题

We keep the same constraints as in \eqref{eq:u_abs_clean}--\eqref{eq:u_prop_clean}.
Collect them into a compact feasible set:
\begin{equation}
\Omega_i \triangleq
\Big{\mathbf{\Gamma}_i\ \Big|\
\eqref{eq:rollout_mpc_game}\ \text{holds and}\
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}
\Big}.
\end{equation}这部分是要什么公式,为什么显示出来都是问号

思考

Clarifying LaTeX equation issues

It looks like the user is encountering question marks due to unresolved references in their LaTeX, which often happens when equation labels are missing or not defined correctly. They might need to label equations and compile twice, and if using Overleaf, remember it also requires two runs. Beyond that, if integrating constraints like gi(Γi;Γi)0g_i(\Gamma_i;\Gamma_{-i}) \leq 0, I can suggest a more compact representation for their feasible set.

Addressing LaTeX formula and reference issues

If they're using a Chinese-language environment or haven't compiled with pdflatex twice, missing references could happen. Using \eqref without the amsmath package could also cause issues, but they likely included it. As for the formula: it's about defining the set Ωi(tn)\Omega_i(t_n), so no extra formulas are needed, but you could explicitly expand constraints. The question marks are often due to unresolved references, so compiling twice or ensuring labels are correctly placed can fix this.

Thought for 23s

回答

这段 不需要“额外某个新公式”,它本质上是在定义可行域集合 Ωi\Omega_i:把你前面已经写过的预测方程和约束(输入上下界、增量轨迹上下界、输入递推)统一打包成一个集合,方便后面写成 minΓiΩiJi()\min_{\mathbf\Gamma_i\in\Omega_i} J_i(\cdot)


为什么显示成问号(??)

LaTeX 里 \eqref{...} 显示 ?? / 问号,几乎都是交叉引用没解析成功,常见原因就这几类:

  1. 没有编译两遍
    交叉引用需要至少 pdflatex 编译两次(或用 latexmk 自动多次编译)。第一次生成 .aux,第二次才能把编号填回去。

  2. label 名字对不上/写错了
    比如你引用了 eq:rollout_mpc_game,但前面实际 \label{eq:rollout_mpc_game} 不存在,或者少了一个下划线/多了空格。

  3. \label 放的位置不对
    最好确保写法是:

latex
\begin{equation} ...公式... \label{eq:rollout_mpc_game} \end{equation}

\label 要放在 equation 环境里面(通常放在公式后面最稳)。

  1. 引用的公式在另一个文件但没被编译进来
    比如你用 \includeonly{...} 或者分文件编译时,含 label 的那部分没被编译。

  2. 辅助文件坏了/缓存没更新
    Overleaf 可以点 Recompile from scratch;本地就删掉 .aux .toc .lof .lot .out 再编译两遍。


这段“要什么公式”才更规范(两种写法)

写法 A:你现在这样“用 \eqref 打包”是对的(推荐)

你这段集合定义没问题,前提是这些 label 都能找到并成功编译。

写法 B:如果你不想依赖 \eqref(避免问号),就把约束展开写出来

比如写成(示意,按你文中的约束原样展开):

latex
\begin{equation} \Omega_i \triangleq \left\{\mathbf{\Gamma}_i\ \middle|\ \begin{aligned} &\Delta \hat{x}_i(t_{n+s+1})= \Delta \hat{x}_i(t_{n+s})+\mathcal N_i(\cdot),\ s=0,\ldots,N_p-1,\\ &u_{i,\min}\le u_i(t_{n+s})\le u_{i,\max},\ s=0,\ldots,N_p-1,\\ &\Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}, \ \forall\tau\in[0,\delta_{n+s}],\\ &u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s}),\ s=0,\ldots,N_p-1 \end{aligned} \right\}. \end{equation}

这样即使引用编号出问题,也不会出现 ??


你现在最该做的快速排查

  • 确认你前面那几个公式真的有
    \label{eq:rollout_mpc_game}\label{eq:u_abs_clean}\label{eq:du_traj_clean}\label{eq:u_prop_clean}
  • 清理辅助文件后 编译两遍
  • 若仍然 ??:把其中一个 label(比如 eq:rollout_mpc_game)在全文里搜一下,看是不是拼写不一致或重复定义

如果你把那四个被引用公式的 \begin{equation}...\label{...}...\end{equation} 原文各贴一小段,我可以直接帮你定位到底是哪一个 label 出错。

问题

\eqref{eq:rollout_mpc_game}\ \text{holds and}\
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}这几个公式标号前面没有么

思考

Thought for 7s

回答

对,这些 \eqref{...} 必须在前文真的存在对应的 \label{...},否则就会显示 ??(你说的“问号”)。

1)你这几个 label 应该在前面哪里出现?

  • eq:u_abs_cleaneq:du_traj_cleaneq:u_prop_clean
    这三个在你原稿里确实已经有(你贴的第 4.2 小节里就有:绝对输入约束、增量轨迹约束、输入递推)。
  • eq:rollout_mpc_game
    这个很可能是我给你改写时新增的“game-coupled form”的编号名。如果你前面实际还叫 eq:rollout_mpc_clean,那这里引用就会找不到,直接 ??

✅ 所以最常见的情况是:你文里只有

latex
\label{eq:rollout_mpc_clean}

但你在集合里引用了

latex
\eqref{eq:rollout_mpc_game}

——名字不一致就会问号。


2)怎么确认是不是“前面没有这个标号”?

在 LaTeX 源码里全文搜索:

  • \label{eq:rollout_mpc_game}
    • 找不到:那就说明前面确实没有这个标号(所以一定 ??)。
  • 同理搜 \label{eq:u_abs_clean} 等。

3)解决办法(任选其一)

办法 A(最简单):把引用名改回你文里已有的 label
如果你预测式仍然用的是 eq:rollout_mpc_clean,那就把集合里这一句改成:

latex
\eqref{eq:rollout_mpc_clean}\ \text{holds and}\ \eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}

办法 B:在你的预测公式下面补上对应 label
确保预测式写成这样(label 名要和你引用的一致):

latex
\begin{equation} ... % 你的预测滚动方程 \label{eq:rollout_mpc_game} \end{equation}

4)别忘了:编译至少两遍

即使 label 都对,第一次编译也可能还是 ??,第二次才会正常。
如果你改完仍旧 ??,通常是:

  • label 名字拼写不一致 / 多了空格 / 少了下划线
  • 或者你在分文件编译,含 \label 的那一章没被编译进来
  • 或者辅助文件没更新(删 .aux 再编译两遍)

如果你把你“滚动预测那条公式”的 \begin{equation}...\label{...} 那一小段贴出来(只要那几行),我可以直接告诉你:你现在应该引用 eq:rollout_mpc_clean 还是把它改成 eq:rollout_mpc_game

问题

那对应的,\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}这三个公式在你现在新改的里面叫什么

思考

Thought for a second

回答

在我给你“新改的那版”里,这三个约束我没有改名——也就是说:

  • 绝对输入约束仍然叫 \label{eq:u_abs_clean}
  • 区间内增量轨迹约束仍然叫 \label{eq:du_traj_clean}
  • 输入递推/传播仍然叫 \label{eq:u_prop_clean}

也就是你在可行域里写的

latex
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}

理论上是不用动的,前提是你原文那三条约束下面确实分别有对应的 \label{...},而且拼写一致。


这三个分别对应你原文哪里(方便你核对)

  • eq:u_abs_clean:你原文的 “Absolute input bounds” ui,minui(tn+s)ui,maxu_{i,\min}\le u_i(t_{n+s})\le u_{i,\max}
  • eq:du_traj_clean:你原文的 “Increment trajectory bounds for all τ\tau Δui,minΔui,n+s(τ;Γi,n+s)Δui,max\Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}
  • eq:u_prop_clean:你原文的 “propagate the absolute input sequence” ui(tn+s)=ui(tn+s1)+Δui(tn+s)u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s})

你现在真正容易出问题的是哪个?

大概率是 eq:rollout_mpc_game(新加/新命名的),而不是这三个老标签。

如果你前面预测滚动方程还是原来的 \label{eq:rollout_mpc_clean},那可行域里就把 eq:rollout_mpc_game 改回 eq:rollout_mpc_clean 就行。

把你滚动预测那条公式附近的 \label{...} 那两三行贴出来,我可以直接告诉你该引用哪个名字,避免你来回试。

问题

%========================
\section{Nash Equilibrium-Based RNE-DMPC}
%========================

The five-stand tandem cold rolling system is strongly coupled through inter-stand tension propagation.
As a result, changes in control actions (roll gap and stand speed) at one stand can affect both upstream and downstream stands,
making centralized online optimization over all stands' decision variables computationally demanding.

To mitigate this issue, we decompose the global predictive-control problem into N=5N=5 local subproblems associated with individual stands.
Each local controller optimizes its own decision variables while accounting for coupling via limited information exchange with neighboring controllers.
Motivated by game-theoretic coordination \citep{rawlings2008coordinating}, we formulate distributed coordination as a Nash-equilibrium-seeking iteration.
Based on the trained residual neural network surrogate model, we construct a Nash-equilibrium-based distributed MPC method (RNE-DMPC)
for coordinated thickness--tension regulation and tracking. The overall control structure is shown in Figure~\ref{4}.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x2.pdf}
\caption{Schematic diagram of the control architecture for a tandem cold rolling mill.}\label{4}
\end{figure*}

At sampling time tnt_n, stand ii chooses the polynomial-parameter sequence
Γi(tn)RpNc\mathbf{\Gamma}_i(t_n)\in\mathbb{R}^{pN_c}, where p=6p=6.
Let Γ(tn)col{Γ1(tn),,Γ5(tn)}\mathbf{\Gamma}(t_n)\triangleq \mathrm{col}\{\mathbf{\Gamma}_1(t_n),\ldots,\mathbf{\Gamma}_5(t_n)\}
denote the joint strategy profile, and let Γi(tn)\mathbf{\Gamma}_{-i}(t_n) denote the collection of all strategies except stand ii.

Given the current measured/estimated deviation state Δxi(tn)\Delta x_i(t_n) and the strategies
(Γi(tn),ΓZi(tn))(\mathbf{\Gamma}_i(t_n),\mathbf{\Gamma}_{Z_i}(t_n)),
the multi-step prediction used by stand ii is written explicitly as
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
&=
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s};\cdot),\,
\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}),\\
&\qquad
\Gamma_{i,n+s},\,
\delta_{n+s};\Theta_i^*
\Big),
\end{aligned}
\label{eq:rollout_mpc_game}
\end{equation}
for s=0,,Np1s=0,\ldots,N_p-1, with initialization Δx^i(tn;)=Δxi(tn)\Delta \hat{x}_i(t_n;\cdot)=\Delta x_i(t_n).
Here the neighbor stack Δx^Zi(tn+s;ΓZi)\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}) is generated from neighbors' strategies via the same learned predictors.

Over [tn+s,tn+s+1][t_{n+s},t_{n+s+1}] with length δn+s\delta_{n+s},
\begin{equation}
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\tau
+\Gamma_{i,n+s,2}\tau^2,\qquad \tau \in [0,\delta_{n+s}],
\end{equation}
and the interval-averaged increment is
\begin{equation}
\Delta u_i(t_{n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}.
\label{eq:du_avg_clean}
\end{equation}

\begin{remark}
Because inter-stand tension TiT_i is jointly affected by the adjacent stands ii and i+1i+1,
the predicted evolution of Δxi\Delta x_i depends on neighbors' future actions,
hence the MPC problems are not independent but form a coupled dynamic game.
\end{remark}

At time tnt_n, the local strategy of stand ii is
\begin{equation}
\mathbf{\Gamma}_i(t_n)
=
\mathrm{col}\{\Gamma_{i,n},\Gamma_{i,n+1},\ldots,\Gamma_{i,n+N_c-1}\}
\in \mathbb{R}^{pN_c}.
\end{equation}

In deviation coordinates, the regulation/tracking objective is Δxi(t)0\Delta x_i(t)\rightarrow 0, i.e.
\begin{equation}
\Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d},\qquad d=3.
\end{equation}

Recall Δxi=[Δhi,ΔTi1,ΔTi]\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top.
Define the row selectors
\begin{equation}
C^- \triangleq [0\ \ 1\ \ 0]\in\mathbb{R}^{1\times 3},\qquad
C^+ \triangleq [0\ \ 0\ \ 1]\in\mathbb{R}^{1\times 3},
\end{equation}
so that CΔxi=ΔTi1C^- \Delta x_i=\Delta T_{i-1} (upstream interface) and C+Δxi=ΔTiC^+\Delta x_i=\Delta T_i (downstream interface).

For the interface between stands ii and i+1i+1,
stand ii's prediction provides C+Δx^iC^+\Delta \hat{x}_i, while stand i+1i+1's prediction provides CΔx^i+1C^- \Delta \hat{x}_{i+1}.
Their mismatch measures coupling inconsistency:
\begin{equation}
e_{i}^{\mathrm{sh}}(t_{n+s};\mathbf{\Gamma})
\triangleq
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma}),\qquad i=1,\ldots,4.
\label{eq:shared_tension_mismatch}
\end{equation}

We define the stage cost of stand ii as a function of all players' strategies:
\begin{equation}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})
=
\sum_{s=1}^{N_p}
\left\|
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
\right\|_{Q_i}^{2}
+
\sum_{s=0}^{N_c-1}
\left\|\Gamma_{i,n+s}\right\|_{R_i}^{2}
+
J_i^{\mathrm{cpl}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}),
\label{eq:Ji_game}
\end{equation}
where Qi0Q_i\succeq 0 weights thickness and tension deviations, and Ri0R_i\succeq 0 penalizes actuation magnitudes.

The coupling term JicplJ_i^{\mathrm{cpl}} explicitly reflects the game/coordination requirement on shared tensions.
A simple and effective choice is to penalize the interface mismatches adjacent to stand ii:
\begin{equation}
\begin{aligned}
J_i^{\mathrm{cpl}}
&=
\mu_i\sum_{s=1}^{N_p}
\Big(
\mathbb{I}_{\{i\ge 2\}}
\big|
C^-\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^+\Delta \hat{x}_{i-1}(t_{n+s};\mathbf{\Gamma})
\big|^2 \\
&\qquad\quad
+
\mathbb{I}_{\{i\le 4\}}
\big|
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma})
\big|^2
\Big),
\end{aligned}
\label{eq:coupling_cost}
\end{equation}
with μi>0\mu_i>0 and indicator I{}\mathbb{I}_{\{\cdot\}}.
This term makes the coupling conflict explicit: unilateral actions that locally reduce thickness error may worsen shared-tension
compatibility and thus increase JiJ_i, and also affect neighbors' objectives.

We keep the same constraints as in \eqref{eq:u_abs_clean}--\eqref{eq:u_prop_clean}.
Collect them into a compact feasible set:
\begin{equation}
\Omega_i \triangleq
\Big\{\mathbf{\Gamma}_i\ \Big|\
\eqref{eq:rollout_mpc_game}\ \text{holds and}\
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}
\Big\}.
\end{equation}

\paragraph{Local best-response problem (player ii).}
Given neighbors' current strategies (or the communicated predicted trajectories induced by them),
stand ii solves the differentiable NLP:
\begin{equation}
\mathbf{\Gamma}_i^{\mathrm{BR}}
=
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}\
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}).
\label{eq:local_BR}
\end{equation}
Because the learned surrogate is differentiable, \eqref{eq:local_BR} can be solved by standard gradient-based NLP solvers
(e.g., SQP or interior-point methods with automatic differentiation).

%========================
\subsection{Nash equilibrium coordination and relaxed best-response solution}
\label{subsec:nash_clean}
%========================

\paragraph{Dynamic game definition (five-stand).}
At each sampling time tnt_n, the distributed MPC coordination induces a finite-horizon dynamic game:
players are stands i=1,,5i=1,\ldots,5; strategy sets are Ωi\Omega_i; and payoff (cost) functions are Ji(Γi;Γi)J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})
defined in \eqref{eq:Ji_game}--\eqref{eq:coupling_cost}.

\paragraph{Nash equilibrium (NE).}
A joint strategy profile Γ=col{Γ1,,Γ5}\mathbf{\Gamma}^*=\mathrm{col}\{\mathbf{\Gamma}_1^*,\ldots,\mathbf{\Gamma}_5^*\}
is a Nash equilibrium if
\begin{equation}
\forall i\in\{1,\ldots,5\},\qquad
\mathbf{\Gamma}_i^*\in
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^*).
\label{eq:NE_def}
\end{equation}
This definition explicitly characterizes the strategic coupling:
each player's optimal decision depends on neighbors' decisions through the shared-tension dynamics and the coupling term.

\paragraph{Relaxed distributed best-response iteration (improved robustness).}
To compute an NE online with limited communication, we employ a relaxed best-response iteration.
Let ll denote the Nash-iteration index.
Given Γ(l1)\mathbf{\Gamma}^{(l-1)}, each stand computes a best response ΓiBR,(l)\mathbf{\Gamma}_i^{\mathrm{BR},(l)}
by solving \eqref{eq:local_BR}, and then updates with relaxation:
\begin{equation}
\mathbf{\Gamma}_i^{(l)}
=
(1-\omega)\mathbf{\Gamma}_i^{(l-1)}
+
\omega\,\mathbf{\Gamma}_i^{\mathrm{BR},(l)},
\qquad \omega\in(0,1].
\label{eq:relaxed_BR}
\end{equation}
The relaxation factor ω\omega mitigates oscillations caused by strong coupling and improves practical convergence.

\paragraph{Convergence metric (unchanged).}
\begin{equation}
\varsigma^{(l)}
=
\max_i
\frac{\left\|
\mathbf{\Gamma}_i^{(l)}-\mathbf{\Gamma}_i^{(l-1)}
\right\|_2}{
\left\|
\mathbf{\Gamma}_i^{(l-1)}
\right\|_2+\epsilon},
\end{equation}
with ϵ>0\epsilon>0 small.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.12}
\setlength{\tabcolsep}{3.5pt}
\caption{Relaxed distributed Nash best-response iteration for RNE-DMPC (five-stand).}
\label{tab:nash_iter_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X}
\toprule
\textbf{Step} & \textbf{Description} \\
\midrule
A &
Initialize $l=1$ and warm-start $\mathbf{\Gamma}_i^{(0)}$ (e.g., from the previous sampling time). \\

B &
Communicate $\mathbf{\Gamma}_i^{(l-1)}$ (or the induced predicted trajectories) among neighbors; form $\mathbf{\Gamma}_{Z_i}^{(l-1)}$. \\

C &
Given $\mathbf{\Gamma}_{-i}^{(l-1)}$, solve the best-response NLP \eqref{eq:local_BR} to obtain $\mathbf{\Gamma}_i^{\mathrm{BR},(l)}$. \\

D &
Update the relaxed strategy using \eqref{eq:relaxed_BR} and compute the induced predictions
$\Delta \hat{x}_i^{(l)}(\cdot;\mathbf{\Gamma}^{(l)})$ via \eqref{eq:rollout_mpc_game}. \\

E &
Broadcast $\mathbf{\Gamma}_i^{(l)}$ and the predicted interface quantities needed in \eqref{eq:coupling_cost}
(e.g., $C^\pm\Delta \hat{x}_i^{(l)}$) to neighbors. \\

F &
Compute $\varsigma^{(l)}$; if $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$; otherwise $l\leftarrow l+1$ and repeat. \\
\bottomrule
\end{tabularx}
\end{table}

\paragraph{Receding-horizon implementation (unchanged).}
After convergence, only the first-interval parameters Γi,n\Gamma_{i,n}^* are applied.
The within-interval increment is Δui,n(τ;Γi,n)\Delta u_{i,n}(\tau;\Gamma_{i,n}^*) and the discrete applied increment uses the interval average:
\begin{equation}
\Delta u_i(t_n)
=
\Gamma_{i,n,0}^*
+
\Gamma_{i,n,1}^*\frac{\delta_n}{2}
+
\Gamma_{i,n,2}^*
\frac{\delta_n^2}{3},
\end{equation}
followed by ui(tn)=ui(tn1)+Δui(tn)u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n).

\paragraph{Remark (what makes the ``game'' visible).}
Equations \eqref{eq:Ji_game}--\eqref{eq:coupling_cost} and the NE definition \eqref{eq:NE_def} explicitly show that each player's objective
depends on others' strategies through (i) the coupled prediction dynamics and (ii) the shared-tension interface mismatch penalty,
thus making the multi-stand strategic interaction and coordination mechanism transparent.这里面有的公式标号没有这个公式,怎么解决

思考

Providing guidance on LaTeX compilation issues

To resolve the issue with missing equation labels, first, compile twice, ensure each label is unique, and check for any previous references in the document. If the label uses \mathbf{\Gamma} or \mathbb{I}, ensure the required packages are included. For missing labels, like eq:u_prop_clean, verify prior definitions and address any range mismatches.

Fixing undefined label issues in LaTeX

If the equations aren't being repeated, referencing should work as long as the labels were defined earlier. If the user sees question marks, it's likely that some labels were not compiled because they are in another file. To fix this, either include the file or ensure the labels are correctly defined. Numeric labels like \label{4} can be confusing, so it's better to use more descriptive names like \label{eq:du_avg_clean}. Some equations may be missing labels, so check prior definitions.

Identifying missing labels and giving fix instructions

Some equations, like eq:u_abs_clean, might be missing labels because they were defined in a previous chapter. If files are split, make sure the order is correct, or cross-references may fail. I suggest using \Cref for better formatting, and adding a label for the convergence metric. To fix:

  1. Search for \eqref{eq:u_abs_clean} and make sure each label matches its equation.
  2. Verify labels are unique.
  3. Compile twice to ensure proper referencing.

Thought for 59s

回答

你这段里出现“有的公式标号没有这个公式/显示??”,本质原因只有一个:你引用的 label(比如 eq:u_abs_clean)在当前编译出来的文档里没有成功定义(或定义没被编译进来),所以 LaTeX 只能给 ??

结合你贴的代码,最常见的情况是下面几种:


1)你引用的这三个 label 在“新文件/新章节”里其实没被编译进来

你在这里引用了:

  • eq:u_abs_clean
  • eq:du_traj_clean
  • eq:u_prop_clean

但这三个约束不在你贴的这一段里,而是在前面章节(你原稿 4.2)里。

如果你现在是分文件写作(比如 main.tex + chap4.tex),就会出现这种情况:

  • 你在 chap4.tex 引用 eq:u_abs_clean
  • main.tex 里并没有 \include{chapX} 把包含这些 \label{eq:u_abs_clean} 的文件编进来
    或者你用了 \includeonly{chap4} 导致前面的章节没编译

✅ 解决:

  • 确认包含那三个约束的章节文件确实被 \include{...} / \input{...} 进主文件,并且不在 \includeonly 排除名单里。

2)label 名字在你前文里改过,但这里仍用旧名字

比如你前面其实写的是:

latex
\label{eq:u_abs}

但这里引用的是 eq:u_abs_clean,那就必然 ??

✅ 解决:

  • 全文搜索 \label{eq:u_abs_clean}\label{eq:du_traj_clean}\label{eq:u_prop_clean}
    找不到就说明“前面没有这个标号”,把这里的 \eqref{...} 改成你真实存在的 label 名。

3)编译次数不够 / 辅助文件没更新

交叉引用需要至少编译两遍(第一次生成 .aux,第二次回填编号)。

✅ 解决:

  • 连续编译两次(Overleaf 也一样)
  • 还不行就“从头编译”:删除 .aux/.out/.toc 等或 Overleaf 选择 Recompile from scratch

4)最稳的“自检方法”

看编译日志(log)里会有这种提示:

LaTeX Warning: Reference 'eq:u_abs_clean' on page ... undefined

这会直接告诉你到底哪个 label 不存在


5)如果你想让这一节完全自洽(不依赖前文),可以把三条约束在这里再写一遍并打 label

但注意:不要和前文重复 label,否则会变成 “Label multiply defined”。

如果你确实要在这一节重写约束,建议改成新名字,例如:

latex
\begin{equation} u_{i,\min} \le u_i(t_{n+s}) \le u_{i,\max},\ s=0,\ldots,N_p-1. \label{eq:u_abs_game} \end{equation} \begin{equation} \Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}, \ \forall\tau\in[0,\delta_{n+s}]. \label{eq:du_traj_game} \end{equation} \begin{equation} u_i(t_{n+s}) = u_i(t_{n+s-1}) + \Delta u_i(t_{n+s}),\ s=0,\ldots,N_p-1. \label{eq:u_prop_game} \end{equation}

然后把你可行域集合里的引用同步改成 ..._game


你这份代码里,eq:rollout_mpc_game 已经定义了

你贴的滚动预测式下面有:

latex
\label{eq:rollout_mpc_game}

所以它不会是 ?? 的来源;真正容易 ?? 的就是那三个“前文约束”的 label。


如果你把那三条约束在你全文里实际写的那几行(含 \label{...})贴出来,我可以直接告诉你:这里应该引用什么名字、或你主文件哪里没 include 进来。

问题

这个约束条件重新在这写一下呗,新版前面好像没写

思考

Thought for 9s

回答

可以的。你现在这节里既然“新版前面没写约束”,那就把约束在这一节重新写一遍并重新打 label(不要再用 ..._clean,避免和旧稿/别处冲突)。下面这段你可以直接粘贴到你那句 “We keep the same constraints …” 的位置(或者紧跟在 \eqref{eq:coupling_cost} 后面)。

latex
\paragraph{Constraints and feasible set.} We enforce the same types of constraints as in the practical mill: absolute-input bounds and within-interval increment bounds. To keep this section self-contained, we restate them here. \emph{Absolute input bounds (roll gap and speed):} \begin{equation} u_{i,\min}\le u_i(t_{n+s})\le u_{i,\max}, \qquad s=0,\ldots,N_p-1, \label{eq:u_abs_game} \end{equation} where $u_i(t)=[s_i(t),\,v_i(t)]^\top$. \emph{Within-interval increment-trajectory bounds (for all $\tau$):} \begin{equation} \Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}, \qquad \forall\tau\in[0,\delta_{n+s}],\ s=0,\ldots,N_p-1, \label{eq:du_traj_game} \end{equation} where $\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})$ is given by the polynomial parameterization. \emph{Consistency with discrete execution (interval average and propagation):} Define the interval-averaged increment \begin{equation} \Delta u_i(t_{n+s}) = \frac{1}{\delta_{n+s}}\int_{0}^{\delta_{n+s}}\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\,d\tau = \Gamma_{i,n+s,0} +\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2} +\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}, \label{eq:du_avg_game} \end{equation} and propagate the absolute input along the horizon by \begin{equation} u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n),\qquad u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s}),\ s=1,\ldots,N_p-1, \label{eq:u_prop_game} \end{equation} where $u_i(t_{n-1})$ is the applied (measured) input from the previous sampling instant. \emph{Compact feasible set:} \begin{equation} \Omega_i \triangleq \Big\{\mathbf{\Gamma}_i\ \Big|\ \eqref{eq:rollout_mpc_game}\ \text{holds and}\ \eqref{eq:u_abs_game},\eqref{eq:du_traj_game},\eqref{eq:u_prop_game}\ \text{are satisfied} \Big\}. \label{eq:Omega_i_game} \end{equation}

你这样改的好处

  • 这一节自洽:不再依赖“前面某处的 eq:u_abs_clean 等 label”,也就不会出现 ??
  • label 全是新的:eq:u_abs_game / eq:du_traj_game / eq:u_prop_game,不怕和旧稿重复。

最后提醒一句:改完后至少编译两遍(或 Overleaf “Recompile from scratch”),引用号才能稳定显示。

问题

State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf

%========================
\section{Construction of Residual Neural Network}
%========================

Given the dataset, the neural network model is trained to learn a stand-wise, control-dependent one-step evolution law of deviation states:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\,\Theta_i\Big),
\label{eq:learned_dyn_clean}
\end{equation}
where Ni()\mathcal{N}_i(\cdot) outputs the one-step deviation-state change and Θi\Theta_i are trainable parameters.

\begin{remark}
If Ni\mathcal{N}_i does not take control information as input (here Γi,n\Gamma_{i,n} and δn\delta_n),
the predictor becomes an autoregressive model that only reproduces trajectories under the training input patterns
and cannot answer the counterfactual question: ``what will happen if we choose a different roll gap and speed trajectory?''
Since MPC optimizes over candidate decisions, a control-dependent predictor \eqref{eq:learned_dyn_clean} is necessary
to evaluate the predicted thickness and tension behavior under different candidate actuator trajectories.
\end{remark}

Let d=3d=3 (state dimension), Zi|Z_i| be the number of neighbors of stand ii in \eqref{eq:Zi_clean}, and p=6p=6 in \eqref{eq:Gamma_clean}.
Define the input vector
\begin{equation}
X_{i,\text{in}} \triangleq
\big[
\Delta x_i(t_n)^\top,\,
\Delta x_{Z_i}(t_n)^\top,\,
\Gamma_{i,n}^\top,\,
\delta_n
\big]^\top
\in \mathbb{R}^{d(1+|Z_i|)+p+1}.
\label{eq:X_in_clean}
\end{equation}
The network mapping is
\begin{equation}
\mathcal{N}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d}.
\end{equation}

To improve training stability and long-horizon rollout robustness, we use a residual form.
Let I^iRd×(d(1+Zi)+p+1)\hat{I}_i\in\mathbb{R}^{d\times(d(1+|Z_i|)+p+1)} be a selection matrix extracting the local state block:
\begin{equation}
\hat{I}_i = [I_d,\ 0_{d\times(d|Z_i|+p+1)}].
\label{eq:Ihat_clean}
\end{equation}
Then the one-step predictor is written as
\begin{equation}
X_{i,\text{out}} = \hat{I}_i X_{i,\text{in}} + \mathcal{N}_i(X_{i,\text{in}}; \Theta_i),
\label{eq:res_predict_clean}
\end{equation}
where Xi,outX_{i,\text{out}} represents the predicted Δxi(tn+1)\Delta x_i(t_{n+1}).
This structure implements a baseline-plus-correction interpretation:
the shortcut propagates the current deviation state Δxi(tn)\Delta x_i(t_n), while the network learns the correction capturing
unmodeled nonlinearities and inter-stand coupling (via ΔxZi\Delta x_{Z_i}) under varying operating conditions.

To improve robustness when δn\delta_n varies, we introduce an auxiliary branch inside Ni\mathcal{N}_i:
\begin{equation}
\mathcal{N}_i(X_{i,\text{in}};\Theta_i)\triangleq
\psi_i(X_{i,\text{in}};\Theta_{\psi_i}) + \rho_i(X_{i,\text{in}};\theta_i),
\label{eq:aux_clean}
\end{equation}
where ψi()\psi_i(\cdot) is a lightweight feedforward branch that captures low-frequency/scale effects strongly related to δn\delta_n,
and ρi()\rho_i(\cdot) captures the remaining nonlinear coupling corrections.
When ψi()0\psi_i(\cdot)\equiv 0, the model reduces to a standard residual network.

For the jj-th sample in \eqref{eq:S_i_clean}, define
\begin{equation}
X_{i,\text{in}}^{(j)} =
\big[
\Delta x_i^{(j)}(t_n),\ \Delta x_{Z_i}^{(j)}(t_n),\
\Gamma_{i,n}^{(j)},\ \delta_n^{(j)}
\big]^{\top},
\end{equation}
and the supervised residual target
\begin{equation}
\Delta r_i^{(j)}=\Delta x_i^{(j)}(t_{n+1})-\Delta x_i^{(j)}(t_n).
\label{eq:target_clean}
\end{equation}

To suppress accumulation drift induced by long-horizon recursion and to improve long-term predictive stability,
we train the forward predictor jointly with an auxiliary backward residual model
and impose a multi-step reciprocal-consistency regularization over a KK-step segment from Si(K)S_i^{(K)}.

Construct a backward residual network
\begin{equation}
\mathcal{B}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d},
\end{equation}
parameterized by Θˉi\bar{\Theta}_i. For the backward step associated with interval [tn,tn+1][t_n,t_{n+1}], define
\begin{equation}
\begin{aligned}
X_{i,\mathrm{in}}^{b}
&=
\big[
\Delta x_i(t_{n+1}),\ \Delta x_{Z_i}(t_{n+1}),\
\Gamma_{i,n},\ \delta_n
\big]^{\top},\\
X_{i,\mathrm{out}}^{b}
&=
\hat{I}_i X_{i,\mathrm{in}}^{b} + \mathcal{B}_i(X_{i,\mathrm{in}}^{b};\bar{\Theta}_i),
\end{aligned}
\label{eq:back_clean}
\end{equation}
where Xi,outbX_{i,\mathrm{out}}^{b} represents the backward estimate of Δxi(tn)\Delta x_i(t_n).
The supervised backward residual target is
\begin{equation}
\Delta r_i^{b}=\Delta x_i(t_n)-\Delta x_i(t_{n+1}).
\end{equation}

Given a segment sample Wi,nSi(K)\mathcal{W}_{i,n}\in S_i^{(K)}, initialize
\begin{equation}
\Delta \hat{x}_i(t_n)=\Delta x_i(t_n),
\end{equation}
and recursively apply the forward predictor for KK steps:
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,
\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,\delta_{n+s};\,\Theta_i
\Big),\\
&\qquad s=0,\ldots,K-1.
\end{aligned}
\label{eq:fwd_roll_clean}
\end{equation}

Set the terminal condition
\begin{equation}
\Delta \bar{x}_i(t_{n+K})=\Delta \hat{x}_i(t_{n+K}),
\end{equation}
and roll back using Bi\mathcal{B}_i:
\begin{equation}
\begin{aligned}
\Delta \bar{x}_i(t_{n+s})
&=
\hat{I}_i X_{i,\mathrm{in}}^{b}(t_{n+s})
+
\mathcal{B}_i\!\Big(X_{i,\mathrm{in}}^{b}(t_{n+s});\,\bar{\Theta}_i\Big),
\quad s=K-1,\ldots,0,
\end{aligned}
\label{eq:bwd_roll_clean}
\end{equation}
where
\begin{equation}
X_{i,\mathrm{in}}^{b}(t_{n+s})=
\big[
\Delta \bar{x}_i(t_{n+s+1}),\ \Delta \hat{x}_{Z_i}(t_{n+s+1}),\
\Gamma_{i,n+s},\ \delta_{n+s}
\big]^{\top}.
\end{equation}

Define the multi-step reciprocal prediction error
\begin{equation}
E_i(t_n)
=
\sum_{s=0}^{K}
\left\|
\Delta \hat{x}_i(t_{n+s})-\Delta \bar{x}_i(t_{n+s})
\right\|^2.
\end{equation}

We jointly minimize:
\begin{equation}
\begin{aligned}
L_{\mathrm{1step}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s+1})-\Delta x_i^{(j)}(t_{n+s})\big)
-\mathcal{N}_i\!\left(
X_{i,\mathrm{in}}^{(j)}(t_{n+s});\Theta_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{bwd}}(\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s})-\Delta x_i^{(j)}(t_{n+s+1})\big)
-\mathcal{B}_i\!\left(
X_{i,\mathrm{in}}^{b,(j)}(t_{n+s});\bar{\Theta}_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{msrp}}(\Theta_i,\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K} E_i^{(j)}(t_n),\\[2mm]
L_{\mathrm{roll}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\sum_{s=1}^{K}
\Big\|
\Delta x_i^{(j)}(t_{n+s})-\Delta \hat{x}_i^{(j)}(t_{n+s})
\Big\|^2.
\end{aligned}
\label{eq:loss_clean}
\end{equation}
Here, L1stepL_{\mathrm{1step}} enforces one-step accuracy; LrollL_{\mathrm{roll}} explicitly suppresses long-horizon drift under recursion;
LmsrpL_{\mathrm{msrp}} regularizes the learned dynamics by enforcing reciprocal consistency between forward and backward rollouts;
and LbwdL_{\mathrm{bwd}} trains the backward model for the consistency regularization.
In implementation, these terms are combined as
\begin{equation}
L_{\mathrm{total}}=\lambda_1 L_{\mathrm{1step}}+\lambda_2 L_{\mathrm{roll}}+\lambda_3 L_{\mathrm{msrp}}+\lambda_4 L_{\mathrm{bwd}},
\end{equation}
where λ1,λ2,λ3,λ4>0\lambda_1,\lambda_2,\lambda_3,\lambda_4>0 are tuned on a validation set.

After training, the forward predictor is
\begin{equation}
\Delta \hat{x}_i(t_{n+1})
=
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(
\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,
\Gamma_{i,n},\,\delta_n;\,\Theta_i^*
\Big),
\label{eq:pred_clean}
\end{equation}
and multi-step prediction is obtained by recursive rollout of \eqref{eq:pred_clean}.
This learned predictor is the internal model used by the MPC optimizer in the next section.

Finally, network parameters are optimized using Adam:
\begin{equation}
\Theta_{i,t+1} = \Theta_{i,t} - \alpha \frac{\hat{m}_{i,t}}{\sqrt{\hat{v}_{i,t}} + \varepsilon},
\end{equation}
where α\alpha is the learning rate (we use α\alpha to avoid conflict with other symbols),
m^i,t\hat{m}_{i,t} and v^i,t\hat{v}_{i,t} are bias-corrected moment estimates, and ε>0\varepsilon>0 is a small constant for numerical stability.
Figure~\ref{fig:rnn_logic} illustrates the overall structure.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.85]{picture/x6.pdf}
\caption{Logic diagram of the residual neural network.}
\label{fig:rnn_logic}
\end{figure}这是我目前的第三部分。老师说在写一点,就是把网络结构和机理模型对比一下,在机理模型的角度能匹配上,能更好的说明可解释性的问题。可以仿照我发你的这篇文章写

思考

Adding to Section 3 with mechanism interpretation

To expand Section 3, I’ll compare the neural network with the mechanistic model for interpretability. Referencing the aero-engine paper, I’ll include the evolution operator, ResNet as a time integrator, and the residual network resembling mechanistic dynamics. The auxiliary branch will align with gResNet, ensuring stability with MSRP.

Adding mechanism-consistent sections to the paper

I'll create two subsections: one for "Mechanism-consistent interpretation and explainability" and another linking to the evolution operator of the mechanistic model. I’ll include equations to show the relation between continuous dynamics and the ResNet structure. The notation will align with earlier mappings, ensuring precision with inputs and increments.

Linking evolution operator and network proxy

We can explain that since the input within each interval is parameterized by Γ and δ, the evolution operator depends on these values. The network uses them as proxies. We can also draw parallels from the aero-engine paper and define the increment φ_Δ. Text will reference key concepts and include citations.

Guidance for adding mechanism-consistent interpretation

You can insert the following into Section 3, possibly after eq:res_predict_clean or eq:aux_clean. It provides a LaTeX code snippet for the explanation of the mechanism-consistent interpretation and its explainability. The network's output is physically meaningful, representing increments in thickness and tensions. ResNet functions as a time integrator similar to Euler/Runge-Kutta. However, be sure to adjust formatting based on your existing document.

Crafting mechanism-consistent interpretation in LaTeX

The snippet should describe how the network approximates increments akin to the evolution operator, using integral notation. It links residual networks to proposed dynamics and auxiliary operators. The interpretation connects physically meaningful outputs (increments) ensuring consistency and invertibility. We can cite portions from the provided PDF for justification.

Thought for 2m 31s

回答

可以。你这第三部分已经把“残差网络 + 前向/后向 + MSRP”写得很完整了;老师要你补的其实是再加一小节,把你这个网络结构从“机理模型(连续时间动力学)→演化算子→残差网络时间积分器”的角度串起来,让读者觉得它不是“黑盒拟合”,而是在结构上能对上机理

下面我给你一段可直接粘贴进第 3 节的新小节(建议放在你式 \eqref{eq:res_predict_clean} 和 \eqref{eq:aux_clean} 后面,作为一个新 \subsection 或 \paragraph)。写法是仿照你发的那篇 aero-engine 文章里“ResNet ≈ 演化算子/时间积分器、残差≈增量、gResNet 用辅助算子处理较大时间步长、MSRP 用前后向一致性稳定长预测”的逻辑来写的:ResNet 作为时间积分器的解释、残差对应增量/积分项、gResNet 的辅助算子思想 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf),以及 MSRP 前后向回环误差的定义 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)。


latex
%======================== \subsection{Mechanism-consistent interpretation and explainability of the residual network} \label{subsec:mech_interp} \paragraph{Mechanistic viewpoint: coupled continuous-time dynamics and evolution operator.} Although the five-stand cold rolling mill involves complex rolling deformation and tension-transport coupling, its stand-wise deviation dynamics can be abstractly described by a coupled nonlinear ODE: \begin{equation} \frac{d}{dt}\Delta x_i(t) = f_i\!\Big(\Delta x_i(t),\,\Delta x_{Z_i}(t),\,u_i(t),\,d_i(t)\Big), \qquad i=1,\ldots,5, \label{eq:mech_ode} \end{equation} where $f_i(\cdot)$ summarizes the mechanistic effects (plastic deformation, strip elasticity, friction, transport, etc.). For a given sampling interval length $\delta_n$ and a within-interval input trajectory $u_i(t_n+\tau)$, the state transition over one interval can be written through an evolution operator: \begin{equation} \Delta x_i(t_{n+1})=\Phi_{i,\delta_n}\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,u_i([t_n,t_{n+1}]),\,d_i([t_n,t_{n+1}])\Big). \label{eq:evolution_operator_mill} \end{equation} By the fundamental theorem of calculus, \eqref{eq:mech_ode} implies the increment form \begin{equation} \Delta x_i(t_{n+1}) = \Delta x_i(t_n) + \underbrace{\int_{0}^{\delta_n} f_i\!\Big(\Delta x_i(t_n+\tau),\,\Delta x_{Z_i}(t_n+\tau),\,u_i(t_n+\tau),\,d_i(t_n+\tau)\Big)\,d\tau}_{\triangleq\ \varphi_{i,n}}, \label{eq:increment_integral} \end{equation} where $\varphi_{i,n}$ is the one-interval state increment generated by the mechanistic dynamics. \paragraph{Why the residual structure is interpretable (network output $\approx$ mechanistic increment).} Our learned model \eqref{eq:learned_dyn_clean} adopts the same increment form as \eqref{eq:increment_integral}: \begin{equation} \Delta x_i(t_{n+1}) \approx \Delta x_i(t_n) + \mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\Theta_i\Big). 
\end{equation} Here, $\mathcal{N}_i(\cdot)$ plays the role of a data-driven approximation of the integral increment $\varphi_{i,n}$, i.e., it approximates the accumulated effect of the mechanistic dynamics over $[t_n,t_{n+1}]$. This is consistent with the well-known interpretation that a residual network behaves like a one-step time integrator: the identity (shortcut) path propagates the current state, while the residual branch represents the increment over the time lag. \paragraph{Control-trajectory parameterization matches the integral dependence.} Inside each interval, we do not optimize point-wise $u_i(t)$ but parameterize the increment trajectory by $\Delta u_{i,n}(\tau;\Gamma_{i,n})$. Hence, the mechanistic increment $\varphi_{i,n}$ in \eqref{eq:increment_integral} depends on the \emph{whole} within-interval trajectory. Feeding $(\Gamma_{i,n},\delta_n)$ into $\mathcal{N}_i$ is therefore a compact way to represent how different candidate gap/speed trajectories change the integral effect and thus the next thickness--tension state. \paragraph{Auxiliary branch as a learnable ``coarse physics'' predictor.} When $\delta_n$ varies and is not very small, directly learning $\varphi_{i,n}$ may be harder. Motivated by the generalized residual idea (output = auxiliary prediction + residual correction), we decompose the increment predictor into two parts in \eqref{eq:aux_clean}: \begin{equation} \mathcal{N}_i(\cdot)=\psi_i(\cdot)+\rho_i(\cdot). \end{equation} Conceptually, $\psi_i(\cdot)$ captures low-frequency / scale effects strongly related to $\delta_n$ (e.g., coarse drift dominated by interval length or operating regime), while $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections. This provides a mechanism-consistent interpretation: a baseline increment (coarse integrator) plus a residual correction that compensates unmodeled nonlinearities and inter-stand coupling. 
\paragraph{Backward model and reciprocal consistency as an approximate invertibility constraint.} In mechanistic dynamics, evolving forward by $\delta_n$ and then evolving backward by the same lag should approximately return to the initial state (up to disturbances and numerical errors). Therefore, we introduce a backward residual model and a multi-step reciprocal-consistency loss. This can be interpreted as enforcing a data-driven approximate inverse of the learned evolution operator, which suppresses long-horizon drift and improves rollout stability. \begin{remark} The above interpretation links the proposed residual network to the mechanistic state transition: the shortcut corresponds to state propagation, and the learned residual corresponds to the integrated effect of nonlinear rolling and tension coupling over one sampling interval. Hence the network is not an arbitrary black box but a structured approximation of the mill evolution operator. \end{remark}

你需要做的两点小改动/提示

  1. 你文里已经有 Δui,n(τ;Γi,n)\Delta u_{i,n}(\tau;\Gamma_{i,n}) 的定义,所以在上面“Control-trajectory parameterization”那段不会冲突;如果你愿意更严谨,可在 \eqref{eq:increment_integral} 里把 ui(tn+τ)u_i(t_n+\tau) 写成 ui(tn)+Δui,n(τ;Γi,n)u_i(t_n)+\Delta u_{i,n}(\tau;\Gamma_{i,n})(可选)。
  2. aero-engine 那篇文章里对应的“演化算子—残差网络—增量—gResNet—MSRP”逻辑来源分别在:演化算子定义 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)、ResNet 输出与残差 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)、增量 φΔ\varphi_\Delta 与 gResNet 辅助算子 η\eta (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)、MSRP 前后向回环误差 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)。你可以在论文里按需要加一两句 “inspired by …” 并配上对应参考文献(不一定要引用整篇,只要引用 ResNet-as-integrator / gResNet / MSRP 的那几篇也行)。

如果你把你论文参考文献里准备怎么引用那篇 aero-engine(或你想改成引用它的[27][32]那条链),我也可以帮你把这段里该插的 \cite{...} 一并补到位,保证格式统一、审稿读起来更顺。

问题

用中文写一遍

思考

Preparing Chinese version of mechanism-consistent interpretation

I need to create the Chinese version of a subsection similar to the one in the paper. It will include evolution operator definitions, ResNet as a time integrator, gResNet with auxiliary operators, and MSRP operators. I'll ensure the formulas label properly in LaTeX, using Chinese text and citing supporting sources. I also plan to relate this to cold rolling mechanistic models.

Crafting the final snippet with citations

I need to mention the evolution operator as a forward time integrator and connect it to the residual network. For gResNet, I'll explain the auxiliary operator. Then, I'll discuss MSRP, including forward and backward prediction. I'll weave in citations properly, ensuring they’re not in code fences and match the exact format. Since it's LaTeX, I'll include the code block separately from the citations to follow the rules.

Planning the LaTeX snippet with citations

I can’t put citations inside a LaTeX code block, so I should explain the Chinese text along with the citations outside the code block. The LaTeX snippet itself will remain without citations. I should provide a short explanation for the user on where to insert the snippet, and cite the paper afterward. This approach keeps everything in line with guidelines.

Thought for 45s

回答

下面给你一段中文 + 可直接粘贴的 LaTeX 小节,用“机理模型 → 演化算子(时间积分器)→ 残差网络增量”的逻辑,把你第 3 部分的网络结构和机理模型对上,从而更好解释“可解释性”。写法思路参考你给的那篇文章:先给“演化算子定义”,再说明“ResNet 可看作时间积分器/学习增量”,再说明“广义残差(辅助算子)对应你这里的辅助分支”,最后用“前后向 reciprocal/MSRP”解释长时预测稳定性。对应依据在该文对演化算子、ResNet 增量形式、gResNet 辅助算子、MSRP 的描述里都有明确表述。(State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)

latex
%======================== \subsection{残差网络的机理一致性解释与可解释性} \label{subsec:mech_interp_cn} %======================== \paragraph{机理视角:五机架耦合动力学与“演化算子”。} 尽管五机架冷轧过程包含复杂的变形、摩擦与张力传递机理,但从系统论角度, 其机架 $i$ 的偏差状态可抽象为一个耦合非线性动力学方程: \begin{equation} \frac{d}{dt}\Delta x_i(t)= f_i\!\Big(\Delta x_i(t),\,\Delta x_{Z_i}(t),\,u_i(t),\,d_i(t)\Big),\qquad i=1,\ldots,5, \label{eq:mech_ode_mill} \end{equation} 其中 $f_i(\cdot)$ 汇总了轧制变形、材料弹性、运输与张力传播等机理效应。 对给定采样间隔长度 $\delta_n$ 与区间内输入轨迹 $u_i(t_n+\tau)$系统从 $t_n$$t_{n+1}$ 的状态映射可用“演化算子/状态转移算子”表示为 \begin{equation} \Delta x_i(t_{n+1})= \Phi_{i,\delta_n}\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,u_i([t_n,t_{n+1}]),\,d_i([t_n,t_{n+1}])\Big). \label{eq:evolution_operator_mill} \end{equation} 从微分方程的基本积分关系出发,可得一步增量形式 \begin{equation} \Delta x_i(t_{n+1}) = \Delta x_i(t_n) + \underbrace{\int_{0}^{\delta_n} f_i\!\Big(\Delta x_i(t_n+\tau),\,\Delta x_{Z_i}(t_n+\tau),\,u_i(t_n+\tau),\,d_i(t_n+\tau)\Big)\,d\tau}_{\triangleq\ \varphi_{i,n}}, \label{eq:increment_integral_mill} \end{equation} 其中 $\varphi_{i,n}$ 表示在一个采样区间内由机理动力学累积得到的状态增量。 \paragraph{残差结构的可解释性:网络输出近似“机理增量”。} 本文的学习模型采用 \begin{equation} \Delta x_i(t_{n+1}) \approx \Delta x_i(t_n) + \mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\Theta_i\Big), \end{equation} 其结构与 \eqref{eq:increment_integral_mill} 完全同形:快捷支路传递当前状态, 而网络分支输出对应“一步增量” $\varphi_{i,n}$ 的数据驱动近似。 因此该残差网络并非任意黑盒映射,而是在结构上逼近机理系统的区间演化算子: 学习的是“从 $t_n$$t_{n+1}$ 的累积效应”,而不是直接拟合状态本身。 \paragraph{控制轨迹参数化与机理积分依赖的一致性。} 机理增量 $\varphi_{i,n}$ 本质依赖于区间内的整个输入轨迹 $u_i(t_n+\tau)$本文将区间内增量轨迹参数化为 $\Delta u_{i,n}(\tau;\Gamma_{i,n})$并把 $(\Gamma_{i,n},\delta_n)$ 作为网络输入的一部分, 等价于用低维参数对“输入轨迹对积分增量的影响”进行编码, 从而支持 MPC 在优化不同候选轨迹时进行“反事实预测”(不同 $\Gamma$ 对应不同演化结果)。 \paragraph{辅助分支的机理含义:粗尺度增量 + 残差修正。} 当采样间隔 $\delta_n$ 变化或不够小,直接学习 $\varphi_{i,n}$ 的难度增大。 因此本文采用 \begin{equation} \mathcal{N}_i(\cdot)=\psi_i(\cdot)+\rho_i(\cdot), \end{equation} 可解释为:$\psi_i(\cdot)$ 学习与 $\delta_n$ 强相关的“粗尺度/低频”增量(类似一个可学习的基准积分器), $\rho_i(\cdot)$ 再对剩余的耦合非线性与未建模效应做残差补偿。 
该分解使得网络内部具有“基准预测 + 误差修正”的结构化意义,有助于解释与泛化。 \paragraph{前后向模型与互逆一致性:抑制长时递推漂移。} 在理想机理动力学下,若忽略扰动与数值误差,前向推进再按同一时间尺度回推应近似回到初值。 因此本文引入后向残差模型并施加多步“前后向一致性/互逆一致性”约束, 可解释为:在数据驱动框架下对演化算子施加近似可逆(或近似互逆)正则, 从而抑制长时递推误差累积并提升多步预测稳定性。 \begin{remark} 综上,本文残差网络的快捷支路与“状态传递”一致,网络分支对应“区间内机理效应的累积增量”, 辅助分支对应“粗尺度积分效应”,前后向一致性约束对应“演化算子互逆结构”的稳定性要求。 因此,该网络结构在机理层面具有明确的对应关系与可解释性。 \end{remark}

如果你希望“对比更像老师想要的那种”,我还能再给你补一段对照段落
把“传统机理模型离散化(如欧拉/RK4)的一步形式”写成 Δxn+1=Δxn+δnf()\Delta x_{n+1}=\Delta x_n + \delta_n f(\cdot)Δxn+1=Δxn+f()\Delta x_{n+1}=\Delta x_n + \int f(\cdot),然后一句话点明:你这个 ResNet 残差块就相当于把积分项/增量项用网络替代(这会更像“机理可匹配”的对比)。

问题

我现在第三部分分成两部分,一部分是我之前写的那部分,另一部分是你新给我的这部分,帮我给我之前写的部分起一个标题名

思考

Thought for 18s

回答

可以。你“之前写的那部分”主要是在讲:网络要学什么(一步演化/残差)、输入输出维度、残差结构与辅助分支、前向/后向网络、MSRP+rollout 等训练目标。所以标题建议突出“结构设计 + 训练方法”。

给你几个可选标题(按你论文风格从简到更学术):

  1. 残差神经网络结构设计与训练方法(最通用、最稳)
  2. 面向五机架厚度–张力预测的残差网络建模与训练(强调应用对象)
  3. 控制依赖的一步残差预测器构建与多步互逆一致性训练(把 MSRP 亮出来)
  4. 前向–后向残差网络与多步递推稳定训练策略(突出 forward/backward + 稳定性)
  5. 基于残差学习的离散区间演化建模:结构、目标与训练(更“论文味”)

如果你后面新增那部分标题叫“机理一致性解释与可解释性”,那我最推荐搭配用:
“残差神经网络结构设计与训练方法” + “机理一致性解释与可解释性”

问题

1-s2.0-S0959152420303371-main.pdf

机理模型是这篇文章中的机理模型,重新写一下我刚才让你写的可解释性的问题

思考

Explaining consistency and interpretability in model mechanisms

In this section, we need to highlight how Hu’s state vector differs from ours, showing the relation between their approach using δTin,i, δSi, δVi−1, and our focus on thickness deviation and adjacent tensions. I'll generate a subsection in Chinese and LaTeX that references the user's notation and includes citations from the uploaded paper.

Describing mechanism model dynamics and discretization for MPC

The mechanistic model summarizes how thickness increment (Model 3) shows δhout’s dependence on roll gaps, entry thickness, and tensions. Tension dynamics and actuator models indicate dependencies on neighboring tensions and speeds. State-space equations (18) integrate coupling, and for MPC, we discretize this space, deriving a process for discrete evolution. The residual network matches this by outputting changes over intervals.

Explaining the network, mapping, and model integration

The network learns the integral effect implied by the mechanistic model, representing the combined impact of rolling force, strip tension, and actuator response. The model maps inputs like local and neighbor states, input trajectory parameters, and rolling gap dynamics. The auxiliary branch models time-scale effects, and backward evolution approximates inverse transition for robustness.

Providing citations and integrating Hu & Sun 2021

We need citations for key equations such as the thickness (eq. 3), tension (eq. 15), and actuator equations (eq. 16-17). We can reference files for the relevant equations. Also, we'll integrate Hu & Sun (2021) by adding a non-heavy explanation for discretizing ẋ=Ax+Bu+Wd, keeping consistent with their notation and adjustments for incremental states.

Thought for 2m 23s

回答

下面给你把“可解释性/机理一致性”这一节按 Hu & Sun 这篇五机架冷轧机理模型来重写(你可以直接作为第三部分的第二小节/后一小节粘贴)。核心思路是:** 你的残差网络学的就是机理模型离散化后的一步“增量/演化算子”;而且你的输入(邻居状态 + Γ,δ\Gamma,\delta)在机理模型里分别对应耦合项(相邻张力等)执行机构/控制量(辊缝、速度)**。

机理模型依据:该文给出了出口厚度增量与辊缝/张力等的关系式(3) (1-s2.0-S0959152420303371-main.pdf)、张力动力学(15) (1-s2.0-S0959152420303371-main.pdf)、HGC/ASR 一阶执行机构模型(16)(17) (1-s2.0-S0959152420303371-main.pdf),以及子系统状态空间模型(18)与状态/输入/扰动的组成 (1-s2.0-S0959152420303371-main.pdf)。


latex
%======================== \subsection{与机理模型的一致性解释与可解释性(基于五机架冷轧机理模型)} \label{subsec:mech_interp_mill} %======================== \paragraph{机理模型回顾:厚度--张力耦合与执行机构动力学。} 针对五机架冷轧过程,文献中基于轧制基本理论建立了厚度与张力的增量模型与子系统状态空间模型。 其中,机架出口厚度增量可写为 \begin{equation} \delta h_{\mathrm{out},i} = e_{1i}\delta S_i + e_{2i}\delta h_{\mathrm{in},i} + e_{3i}\delta T_{\mathrm{in},i} + e_{4i}\delta T_{\mathrm{out},i}, \label{eq:mech_thickness_inc} \end{equation} 表明出口厚度在局部上受辊缝变化与入口厚度、入口/出口张力等共同影响(厚度--张力强耦合)。 张力动力学由胡克定律、质量流守恒与滑移关系推导后,可整理为相邻机架耦合的微分方程形式, 其核心特点是:$\delta T$ 的变化不仅与本机架量有关,还显式包含相邻机架张力、速度与辊缝等耦合项。 例如文献中给出 \begin{equation} \frac{d(\delta T_{\mathrm{in},i})}{dt} = \xi_1 \delta T_{\mathrm{in},i-1} + \xi_2 \delta T_{\mathrm{in},i} + \xi_3 \delta T_{\mathrm{in},i+1} + \cdots, \label{eq:mech_tension_dyn} \end{equation} 体现了典型的“相邻耦合链”结构。 此外,辊缝与速度并非理想瞬时执行,文献将 HGC(辊缝)与 ASR(速度)建模为一阶环节: \begin{equation} \dot S_i = \frac{U_{S_i}}{\tau_s} - \frac{S_i}{\tau_s},\qquad \dot V_i = \frac{U_{V_i}}{\tau_V} - \frac{V_i}{\tau_V}, \label{eq:mech_actuator_1st} \end{equation} 即控制指令(参考)$U_{S_i},U_{V_i}$ 经过时间常数 $\tau_s,\tau_V$ 的动态后才体现为实际辊缝/速度。 基于上述关系,文献把子系统 $i$ 写成连续时间状态空间模型 \begin{equation} \dot x_i(t)=A_i x_i(t)+B_i u_i(t)+W_{i1} d_i(t),\qquad y_i(t)=C_i x_i(t)+D_i u_i(t)+W_{i2} d_i(t), \label{eq:mech_ss} \end{equation} 并明确给出状态、输入与扰动向量的组成, 其中扰动项 $d_i(t)$ 中包含相邻机架张力、相邻辊缝、相邻速度及入口厚度等耦合量, 从结构上说明了“邻居变量进入本机架动力学”的机理来源。 \paragraph{残差网络的机理一致性:学习离散化后的“一步增量/演化算子”。} 将机理模型 \eqref{eq:mech_ss} 在采样时刻 $t_n$ 离散化(或等价地写成积分形式),可得到一步映射 \begin{equation} x_i(t_{n+1}) = x_i(t_n) + \int_{0}^{\delta_n} \Big(A_i x_i(t_n+\tau)+B_i u_i(t_n+\tau)+W_{i1} d_i(t_n+\tau)\Big)\,d\tau. 
\label{eq:mech_integral} \end{equation} 该式揭示:从 $t_n$$t_{n+1}$ 的变化量本质上是“机理动力学在一个区间内的累积效应”。 因此,把学习模型写成 \begin{equation} \Delta x_i(t_{n+1}) \approx \Delta x_i(t_n) +\mathcal N_i\!\big(\Delta x_i(t_n),\Delta x_{Z_i}(t_n),\Gamma_{i,n},\delta_n;\Theta_i\big) \end{equation} 在结构上与 \eqref{eq:mech_integral} 同形: 快捷支路对应“当前状态传递”,而网络输出对应“一步增量”(即积分项/离散化增量)的数据驱动近似。 这使得残差网络具有明确的物理含义:它并非直接黑盒拟合状态,而是在逼近 \emph{厚度--张力耦合 + 执行机构动态 + 邻居耦合扰动} 共同作用下的区间累积增量。 \paragraph{为什么输入必须包含邻居状态:对应机理模型中的耦合扰动项。} 在机理模型中,张力动力学显式包含相邻张力项(例如 \eqref{eq:mech_tension_dyn} 中的 $\delta T_{\mathrm{in},i\pm 1}$), 且状态空间模型的扰动向量 $d_i(t)$ 亦包含相邻机架的张力、辊缝与速度等量。 因此,你在网络输入中引入 $\Delta x_{Z_i}(t_n)$ 的做法, 等价于在数据驱动离散映射中“显式暴露”机理耦合通道, 使网络能够学习到来自相邻机架的张力传播与运输耦合效应,而不是把它们隐含地当作噪声。 \paragraph{为什么输入要包含 $(\Gamma_{i,n},\delta_n)$:对应执行机构与区间内输入轨迹的机理依赖。} 机理模型指出辊缝/速度通过一阶环节响应控制指令 \eqref{eq:mech_actuator_1st}因此同样的采样起点在不同的区间长度 $\delta_n$ 下会产生不同的“有效输入累积效应”。 你的多项式参数 $\Gamma_{i,n}$ 给出了区间内增量轨迹的低维表示, \eqref{eq:mech_integral} 中“区间内输入 $u_i(t_n+\tau)$ 的积分累积”相匹配; 而显式输入 $\delta_n$ 则让网络能够学习执行机构时间常数与采样周期共同决定的尺度效应。 \paragraph{辅助分支与前后向一致性的物理解释。} (1)$\psi_i(\cdot)+\rho_i(\cdot)$ 的分解可解释为: $\psi_i$ 捕捉与 $\delta_n$、执行机构动态强相关的“粗尺度增量”, $\rho_i$ 再学习剩余的耦合非线性修正(如厚度--张力非线性、参数漂移等)。 (2)引入后向网络并施加多步互逆一致性(MSRP)可理解为对离散演化算子施加“近似可逆/一致性”结构约束, 以抑制递推预测的漂移累积,提升长时域滚动预测的稳定性。 \begin{remark} 综上,你的网络结构与机理模型在三个层面可对齐: (i) 残差形式对应机理离散化后的区间增量; (ii) 邻居状态输入对应机理耦合通道(张力传播与扰动向量中的相邻量); (iii) $(\Gamma,\delta)$ 对应执行机构动态与区间内输入累积效应。 因此该模型具备“结构可解释性”,而非纯黑盒回归。 \end{remark}

你接下来要做的两点“对齐”

  1. 你文中用的是 Δxi=[Δhi,ΔTi1,ΔTi]\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top。上面机理模型用了 δhout,i\delta h_{\text{out},i}δTin,i\delta T_{\text{in},i} 这类符号。你可以在正文加一句符号对应(不必改全篇):
    δhout,iΔhi\delta h_{\text{out},i}\leftrightarrow \Delta h_iδTin,i\delta T_{\text{in},i} 与你定义的相邻张力偏差 ΔTi1\Delta T_{i-1}ΔTi\Delta T_i 对应即可。引用机理来源时就用文献符号。

  2. 上面这段没有强行把文献的 xi=[δTin,i,δSi,δVi1]x_i=[\delta T_{\text{in},i},\delta S_i,\delta V_{i-1}]^\top 搬到你文里,只是用它说明:耦合来自哪里、执行机构动态是什么、离散增量为什么合理。这样最省改动,也最容易让老师认可“能从机理上对上”。

如果你把你论文里“机理模型那一节”准备怎么写(是引用 Hu&Sun 这篇,还是你自己写的五机架模型),我还可以把上面这段里的 \eqrefeq:mechthicknessinc\eqrefeq:mechss\eqref{eq:mech_thickness_inc}\sim\eqref{eq:mech_ss} 改成直接引用你论文内部的机理公式编号,做到全文编号完全一致。

问题

%========================
\section{Construction of Dataset}
%========================

Consider a five-stand tandem cold rolling mill indexed by $i\in\{1,2,3,4,5\}$.
Let $t_n$ denote the $n$-th sampling instant and $\delta_n=t_{n+1}-t_n$ the corresponding sampling interval length.
Define the local within-interval time variable $\tau=t-t_n\in[0,\delta_n]$.

Let $h_i(t)$ denote the exit thickness of stand $i$ ($i=1,\dots,5$),
and let $T_i(t)$ denote the inter-stand strip tension between stand $i$ and $i+1$ ($i=1,\dots,4$).
The system is strongly coupled because the inter-stand tensions propagate along the mill line and are affected by neighboring stands' actions.

Let $h_i^{\mathrm{ref}}(t)$ and $T_i^{\mathrm{ref}}(t)$ be the desired references (setpoints) given by process requirements. We define deviation variables
\begin{equation}
\Delta h_i(t)\triangleq h_i(t)-h_i^{\mathrm{ref}}(t),\qquad
\Delta T_i(t)\triangleq T_i(t)-T_i^{\mathrm{ref}}(t).
\label{eq:dev_def}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to states always means deviation from reference.

For each stand $i$, we choose the local deviation state as
\begin{equation}
\Delta x_i(t)\triangleq
\begin{bmatrix}
\Delta h_i(t)\\
\Delta T_{i-1}(t)\\
\Delta T_i(t)
\end{bmatrix}\in\mathbb{R}^{d},\qquad d=3,
\label{eq:xi_def_clean}
\end{equation}
with the boundary convention $\Delta T_0(t)\equiv 0$ and $\Delta T_5(t)\equiv 0$ to keep a unified dimension $d=3$ for all stands.

For a five-stand tandem mill, the dominant coupling is between adjacent stands, hence we define
\begin{equation}
Z_1=\{2\},\quad
Z_i=\{i-1,\,i+1\}\ (i=2,3,4),\quad
Z_5=\{4\}.
\label{eq:Zi_clean}
\end{equation}
Define the neighbor-state stack
\begin{equation}
\Delta x_{Z_i}(t_n)=\mathrm{col}\big\{\Delta x_k(t_n)\ \big|\ k\in Z_i\big\}.
\label{eq:xZi_clean}
\end{equation}

Each stand $i$ is manipulated by roll gap $s_i(t)$ and stand speed $v_i(t)$:
\begin{equation}
u_i(t)=
\begin{bmatrix}
s_i(t)\\
v_i(t)
\end{bmatrix}\in\mathbb{R}^{n_u},\qquad n_u=2.
\label{eq:ui_clean}
\end{equation}
To ensure smooth actuation and match industrial practice, we optimize \emph{discrete input increments}:
\begin{equation}
\Delta u_i(t_n)\triangleq u_i(t_n)-u_i(t_{n-1})
=
\begin{bmatrix}
\Delta s_i(t_n)\\
\Delta v_i(t_n)
\end{bmatrix}.
\label{eq:du_discrete_clean}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to inputs $\Delta u_i(t_n)$ means sample-to-sample increment.
Thus, $\Delta x$ and $\Delta u$ are conceptually different, and this is fixed by definition.

Let $d_i(t)$ denote exogenous disturbances.
We denote the interval-level equivalent disturbance by $\Delta d_i(t_n)$.
$I_d$ denotes the $d\times d$ identity matrix; $0_{a\times b}$ denotes the $a\times b$ zero matrix.

The stand-wise deviation-state evolution over $[t_n,t_{n+1}]$ can be expressed by a discrete-time mapping
\begin{equation}
\Delta x_i(t_{n+1})
=
\Phi_i\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Delta u_i([t_n,t_{n+1}]),\,\Delta d_i([t_n,t_{n+1}])\Big),
\label{eq:true_mapping_clean}
\end{equation}
where $\Phi_i(\cdot)$ is generally nonlinear and coupled due to rolling deformation and tension propagation.
A commonly used conceptual equivalent discrete linear form is
\begin{equation}
\Delta x_i(t_{n+1})
=
M_d\,\Delta x_i(t_n)
+
N_d\,\Delta u_i(t_n)
+
F_d\,\Delta d_i(t_n),
\label{eq:linear_form_concept}
\end{equation}
where $M_d,N_d,F_d$ represent equivalent discrete-time matrices around operating conditions.
In a practical five-stand cold rolling mill, accurately deriving and identifying these matrices and disturbance models from first principles is difficult,
due to strong coupling, unmodeled nonlinearities, and time-varying operating regimes.
Therefore, this paper aims to learn a high-fidelity approximation of the interval evolution from data and then embed it into distributed MPC.

\begin{remark}
In fact, due to the existence of complex coupling relationships, it is difficult to directly and accurately establish \eqref{eq:linear_form_concept}
based on first principles. Therefore, in this paper, we learn an approximate mapping of \eqref{eq:true_mapping_clean} from data.
\end{remark}

Although decisions are updated at discrete instants $t_n$, the hydraulic gap and drive systems evolve continuously inside each interval,
and abrupt within-interval changes may excite tension oscillations and deteriorate thickness stability.
Thus, parameterizing the within-interval increment trajectory by a low-order polynomial:
(i) yields a compact finite-dimensional decision representation;
(ii) enforces smooth profiles inside the interval;
(iii) enables enforcing increment constraints for all $\tau\in[0,\delta_n]$.
This is appropriate when $\delta_n$ is not excessively large relative to actuator bandwidth and the within-interval evolution is well approximated by a low-order basis.

On the interval $[t_n,t_{n+1}]$, parameterize the control increment trajectory as
\begin{equation}
\Delta u_{i,n}(\tau;\Gamma_{i,n})
=
\Gamma_{i,n0}+\Gamma_{i,n1}\tau+\Gamma_{i,n2}\tau^2,
\qquad \tau\in[0,\delta_n],
\label{eq:du_poly_vec_clean}
\end{equation}
where $\Gamma_{i,n0},\Gamma_{i,n1},\Gamma_{i,n2}\in\mathbb{R}^{n_u}$ are coefficient vectors ($n_u=2$).
Component-wise, \eqref{eq:du_poly_vec_clean} corresponds to
\begin{equation}
\begin{aligned}
\Delta s_{i,n}(\tau) &= \gamma^{(s)}_{i,n0}+\gamma^{(s)}_{i,n1}\tau+\gamma^{(s)}_{i,n2}\tau^2,\\
\Delta v_{i,n}(\tau) &= \gamma^{(v)}_{i,n0}+\gamma^{(v)}_{i,n1}\tau+\gamma^{(v)}_{i,n2}\tau^2.
\end{aligned}
\label{eq:du_components_clean}
\end{equation}
Define the stacked parameter vector
\begin{equation}
\Gamma_{i,n}\triangleq
\big[
(\Gamma_{i,n0})^\top,\,
(\Gamma_{i,n1})^\top,\,
(\Gamma_{i,n2})^\top
\big]^\top
\in\mathbb{R}^{p},
\qquad
p=3n_u=6.
\label{eq:Gamma_clean}
\end{equation}
Here, $\Gamma_{i,n0}$ is the baseline increment at $\tau=0$, while $\Gamma_{i,n1}$ and $\Gamma_{i,n2}$ describe the linear and quadratic variation rates.

Define the interval-averaged equivalent increments as
\begin{equation}
\begin{aligned}
\Delta u_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta u_{i,n}(\tau)\,d\tau,\\
\Delta d_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta d_i(\tau)\,d\tau.
\end{aligned}
\label{eq:avg_def_clean}
\end{equation}
With \eqref{eq:du_poly_vec_clean}, the input average has a closed form:
\begin{equation}
\Delta u_i(t_n)=
\Gamma_{i,n0}
+\Gamma_{i,n1}\frac{\delta_n}{2}
+\Gamma_{i,n2}\frac{\delta_n^2}{3}.
\label{eq:avg_closed_clean}
\end{equation}

Let $\mathcal{I}_x$ denote the sampling domain of deviation states $\Delta x_i(t_n)$ and neighbor stacks $\Delta x_{Z_i}(t_n)$,
and let $\mathcal{I}_\Gamma$ denote the sampling domain of polynomial parameters $\Gamma_{i,n}$.
These domains specify the operating envelope used to generate supervised training data.

Given the above parameterization, one training sample is generated on each interval $[t_n,t_{n+1}]$.
In addition to the local deviation state, the neighbor deviation states are included to represent inter-stand coupling.
The process is summarized in Table~\ref{tab:interval_sample_generation_en}.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.15}
\caption{Procedure for generating one interval-level sample on $[t_n,t_{n+1}]$ (five-stand coupled mill).}
\label{tab:interval_sample_generation_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.09\linewidth} X}
\toprule
\textbf{Step} & \textbf{Operation} \\
\midrule
1 & \textbf{State sampling:} sample $\Delta x_i(t_n)$ and $\Delta x_{Z_i}(t_n)$ from $\mathcal{I}_x$. \\
2 & \textbf{Parameter sampling:} draw $\Gamma_{i,n}\sim\mathcal{I}_\Gamma$ (coefficients for both $\Delta s_{i,n}(\tau)$ and $\Delta v_{i,n}(\tau)$). \\
3 & \textbf{Control construction:} compute $\Delta u_{i,n}(\tau)$ via \eqref{eq:du_poly_vec_clean}. \\
4 & \textbf{State propagation:} integrate the \emph{five-stand coupled} mill model on $[t_n,t_{n+1}]$ (e.g., RK4) using the within-interval control trajectory, and record $\Delta x_i(t_{n+1})$. \\
\bottomrule
\end{tabularx}
\end{table}

Accordingly, an interval sample for subsystem $i$ can be represented as
\begin{equation}
\mathcal{D}_{i,n}=\big\{\Delta x_i(t_n),\ \Delta x_{Z_i}(t_n),\ \Delta u_{i,n}(\tau),\ \Delta x_i(t_{n+1})\big\}.
\label{eq:interval_sample_clean}
\end{equation}
Note that $\Delta u_{i,n}(\tau)$ is fully determined by $(\Gamma_{i,n},\delta_n)$ via \eqref{eq:du_poly_vec_clean},
therefore it is sufficient to store $(\Gamma_{i,n},\delta_n)$ as the learning input.

For each subsystem $i$, by repeating the above procedure across multiple intervals and randomized draws,
the local one-step training dataset is formed as
\begin{equation}
\begin{split}
S_i=\Big\{&
\big(\Delta x_i^{(j)}(t_n),\,\Delta x_{Z_i}^{(j)}(t_n),\,\Delta x_i^{(j)}(t_{n+1});\,
\Gamma_{i,n}^{(j)},\,\delta_n^{(j)}\big)
\ \Big|\ j=1,\ldots,J
\Big\}.
\end{split}
\label{eq:S_i_clean}
\end{equation}
Here $J$ is the number of one-step samples for subsystem $i$.
The overall dataset for the five-stand mill is denoted by $\{S_i\}_{i=1}^{5}$.
The point-cloud visualization of the training dataset is shown in Figure~\ref{2}.
The point-cloud visualization of the training dataset is shown in Figure~\ref{2}.

\begin{figure*}[htbp]
\centering
\includegraphics[scale=0.5]{picture/Fig2.pdf}
\caption{Point cloud map of the training dataset.}\label{2}
\end{figure*}

The one-step set $S_i$ is sufficient for one-step regression, but it is not sufficient for training with multi-step rollout loss
and reciprocal-consistency regularization, because these objectives require ground-truth deviation-state trajectories over a horizon of $K$ consecutive intervals.
Therefore, without changing the single-interval sampling mechanism above, we additionally organize the offline-simulated samples
into $K$-step trajectory segments.

Specifically, for each starting time $t_n$ we generate a segment of length $K$ by consecutively sampling
$\{\Gamma_{i,n+s},\delta_{n+s}\}_{s=0}^{K-1}$ (and the corresponding inputs/disturbances),
and integrating the five-stand coupled mill model over $[t_{n+s},t_{n+s+1}]$ for $s=0,\ldots,K-1$.
Hence, we obtain the deviation-state sequence $\{\Delta x_i(t_{n+s})\}_{s=0}^{K}$ as well as the neighbor stacks
$\{\Delta x_{Z_i}(t_{n+s})\}_{s=0}^{K}$.

Define a $K$-step segment sample for subsystem $i$ as
\begin{equation}
\begin{aligned}
\mathcal{W}_{i,n}=
\Big\{&
\big(\Delta x_i(t_{n+s}),\,\Delta x_{Z_i}(t_{n+s}),\,\Gamma_{i,n+s},\,\delta_{n+s}\big)_{s=0}^{K-1};\\
&\big(\Delta x_i(t_{n+s+1})\big)_{s=0}^{K-1}
\Big\}.
\end{aligned}
\label{eq:segment_clean}
\end{equation}
By repeating the above segment generation, we form the multi-step training set
\begin{equation}
S_i^{(K)}=\Big\{\mathcal{W}_{i,n}^{(j)}\ \Big|\ j=1,\ldots,J_K\Big\},
\label{eq:S_i_K_clean}
\end{equation}
where $J_K$ is the number of $K$-step segment samples.
Note that $S_i$ can be viewed as the marginal one-step projection of $S_i^{(K)}$ (keeping only $s=0$),
thus the original dataset design is preserved, and only an additional \emph{segment organization} is introduced for multi-step training.

%========================
\section{Construction of Residual Neural Network}
%========================
\subsection{Residual Neural Network Structure Construction and Training Method}
Given the dataset, the neural network model is trained to learn a stand-wise, control-dependent one-step evolution law of deviation states:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\,\Theta_i\Big),
\label{eq:learned_dyn_clean}
\end{equation}
where $\mathcal{N}_i(\cdot)$ outputs the one-step deviation-state change and $\Theta_i$ are trainable parameters.

\begin{remark}
If $\mathcal{N}_i$ does not take control information as input (here $\Gamma_{i,n}$ and $\delta_n$),
the predictor becomes an autoregressive model that only reproduces trajectories under the training input patterns
and cannot answer the counterfactual question: ``what will happen if we choose a different roll gap and speed trajectory?''
Since MPC optimizes over candidate decisions, a control-dependent predictor \eqref{eq:learned_dyn_clean} is necessary
to evaluate the predicted thickness and tension behavior under different candidate actuator trajectories.
\end{remark}

Let $d=3$ (state dimension), $|Z_i|$ be the number of neighbors of stand $i$ in \eqref{eq:Zi_clean}, and $p=6$ in \eqref{eq:Gamma_clean}.
Define the input vector
\begin{equation}
X_{i,\text{in}} \triangleq
\big[
\Delta x_i(t_n)^\top,\,
\Delta x_{Z_i}(t_n)^\top,\,
\Gamma_{i,n}^\top,\,
\delta_n
\big]^\top
\in \mathbb{R}^{d(1+|Z_i|)+p+1}.
\label{eq:X_in_clean}
\end{equation}
The network mapping is
\begin{equation}
\mathcal{N}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d}.
\end{equation}

To improve training stability and long-horizon rollout robustness, we use a residual form.
Let $\hat{I}_i\in\mathbb{R}^{d\times(d(1+|Z_i|)+p+1)}$ be a selection matrix extracting the local state block:
\begin{equation}
\hat{I}_i = [I_d,\ 0_{d\times(d|Z_i|+p+1)}].
\label{eq:Ihat_clean}
\end{equation}
Then the one-step predictor is written as
\begin{equation}
X_{i,\text{out}} = \hat{I}_i X_{i,\text{in}} + \mathcal{N}_i(X_{i,\text{in}}; \Theta_i),
\label{eq:res_predict_clean}
\end{equation}
where $X_{i,\text{out}}$ represents the predicted $\Delta x_i(t_{n+1})$.
This structure implements a baseline-plus-correction interpretation:
the shortcut propagates the current deviation state $\Delta x_i(t_n)$, while the network learns the correction capturing
unmodeled nonlinearities and inter-stand coupling (via $\Delta x_{Z_i}$) under varying operating conditions.

To improve robustness when $\delta_n$ varies, we introduce an auxiliary branch inside $\mathcal{N}_i$:
\begin{equation}
\mathcal{N}_i(X_{i,\text{in}};\Theta_i)\triangleq
\psi_i(X_{i,\text{in}};\Theta_{\psi_i}) + \rho_i(X_{i,\text{in}};\theta_i),
\label{eq:aux_clean}
\end{equation}
where $\psi_i(\cdot)$ is a lightweight feedforward branch that captures low-frequency/scale effects strongly related to $\delta_n$,
and $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections.
When $\psi_i(\cdot)\equiv 0$, the model reduces to a standard residual network.

For the $j$-th sample in \eqref{eq:S_i_clean}, define
\begin{equation}
X_{i,\text{in}}^{(j)} =
\big[
\Delta x_i^{(j)}(t_n),\ \Delta x_{Z_i}^{(j)}(t_n),\
\Gamma_{i,n}^{(j)},\ \delta_n^{(j)}
\big]^{\top},
\end{equation}
and the supervised residual target
\begin{equation}
\Delta r_i^{(j)}=\Delta x_i^{(j)}(t_{n+1})-\Delta x_i^{(j)}(t_n).
\label{eq:target_clean}
\end{equation}

To suppress accumulation drift induced by long-horizon recursion and to improve long-term predictive stability,
we train the forward predictor jointly with an auxiliary backward residual model
and impose a multi-step reciprocal-consistency regularization over a $K$-step segment from $S_i^{(K)}$.

Construct a backward residual network
\begin{equation}
\mathcal{B}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d},
\end{equation}
parameterized by $\bar{\Theta}_i$. For the backward step associated with interval $[t_n,t_{n+1}]$, define
\begin{equation}
\begin{aligned}
X_{i,\mathrm{in}}^{b}
&=
\big[
\Delta x_i(t_{n+1}),\ \Delta x_{Z_i}(t_{n+1}),\
\Gamma_{i,n},\ \delta_n
\big]^{\top},\\
X_{i,\mathrm{out}}^{b}
&=
\hat{I}_i X_{i,\mathrm{in}}^{b} + \mathcal{B}_i(X_{i,\mathrm{in}}^{b};\bar{\Theta}_i),
\end{aligned}
\label{eq:back_clean}
\end{equation}
where $X_{i,\mathrm{out}}^{b}$ represents the backward estimate of $\Delta x_i(t_n)$.
The supervised backward residual target is
\begin{equation}
\Delta r_i^{b}=\Delta x_i(t_n)-\Delta x_i(t_{n+1}).
\end{equation}

Given a segment sample $\mathcal{W}_{i,n}\in S_i^{(K)}$, initialize
\begin{equation}
\Delta \hat{x}_i(t_n)=\Delta x_i(t_n),
\end{equation}
and recursively apply the forward predictor for $K$ steps:
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,\delta_{n+s};\,\Theta_i
\Big),\\
&\qquad s=0,\ldots,K-1.
\end{aligned}
\label{eq:fwd_roll_clean}
\end{equation}

Set the terminal condition
\begin{equation}
\Delta \bar{x}_i(t_{n+K})=\Delta \hat{x}_i(t_{n+K}),
\end{equation}
and roll back using $\mathcal{B}_i$:
\begin{equation}
\begin{aligned}
\Delta \bar{x}_i(t_{n+s})
&=
\hat{I}_i X_{i,\mathrm{in}}^{b}(t_{n+s})
+
\mathcal{B}_i\!\Big(X_{i,\mathrm{in}}^{b}(t_{n+s});\,\bar{\Theta}_i\Big),
\quad s=K-1,\ldots,0,
\end{aligned}
\label{eq:bwd_roll_clean}
\end{equation}
where
\begin{equation}
X_{i,\mathrm{in}}^{b}(t_{n+s})=
\big[
\Delta \bar{x}_i(t_{n+s+1}),\ \Delta \hat{x}_{Z_i}(t_{n+s+1}),\
\Gamma_{i,n+s},\ \delta_{n+s}
\big]^{\top}.
\end{equation}

Define the multi-step reciprocal prediction error
\begin{equation}
E_i(t_n)
=
\sum_{s=0}^{K}
\left\|
\Delta \hat{x}_i(t_{n+s})-\Delta \bar{x}_i(t_{n+s})
\right\|^2.
\end{equation}

We jointly minimize:
\begin{equation}
\begin{aligned}
L_{\mathrm{1step}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s+1})-\Delta x_i^{(j)}(t_{n+s})\big)
-\mathcal{N}_i\!\left(
X_{i,\mathrm{in}}^{(j)}(t_{n+s});\Theta_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{bwd}}(\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s})-\Delta x_i^{(j)}(t_{n+s+1})\big)
-\mathcal{B}_i\!\left(
X_{i,\mathrm{in}}^{b,(j)}(t_{n+s});\bar{\Theta}_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{msrp}}(\Theta_i,\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K} E_i^{(j)}(t_n),\\[2mm]
L_{\mathrm{roll}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\sum_{s=1}^{K}
\Big\|
\Delta x_i^{(j)}(t_{n+s})-\Delta \hat{x}_i^{(j)}(t_{n+s})
\Big\|^2.
\end{aligned}
\label{eq:loss_clean}
\end{equation}
Here, $L_{\mathrm{1step}}$ enforces one-step accuracy; $L_{\mathrm{roll}}$ explicitly suppresses long-horizon drift under recursion;
$L_{\mathrm{msrp}}$ regularizes the learned dynamics by enforcing reciprocal consistency between forward and backward rollouts;
and $L_{\mathrm{bwd}}$ trains the backward model for the consistency regularization.
In implementation, these terms are combined as
\begin{equation}
L_{\mathrm{total}}=\lambda_1 L_{\mathrm{1step}}+\lambda_2 L_{\mathrm{roll}}+\lambda_3 L_{\mathrm{msrp}}+\lambda_4 L_{\mathrm{bwd}},
\end{equation}
where $\lambda_1,\lambda_2,\lambda_3,\lambda_4>0$ are tuned on a validation set.

After training, the forward predictor is
\begin{equation}
\Delta \hat{x}_i(t_{n+1})
=
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(
\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,
\Gamma_{i,n},\,\delta_n;\,\Theta_i^*
\Big),
\label{eq:pred_clean}
\end{equation}
and multi-step prediction is obtained by recursive rollout of \eqref{eq:pred_clean}.
This learned predictor is the internal model used by the MPC optimizer in the next section.

Finally, network parameters are optimized using Adam:
\begin{equation}
\Theta_{i,t+1} = \Theta_{i,t} - \alpha \frac{\hat{m}_{i,t}}{\sqrt{\hat{v}_{i,t}} + \varepsilon},
\end{equation}
where $\alpha$ is the learning rate (we use $\alpha$ to avoid conflict with other symbols),
$\hat{m}_{i,t}$ and $\hat{v}_{i,t}$ are bias-corrected moment estimates, and $\varepsilon>0$ is a small constant for numerical stability.
Figure~\ref{fig:rnn_logic} illustrates the overall structure.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.85]{picture/x6.pdf}
\caption{Logic diagram of the residual neural network.}
\label{fig:rnn_logic}
\end{figure}

\subsection{Explainability of the residual network}

Although the five-stand cold rolling mill involves complex rolling deformation and tension-transport coupling, its stand-wise deviation dynamics can be abstractly described by a coupled nonlinear ODE:
\begin{equation}
\frac{d}{dt}\Delta x_i(t)
=
f_i\!\Big(\Delta x_i(t),\,\Delta x_{Z_i}(t),\,u_i(t),\,d_i(t)\Big),
\qquad i=1,\ldots,5,
\label{eq:mech_ode}
\end{equation}
where $f_i(\cdot)$ summarizes the mechanistic effects.
For a given sampling interval length $\delta_n$ and a within-interval input trajectory $u_i(t_n+\tau)$,
the state transition over one interval can be written through an evolution operator:
\begin{equation}
\Delta x_i(t_{n+1})=\Phi_{i,\delta_n}\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,u_i([t_n,t_{n+1}]),\,d_i([t_n,t_{n+1}])\Big).
\label{eq:evolution_operator_mill}
\end{equation}
By the fundamental theorem of calculus, \eqref{eq:mech_ode} implies the increment form
\begin{equation}
\Delta x_i(t_{n+1})
=
\Delta x_i(t_n)
+
\underbrace{\int_{0}^{\delta_n}
f_i\!\Big(\Delta x_i(t_n+\tau),\,\Delta x_{Z_i}(t_n+\tau),\,u_i(t_n+\tau),\,d_i(t_n+\tau)\Big)\,d\tau}_{\triangleq\ \varphi_{i,n}},
\label{eq:increment_integral}
\end{equation}
where $\varphi_{i,n}$ is the one-interval state increment generated by the mechanistic dynamics.

Our learned model \eqref{eq:learned_dyn_clean} adopts the same increment form as \eqref{eq:increment_integral}:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\Theta_i\Big).
\end{equation}
Here, $\mathcal{N}_i(\cdot)$ plays the role of a data-driven approximation of the integral increment $\varphi_{i,n}$,
i.e., it approximates the accumulated effect of the mechanistic dynamics over $[t_n,t_{n+1}]$.
This is consistent with the well-known interpretation that a residual network behaves like a one-step time integrator:
the identity path propagates the current state, while the residual branch represents the increment over the time lag.

Inside each interval, we do not optimize point-wise $u_i(t)$ but parameterize the increment trajectory by
$\Delta u_{i,n}(\tau;\Gamma_{i,n})$.
Hence, the mechanistic increment $\varphi_{i,n}$ in \eqref{eq:increment_integral} depends on the \emph{whole} within-interval trajectory.
Feeding $(\Gamma_{i,n},\delta_n)$ into $\mathcal{N}_i$ is therefore a compact way to represent how different candidate
gap/speed trajectories change the integral effect and thus the next thickness--tension state.

When $\delta_n$ varies and is not very small, directly learning $\varphi_{i,n}$ may be harder.
Motivated by the generalized residual idea,
we decompose the increment predictor into two parts in \eqref{eq:aux_clean}:
\begin{equation}
\mathcal{N}_i(\cdot)=\psi_i(\cdot)+\rho_i(\cdot).
\end{equation}
Conceptually, $\psi_i(\cdot)$ captures low-frequency and scale effects strongly related to $\delta_n$,
while $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections.
This provides a mechanism-consistent interpretation: a baseline increment plus a residual correction
that compensates unmodeled nonlinearities and inter-stand coupling.

%========================
\section{Nash Equilibrium-Based RNE-DMPC}
%========================

The five-stand tandem cold rolling system is strongly coupled through inter-stand tension propagation.
As a result, changes in control actions (roll gap and stand speed) at one stand can affect both upstream and downstream stands,
making centralized online optimization over all stands' decision variables computationally demanding.

To mitigate this issue, we decompose the global predictive-control problem into $N=5$ local subproblems associated with individual stands.
Each local controller optimizes its own decision variables while accounting for coupling via limited information exchange with neighboring controllers.
Motivated by game-theoretic coordination \citep{rawlings2008coordinating}, we formulate distributed coordination as a Nash-equilibrium-seeking iteration.
Based on the trained residual neural network surrogate model, we construct a Nash-equilibrium-based distributed MPC method (RNE-DMPC)
for coordinated thickness--tension regulation and tracking. The overall control structure is shown in Figure~\ref{4}.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x2.pdf}
\caption{Schematic diagram of the control architecture for a tandem cold rolling mill.}\label{4}
\end{figure*}

At sampling time $t_n$, stand $i$ chooses the polynomial-parameter sequence
$\mathbf{\Gamma}_i(t_n)\in\mathbb{R}^{pN_c}$, where $p=6$.
Let $\mathbf{\Gamma}(t_n)\triangleq \mathrm{col}\{\mathbf{\Gamma}_1(t_n),\ldots,\mathbf{\Gamma}_5(t_n)\}$
denote the joint strategy profile, and let $\mathbf{\Gamma}_{-i}(t_n)$ denote the collection of all strategies except stand $i$.

Given the current measured/estimated deviation state $\Delta x_i(t_n)$ and the strategies
$(\mathbf{\Gamma}_i(t_n),\mathbf{\Gamma}_{Z_i}(t_n))$,
the multi-step prediction used by stand $i$ is written explicitly as
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
&=
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s};\cdot),\,
\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}),\\
&\qquad
\Gamma_{i,n+s},\,
\delta_{n+s};\Theta_i^*
\Big),
\end{aligned}
\label{eq:rollout_mpc_game}
\end{equation}
for $s=0,\ldots,N_p-1$, with initialization $\Delta \hat{x}_i(t_n;\cdot)=\Delta x_i(t_n)$.
Here the neighbor stack $\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i})$ is generated from neighbors' strategies via the same learned predictors.

Over $[t_{n+s},t_{n+s+1}]$ with length $\delta_{n+s}$,
\begin{equation}
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\tau
+\Gamma_{i,n+s,2}\tau^2,\qquad \tau \in [0,\delta_{n+s}],
\end{equation}
and the interval-averaged increment is
\begin{equation}
\Delta u_i(t_{n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}.
\label{eq:du_avg_clean}
\end{equation}

\begin{remark}
Because inter-stand tension $T_i$ is jointly affected by the adjacent stands $i$ and $i+1$,
the predicted evolution of $\Delta x_i$ depends on neighbors' future actions,
hence the MPC problems are not independent but form a coupled dynamic game.
\end{remark}

At time $t_n$, the local strategy of stand $i$ is
\begin{equation}
\mathbf{\Gamma}_i(t_n)
\triangleq
\mathrm{col}\{\Gamma_{i,n},\Gamma_{i,n+1},\ldots,\Gamma_{i,n+N_c-1}\}
\in \mathbb{R}^{pN_c}.
\end{equation}

In deviation coordinates, the regulation/tracking objective is $\Delta x_i(t)\rightarrow 0$, i.e.
\begin{equation}
\Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d},\qquad d=3.
\end{equation}

Recall $\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top$.
Define the row selectors
\begin{equation}
C^- \triangleq [0\ \ 1\ \ 0]\in\mathbb{R}^{1\times 3},\qquad
C^+ \triangleq [0\ \ 0\ \ 1]\in\mathbb{R}^{1\times 3},
\end{equation}
so that $C^- \Delta x_i=\Delta T_{i-1}$ (upstream interface) and $C^+\Delta x_i=\Delta T_i$ (downstream interface).

For the interface between stands $i$ and $i+1$,
stand $i$'s prediction provides $C^+\Delta \hat{x}_i$, while stand $i+1$'s prediction provides $C^- \Delta \hat{x}_{i+1}$.
Their mismatch measures coupling inconsistency:
\begin{equation}
e_{i}^{\mathrm{sh}}(t_{n+s};\mathbf{\Gamma})
\triangleq
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma}),\qquad i=1,\ldots,4.
\label{eq:shared_tension_mismatch}
\end{equation}

We define the stage cost of stand $i$ as a function of all players' strategies:
\begin{equation}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})
\triangleq
\sum_{s=1}^{N_p}
\left\|
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
\right\|_{Q_i}^{2}
+
\sum_{s=0}^{N_c-1}
\left\|\Gamma_{i,n+s}\right\|_{R_i}^{2}
+
J_i^{\mathrm{cpl}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}),
\label{eq:Ji_game}
\end{equation}
where $Q_i\succeq 0$ weights thickness and tension deviations, and $R_i\succeq 0$ penalizes actuation magnitudes.

The coupling term $J_i^{\mathrm{cpl}}$ explicitly reflects the game/coordination requirement on shared tensions.
A simple and effective choice is to penalize the interface mismatches adjacent to stand $i$:
\begin{equation}
\begin{aligned}
J_i^{\mathrm{cpl}}
&=
\mu_i\sum_{s=1}^{N_p}
\Big(
\mathbb{I}_{\{i\ge 2\}}
\big|
C^-\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^+\Delta \hat{x}_{i-1}(t_{n+s};\mathbf{\Gamma})
\big|^2 \\
&\qquad\quad
+
\mathbb{I}_{\{i\le 4\}}
\big|
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma})
\big|^2
\Big),
\end{aligned}
\label{eq:coupling_cost}
\end{equation}
with $\mu_i>0$ and indicator $\mathbb{I}_{\{\cdot\}}$.
This term makes the coupling conflict explicit: unilateral actions that locally reduce thickness error may worsen shared-tension
compatibility and thus increase $J_i$, and also affect neighbors' objectives.

We enforce the absolute-input bounds and within-interval increment bounds.

Absolute input bounds (roll gap and speed):
\begin{equation}
u_{i,\min}\le u_i(t_{n+s})\le u_{i,\max},
\qquad s=0,\ldots,N_p-1,
\label{eq:u_abs_game}
\end{equation}
where $u_i(t)=[s_i(t),\,v_i(t)]^\top$.

Within-interval increment-trajectory bounds:
\begin{equation}
\Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max},
\qquad \forall\tau\in[0,\delta_{n+s}],\ s=0,\ldots,N_p-1,
\label{eq:du_traj_game}
\end{equation}
where $\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})$ is given by the polynomial parameterization.

Consistency with discrete execution:
Define the interval-averaged increment
\begin{equation}
\Delta u_i(t_{n+s})
\triangleq
\frac{1}{\delta_{n+s}}\int_{0}^{\delta_{n+s}}\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\,d\tau
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3},
\label{eq:du_avg_game}
\end{equation}
and propagate the absolute input along the horizon by
\begin{equation}
u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n),\qquad
u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s}),\ s=1,\ldots,N_p-1,
\label{eq:u_prop_game}
\end{equation}
where $u_i(t_{n-1})$ is the applied (measured) input from the previous sampling instant.

Compact feasible set:
\begin{equation}
\Omega_i \triangleq
\Big{\mathbf{\Gamma}_i\ \Big|\
\eqref{eq:rollout_mpc_game}\ \text{holds and}
\eqref{eq:u_abs_game},\eqref{eq:du_traj_game},\eqref{eq:u_prop_game}\ \text{are satisfied}
\Big}.
\label{eq:Omega_i_game}
\end{equation}

Given neighbors' current strategies,
stand $i$ solves the differentiable NLP:
\begin{equation}
\mathbf{\Gamma}_i^{\mathrm{BR}}
=
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}\
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}).
\label{eq:local_BR}
\end{equation}
Because the learned surrogate is differentiable, \eqref{eq:local_BR} can be solved by standard gradient-based NLP solvers.

At each sampling time $t_n$, the distributed MPC coordination induces a finite-horizon dynamic game:
players are stands $i=1,\ldots,5$; strategy sets are $\Omega_i$; and payoff (cost) functions are $J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})$
defined in \eqref{eq:Ji_game}--\eqref{eq:coupling_cost}.

A joint strategy profile $\mathbf{\Gamma}^*=\mathrm{col}\{\mathbf{\Gamma}_1^*,\ldots,\mathbf{\Gamma}_5^*\}$
is a Nash equilibrium if
\begin{equation}
\forall i\in\{1,\ldots,5\},\qquad
\mathbf{\Gamma}_i^*\in
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^*).
\label{eq:NE_def}
\end{equation}
This definition explicitly characterizes the strategic coupling:
each player's optimal decision depends on neighbors' decisions through the shared-tension dynamics and the coupling term.

To compute an NE online with limited communication, we employ a relaxed best-response iteration.
Let $l$ denote the Nash-iteration index.
Given $\mathbf{\Gamma}^{(l-1)}$, each stand computes a best response $\mathbf{\Gamma}_i^{\mathrm{BR},(l)}$
by solving \eqref{eq:local_BR}, and then updates with relaxation:
\begin{equation}
\mathbf{\Gamma}_i^{(l)}
=
(1-\omega)\mathbf{\Gamma}_i^{(l-1)}
+
\omega\,\mathbf{\Gamma}_i^{\mathrm{BR},(l)},
\qquad \omega\in(0,1].
\label{eq:relaxed_BR}
\end{equation}
The relaxation factor $\omega$ mitigates oscillations caused by strong coupling and improves practical convergence.

The Nash equilibrium is computed through distributed best-response iterations, summarized in Table~\ref{tab:nash_iter_en}.

The convergence metric in Step F is defined as
\begin{equation}
\varsigma^{(l)}
\triangleq
\max_i
\frac{\left\|
\mathbf{\Gamma}_i^{(l)}-\mathbf{\Gamma}_i^{(l-1)}
\right\|_2}{
\left\|
\mathbf{\Gamma}_i^{(l-1)}
\right\|_2+\epsilon},
\end{equation}
with $\epsilon>0$ small.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.12}
\setlength{\tabcolsep}{3.5pt}
\caption{Relaxed distributed Nash best-response iteration for RNE-DMPC (five-stand).}
\label{tab:nash_iter_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X}
\toprule
\textbf{Step} & \textbf{Description} \\
\midrule
A &
Initialize $l=1$ and warm-start $\mathbf{\Gamma}_i^{(0)}$ (e.g., from the previous sampling time). \\

B &
Communicate $\mathbf{\Gamma}_i^{(l-1)}$ (or the induced predicted trajectories) among neighbors; form $\mathbf{\Gamma}_{Z_i}^{(l-1)}$. \\

C &
Given $\mathbf{\Gamma}_{-i}^{(l-1)}$, solve the best-response NLP \eqref{eq:local_BR} to obtain $\mathbf{\Gamma}_i^{\mathrm{BR},(l)}$. \\

D &
Update the relaxed strategy using \eqref{eq:relaxed_BR} and compute the induced predictions
$\Delta \hat{x}_i^{(l)}(\cdot;\mathbf{\Gamma}^{(l)})$ via \eqref{eq:rollout_mpc_game}. \\

E &
Broadcast $\mathbf{\Gamma}_i^{(l)}$ and the predicted interface quantities needed in \eqref{eq:coupling_cost}
(e.g., $C^\pm\Delta \hat{x}_i^{(l)}$) to neighbors. \\

F &
Compute $\varsigma^{(l)}$; if $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$; otherwise $l\leftarrow l+1$ and repeat. \\
\bottomrule
\end{tabularx}
\end{table}

After convergence, only the first-interval parameters $\Gamma_{i,n}^*$ are applied.
The within-interval increment is $\Delta u_{i,n}(\tau;\Gamma_{i,n}^*)$ and the discrete applied increment uses the interval average:
\begin{equation}
\Delta u_i(t_n)
=
\Gamma_{i,n,0}^*
+
\Gamma_{i,n,1}^*\frac{\delta_n}{2}
+
\Gamma_{i,n,2}^*\frac{\delta_n^2}{3},
\end{equation}
followed by $u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n)$.

The overall control flow chart is shown in Fig.~\ref{liu}.

\begin{figure}
\centering
\includegraphics[width=\linewidth]{picture/x5.pdf}
\caption{The overall system control flow chart.}\label{liu}
\end{figure}

\begin{remark}
Equations \eqref{eq:Ji_game}--\eqref{eq:coupling_cost} and the NE definition \eqref{eq:NE_def} explicitly show that each player's objective
depends on others' strategies through the coupled prediction dynamics and the shared-tension interface mismatch penalty,
thus making the multi-stand strategic interaction and coordination mechanism transparent.
\end{remark}
% NOTE(review): the following lines are leftover chat/instruction text accidentally pasted
% into the manuscript source (not manuscript content); commented out so the document compiles cleanly.
% 这是我最新的第二章,第三章,第四章。在多项式参数化的部分, 这个地方一般的情况用n阶进行方案设计,仿真可以选为二阶的,所有的时变信号参数化都这样改。给出改后的完整的latex代码,上下文要符合逻辑,在符合逻辑的前提下其他地方都不要改
%
% 分享这个问答