%=================================================...

% 创建于:2026年2月23日
% 使用 GPT-5.2 Thinking 回答 Chat01
% 问题

%======================================================================
% GLOBAL CONSISTENCY CLEAN VERSION (Five-stand, thickness+tension only,
% actuators: roll gap + stand speed; Δx = deviation, Δu = increment)
%======================================================================

%========================
\section{Construction of Dataset}
%========================

\subsection{Five-stand tandem mill setting and unified notation}
\label{subsec:notation_clean}

Consider a five-stand tandem cold rolling mill indexed by $i\in\{1,2,3,4,5\}$.
Let $t_n$ denote the $n$-th sampling instant and $\delta_n=t_{n+1}-t_n$ the corresponding sampling interval length.
Define the local within-interval time variable $\tau=t-t_n\in[0,\delta_n]$.

\paragraph{Thickness and tension signals (what is controlled).}
Let $h_i(t)$ denote the exit thickness of stand $i$ ($i=1,\dots,5$),
and let $T_i(t)$ denote the inter-stand strip tension between stand $i$ and $i+1$ ($i=1,\dots,4$).
The system is strongly coupled because the inter-stand tensions propagate along the mill line and are affected by neighboring stands' actions.

\paragraph{Reference trajectories and deviation-state definition (meaning of $\Delta x$).}
Let $h_i^{\mathrm{ref}}(t)$ and $T_i^{\mathrm{ref}}(t)$ be the desired references (setpoints) given by process requirements
(e.g., schedule-based references or constant setpoints).
We define deviation (tracking-error) variables
\begin{equation}
\Delta h_i(t)\triangleq h_i(t)-h_i^{\mathrm{ref}}(t),\qquad
\Delta T_i(t)\triangleq T_i(t)-T_i^{\mathrm{ref}}(t).
\label{eq:dev_def}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to \emph{states} always means \emph{deviation from reference}.

\paragraph{Local state vector (where thickness and tension appear).}
For each stand ii, we choose the local deviation state as
\begin{equation}
\Delta x_i(t)\triangleq
\begin{bmatrix}
\Delta h_i(t)\\
\Delta T_{i-1}(t)\\
\Delta T_i(t)
\end{bmatrix}\in\mathbb{R}^{d},\qquad d=3,
\label{eq:xi_def_clean}
\end{equation}
with the boundary convention $\Delta T_0(t)\equiv 0$ and $\Delta T_5(t)\equiv 0$ to keep a unified dimension $d=3$ for all stands.
(Equivalently, one may remove nonexistent boundary tensions from the state and use varying dimensions; here we keep a unified form.)

\paragraph{Neighbor sets and coupling representation (five-stand chain).}
For a five-stand tandem mill, the dominant coupling is between adjacent stands, hence we define
\begin{equation}
Z_1=\{2\},\quad
Z_i=\{i-1,i+1\}\ (i=2,3,4),\quad
Z_5=\{4\}.
\label{eq:Zi_clean}
\end{equation}
Define the neighbor-state stack
\begin{equation}
\Delta x_{Z_i}(t_n)=\mathrm{col}\{\Delta x_k(t_n)\,|\,k\in Z_i\}.
\label{eq:xZi_clean}
\end{equation}

\paragraph{Actuators and increment inputs (meaning of $u$ and $\Delta u$).}
Each stand $i$ is manipulated by \emph{roll gap} (screw-down/hydraulic gap) $s_i(t)$ and \emph{stand speed} $v_i(t)$:
\begin{equation}
u_i(t)=
\begin{bmatrix}
s_i(t)\\
v_i(t)
\end{bmatrix}\in\mathbb{R}^{n_u},\qquad n_u=2.
\label{eq:ui_clean}
\end{equation}
To ensure smooth actuation and match industrial practice, we optimize \emph{discrete input increments}:
\begin{equation}
\Delta u_i(t_n)\triangleq u_i(t_n)-u_i(t_{n-1})
=
\begin{bmatrix}
\Delta s_i(t_n)\\
\Delta v_i(t_n)
\end{bmatrix}.
\label{eq:du_discrete_clean}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to \emph{inputs} $\Delta u_i(t_n)$ means \emph{sample-to-sample increment}.
Thus, $\Delta x$ (deviation state) and $\Delta u$ (input increment) are conceptually different, and this is fixed by definition.

\paragraph{Disturbance.}
Let $d_i(t)$ denote exogenous disturbances (e.g., entry thickness fluctuation, friction variation, material parameter drift, etc.).
We denote the interval-level equivalent disturbance by $\Delta d_i(t_n)$ (defined via interval averaging below).

\paragraph{Basic matrix notation.}
$I_d$ denotes the $d\times d$ identity matrix; $0_{a\times b}$ denotes the $a\times b$ zero matrix.

\subsection{Discrete interval mapping and data-driven learning objective}
\label{subsec:mapping_clean}

The stand-wise deviation-state evolution over $[t_n,t_{n+1}]$ can be expressed by a discrete-time mapping
\begin{equation}
\Delta x_i(t_{n+1})
=
\Phi_i\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Delta u_i([t_n,t_{n+1}]),\,\Delta d_i([t_n,t_{n+1}])\Big),
\label{eq:true_mapping_clean}
\end{equation}
where $\Phi_i(\cdot)$ is generally nonlinear and coupled due to rolling deformation and tension propagation.
A commonly used \emph{conceptual} equivalent discrete linear form is
\begin{equation}
\Delta x_i(t_{n+1})
=
M_d\,\Delta x_i(t_n)
+
N_d\,\Delta u_i(t_n)
+
F_d\,\Delta d_i(t_n),
\label{eq:linear_form_concept}
\end{equation}
where $M_d,N_d,F_d$ represent equivalent discrete-time matrices around operating conditions.
In a practical five-stand cold rolling mill, accurately deriving/identifying these matrices and disturbance models from first principles is difficult,
due to strong coupling, unmodeled nonlinearities, and time-varying operating regimes.
Therefore, this paper aims to learn a high-fidelity approximation of the interval evolution from data and then embed it into distributed MPC.

\begin{remark}
In fact, due to the existence of complex coupling relationships, it is difficult to directly and accurately establish \eqref{eq:linear_form_concept}
based on first principles. Therefore, in this paper, we learn an approximate mapping of \eqref{eq:true_mapping_clean} from data.
\end{remark}

%========================
\subsection{Interval-level parameterization and one-step dataset}
%========================

\paragraph{Why interval-level parameterization is reasonable in the five-stand setting.}
Although decisions are updated at discrete instants tnt_n, the hydraulic gap and drive systems evolve continuously inside each interval,
and abrupt within-interval changes may excite tension oscillations and deteriorate thickness stability.
Thus, parameterizing the within-interval increment trajectory by a low-order polynomial:
(i) yields a compact finite-dimensional decision representation;
(ii) enforces smooth profiles inside the interval;
(iii) enables enforcing increment constraints for all $\tau\in[0,\delta_n]$.
This is appropriate when $\delta_n$ is not excessively large relative to actuator bandwidth and the within-interval evolution is well approximated by a low-order basis.

\paragraph{Vector quadratic polynomial parameterization (two inputs).}
On the interval $[t_n,t_{n+1}]$, parameterize the control increment trajectory as
\begin{equation}
\Delta u_{i,n}(\tau;\Gamma_{i,n})
=
\Gamma_{i,n0}+\Gamma_{i,n1}\tau+\Gamma_{i,n2}\tau^2,
\qquad \tau\in[0,\delta_n],
\label{eq:du_poly_vec_clean}
\end{equation}
where $\Gamma_{i,n0},\Gamma_{i,n1},\Gamma_{i,n2}\in\mathbb{R}^{n_u}$ are coefficient vectors ($n_u=2$).
Component-wise, \eqref{eq:du_poly_vec_clean} corresponds to
\begin{equation}
\begin{aligned}
\Delta s_{i,n}(\tau) &= \gamma^{(s)}_{i,n0}+\gamma^{(s)}_{i,n1}\tau+\gamma^{(s)}_{i,n2}\tau^2,\\
\Delta v_{i,n}(\tau) &= \gamma^{(v)}_{i,n0}+\gamma^{(v)}_{i,n1}\tau+\gamma^{(v)}_{i,n2}\tau^2.
\end{aligned}
\label{eq:du_components_clean}
\end{equation}
Define the stacked parameter vector
\begin{equation}
\Gamma_{i,n}\triangleq
\big[
(\Gamma_{i,n0})^\top,\,
(\Gamma_{i,n1})^\top,\,
(\Gamma_{i,n2})^\top
\big]^\top
\in\mathbb{R}^{p},
\qquad
p=3n_u=6.
\label{eq:Gamma_clean}
\end{equation}
Here, $\Gamma_{i,n0}$ is the baseline increment at $\tau=0$, while $\Gamma_{i,n1}$ and $\Gamma_{i,n2}$ describe the linear and quadratic variation rates.

\paragraph{Equivalent discrete-time (interval-averaged) increments.}
Define the interval-averaged equivalent increments as
\begin{equation}
\begin{aligned}
\Delta u_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta u_{i,n}(\tau)\,d\tau,\\
\Delta d_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta d_i(\tau)\,d\tau.
\end{aligned}
\label{eq:avg_def_clean}
\end{equation}
With \eqref{eq:du_poly_vec_clean}, the input average has a closed form:
\begin{equation}
\Delta u_i(t_n)=
\Gamma_{i,n0}
+\Gamma_{i,n1}\frac{\delta_n}{2}
+\Gamma_{i,n2}\frac{\delta_n^2}{3}.
\label{eq:avg_closed_clean}
\end{equation}

\paragraph{Sampling domains for offline data generation.}
Let $\mathcal{I}_x$ denote the sampling domain (ranges) of deviation states $\Delta x_i(t_n)$ and neighbor stacks $\Delta x_{Z_i}(t_n)$,
and let $\mathcal{I}_\Gamma$ denote the sampling domain of polynomial parameters $\Gamma_{i,n}$ (covering both gap and speed channels).
These domains specify the operating envelope used to generate supervised training data.

\paragraph{One-step sample generation (five-stand coupled simulation).}
Given the above parameterization, one training sample is generated on each interval $[t_n,t_{n+1}]$.
In addition to the local deviation state, the neighbor deviation states are included to represent inter-stand coupling.
The process is summarized in Table~\ref{tab:interval_sample_generation_en}.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.15}
\caption{Procedure for generating one interval-level sample on $[t_n,t_{n+1}]$ (five-stand coupled mill).}
\label{tab:interval_sample_generation_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.09\linewidth} X}
\toprule
\textbf{Step} & \textbf{Operation} \\
\midrule
1 & \textbf{State sampling:} sample $\Delta x_i(t_n)$ and $\Delta x_{Z_i}(t_n)$ from $\mathcal{I}_x$. \\
2 & \textbf{Parameter sampling:} draw $\Gamma_{i,n}\sim\mathcal{I}_\Gamma$ (coefficients for both $\Delta s_{i,n}(\tau)$ and $\Delta v_{i,n}(\tau)$). \\
3 & \textbf{Control construction:} compute $\Delta u_{i,n}(\tau)$ via \eqref{eq:du_poly_vec_clean}. \\
4 & \textbf{State propagation:} integrate the \emph{five-stand coupled} mill model on $[t_n,t_{n+1}]$ (e.g., RK4) using the within-interval control trajectory, and record $\Delta x_i(t_{n+1})$. \\
\bottomrule
\end{tabularx}
\end{table}

Accordingly, an interval sample for subsystem $i$ can be represented as
\begin{equation}
\mathcal{D}_{i,n}=\big\{\Delta x_i(t_n),\ \Delta x_{Z_i}(t_n),\ \Delta u_{i,n}(\tau),\ \Delta x_i(t_{n+1})\big\}.
\label{eq:interval_sample_clean}
\end{equation}
Note that $\Delta u_{i,n}(\tau)$ is fully determined by $(\Gamma_{i,n},\delta_n)$ via \eqref{eq:du_poly_vec_clean},
therefore it is sufficient to store $(\Gamma_{i,n},\delta_n)$ as the learning input.

For each subsystem $i$, by repeating the above procedure across multiple intervals and randomized draws,
the local one-step training dataset is formed as
\begin{equation}
\begin{split}
S_i=\Big\{&
\big(\Delta x_i^{(j)}(t_n),\,\Delta x_{Z_i}^{(j)}(t_n),\,\Delta x_i^{(j)}(t_{n+1});\,
\Gamma_{i,n}^{(j)},\,\delta_n^{(j)}\big)
\ \Big|\ j=1,\ldots,J
\Big\}.
\end{split}
\label{eq:S_i_clean}
\end{equation}
Here $J$ is the number of one-step samples for subsystem $i$.
The overall dataset for the five-stand mill is denoted by $\{S_i\}_{i=1}^{5}$.
The point-cloud visualization of the training dataset is shown in Figure~\ref{2}.

\begin{figure*}[htbp]
\centering
\includegraphics[scale=0.5]{picture/Fig2.pdf}
\caption{Point cloud map of the training dataset.}\label{2}
\end{figure*}

%========================
\subsection{Multi-step rollout segment dataset}
%========================

The one-step set $S_i$ is sufficient for one-step regression, but it is not sufficient for training with multi-step rollout loss
and reciprocal-consistency regularization, because these objectives require ground-truth deviation-state trajectories over a horizon of $K$ consecutive intervals.
Therefore, without changing the single-interval sampling mechanism above, we additionally organize the offline-simulated samples
into $K$-step trajectory segments.

Specifically, during offline simulation, for each starting time $t_n$ we generate a segment of length $K$ by consecutively sampling
$\{\Gamma_{i,n+s},\delta_{n+s}\}_{s=0}^{K-1}$ (and the corresponding inputs/disturbances),
and integrating the five-stand coupled mill model over $[t_{n+s},t_{n+s+1}]$ for $s=0,\ldots,K-1$.
Hence, we obtain the deviation-state sequence $\{\Delta x_i(t_{n+s})\}_{s=0}^{K}$ as well as the neighbor stacks
$\{\Delta x_{Z_i}(t_{n+s})\}_{s=0}^{K}$.

Define a $K$-step segment sample for subsystem $i$ as
\begin{equation}
\begin{aligned}
\mathcal{W}_{i,n}=
\Big\{&
\big(\Delta x_i(t_{n+s}),\,\Delta x_{Z_i}(t_{n+s}),\,\Gamma_{i,n+s},\,\delta_{n+s}\big)_{s=0}^{K-1}; \\
&\big(\Delta x_i(t_{n+s+1})\big)_{s=0}^{K-1}
\Big\}.
\end{aligned}
\label{eq:segment_clean}
\end{equation}
By repeating the above segment generation, we form the multi-step training set
\begin{equation}
S_i^{(K)}=\Big\{\mathcal{W}_{i,n}^{(j)}\ \Big|\ j=1,\ldots,J_K\Big\},
\label{eq:S_i_K_clean}
\end{equation}
where $J_K$ is the number of $K$-step segment samples.
Note that $S_i$ can be viewed as the marginal one-step projection of $S_i^{(K)}$ (keeping only $s=0$),
thus the original dataset design is preserved, and only an additional \emph{segment organization} is introduced for multi-step training.

%========================
\section{Construction of Residual Neural Network}
%========================

\subsection{Network architecture (what is learned, why residual, why include control)}
\label{subsec:net_clean}

\paragraph{Learning target (one-step controlled deviation-state evolution).}
Given the dataset, the neural network model is trained to learn a stand-wise, control-dependent one-step evolution law of deviation states:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\,\Theta_i\Big),
\label{eq:learned_dyn_clean}
\end{equation}
where $\mathcal{N}_i(\cdot)$ outputs the one-step deviation-state change and $\Theta_i$ are trainable parameters.

\paragraph{Why the model must include $u$ (difference between ``with $u$'' and ``without $u$'').}
If $\mathcal{N}_i$ does not take control information as input (here $\Gamma_{i,n}$ and $\delta_n$),
the predictor becomes an autoregressive model that only reproduces trajectories under the training input patterns
and cannot answer the counterfactual question: ``what will happen if we choose a different roll gap/speed trajectory?''
Since MPC optimizes over candidate decisions, a control-dependent predictor \eqref{eq:learned_dyn_clean} is necessary
to evaluate the predicted thickness/tension behavior under different candidate actuator trajectories.

\paragraph{Input/output dimensions.}
Let $d=3$ (state dimension), $|Z_i|$ be the number of neighbors of stand $i$ in \eqref{eq:Zi_clean}, and $p=6$ in \eqref{eq:Gamma_clean}.
Define the input vector
\begin{equation}
X_{i,\text{in}} \triangleq
\big[
\Delta x_i(t_n)^\top,\,
\Delta x_{Z_i}(t_n)^\top,\,
\Gamma_{i,n}^\top,\,
\delta_n
\big]^\top
\in \mathbb{R}^{d(1+|Z_i|)+p+1}.
\label{eq:X_in_clean}
\end{equation}
The network mapping is
\begin{equation}
\mathcal{N}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d}.
\end{equation}

\paragraph{Residual (shortcut) structure.}
To improve training stability and long-horizon rollout robustness, we use a residual form.
Let $\hat{I}_i\in\mathbb{R}^{d\times(d(1+|Z_i|)+p+1)}$ be a selection matrix extracting the local state block:
\begin{equation}
\hat{I}_i = [I_d,\ 0_{d\times(d|Z_i|+p+1)}].
\label{eq:Ihat_clean}
\end{equation}
Then the one-step predictor is written as
\begin{equation}
X_{i,\text{out}} = \hat{I}_i X_{i,\text{in}} + \mathcal{N}_i(X_{i,\text{in}}; \Theta_i),
\label{eq:res_predict_clean}
\end{equation}
where $X_{i,\text{out}}$ represents the predicted $\Delta x_i(t_{n+1})$.
This structure implements a baseline-plus-correction interpretation:
the shortcut propagates the current deviation state $\Delta x_i(t_n)$, while the network learns the correction capturing
unmodeled nonlinearities and inter-stand coupling (via $\Delta x_{Z_i}$) under varying operating conditions.

\paragraph{Auxiliary branch for variable interval length (avoid symbol conflict).}
To improve robustness when $\delta_n$ varies, we introduce an auxiliary branch inside $\mathcal{N}_i$:
\begin{equation}
\mathcal{N}_i(X_{i,\text{in}};\Theta_i)\triangleq
\psi_i(X_{i,\text{in}};\Theta_{\psi_i}) + \rho_i(X_{i,\text{in}};\theta_i),
\label{eq:aux_clean}
\end{equation}
where $\psi_i(\cdot)$ is a lightweight feedforward branch that captures low-frequency/scale effects strongly related to $\delta_n$,
and $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections.
When $\psi_i(\cdot)\equiv 0$, the model reduces to a standard residual network.
(We use $\psi_i$ and $\rho_i$ to avoid notation conflicts with the sampling sets $\mathcal{I}_x,\mathcal{I}_\Gamma$ and the optimizer learning rate.)

\paragraph{One-step supervised target.}
For the $j$-th sample in \eqref{eq:S_i_clean}, define
\begin{equation}
X_{i,\text{in}}^{(j)} =
\big[
\Delta x_i^{(j)}(t_n),\ \Delta x_{Z_i}^{(j)}(t_n),\
\Gamma_{i,n}^{(j)},\ \delta_n^{(j)}
\big]^{\top},
\end{equation}
and the supervised residual target
\begin{equation}
\Delta r_i^{(j)}=\Delta x_i^{(j)}(t_{n+1})-\Delta x_i^{(j)}(t_n).
\label{eq:target_clean}
\end{equation}

\subsection{Training, learned model, and system prediction (multi-step stability and control usage)}
\label{subsec:train_clean}

To suppress accumulation drift induced by long-horizon recursion and to improve long-term predictive stability,
we train the forward predictor jointly with an auxiliary backward residual model
and impose a multi-step reciprocal-consistency regularization over a $K$-step segment from $S_i^{(K)}$.

\paragraph{Backward residual model.}
Construct a backward residual network
\begin{equation}
\mathcal{B}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d},
\end{equation}
parameterized by $\bar{\Theta}_i$. For the backward step associated with interval $[t_n,t_{n+1}]$, define
\begin{equation}
\begin{aligned}
X_{i,\mathrm{in}}^{b}
&=
\big[
\Delta x_i(t_{n+1}),\ \Delta x_{Z_i}(t_{n+1}),\
\Gamma_{i,n},\ \delta_n
\big]^{\top},\\
X_{i,\mathrm{out}}^{b}
&=
\hat{I}_i X_{i,\mathrm{in}}^{b} + \mathcal{B}_i(X_{i,\mathrm{in}}^{b};\bar{\Theta}_i),
\end{aligned}
\label{eq:back_clean}
\end{equation}
where $X_{i,\mathrm{out}}^{b}$ represents the backward estimate of $\Delta x_i(t_n)$.
The supervised backward residual target is
\begin{equation}
\Delta r_i^{b}=\Delta x_i(t_n)-\Delta x_i(t_{n+1}).
\end{equation}

\paragraph{Forward rollout on a $K$-step segment.}
Given a segment sample $\mathcal{W}_{i,n}\in S_i^{(K)}$, initialize
\begin{equation}
\Delta \hat{x}_i(t_n)=\Delta x_i(t_n),
\end{equation}
and recursively apply the forward predictor for $K$ steps:
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,\delta_{n+s};\,\Theta_i
\Big),\\
&\qquad s=0,\ldots,K-1.
\end{aligned}
\label{eq:fwd_roll_clean}
\end{equation}

\paragraph{Backward rollout and reciprocal consistency.}
Set the terminal condition
\begin{equation}
\Delta \bar{x}_i(t_{n+K})=\Delta \hat{x}_i(t_{n+K}),
\end{equation}
and roll back using $\mathcal{B}_i$:
\begin{equation}
\begin{aligned}
\Delta \bar{x}_i(t_{n+s})
&=
\hat{I}_i X_{i,\mathrm{in}}^{b}(t_{n+s})
+
\mathcal{B}_i\!\Big(X_{i,\mathrm{in}}^{b}(t_{n+s});\,\bar{\Theta}_i\Big),
\quad s=K-1,\ldots,0,
\end{aligned}
\label{eq:bwd_roll_clean}
\end{equation}
where
\begin{equation}
X_{i,\mathrm{in}}^{b}(t_{n+s})=
\big[
\Delta \bar{x}_i(t_{n+s+1}),\ \Delta \hat{x}_{Z_i}(t_{n+s+1}),\
\Gamma_{i,n+s},\ \delta_{n+s}
\big]^{\top}.
\end{equation}

Define the multi-step reciprocal prediction error
\begin{equation}
E_i(t_n)
=
\sum_{s=0}^{K}
\left\|
\Delta \hat{x}_i(t_{n+s})-\Delta \bar{x}_i(t_{n+s})
\right\|^2.
\end{equation}

\paragraph{Training objectives (meaning of each loss).}
We jointly minimize:
\begin{equation}
\begin{aligned}
L_{\mathrm{1step}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s+1})-\Delta x_i^{(j)}(t_{n+s})\big)
-\mathcal{N}_i\!\left(
X_{i,\mathrm{in}}^{(j)}(t_{n+s});\Theta_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{bwd}}(\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s})-\Delta x_i^{(j)}(t_{n+s+1})\big)
-\mathcal{B}_i\!\left(
X_{i,\mathrm{in}}^{b,(j)}(t_{n+s});\bar{\Theta}_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{msrp}}(\Theta_i,\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K} E_i^{(j)}(t_n),\\[2mm]
L_{\mathrm{roll}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\sum_{s=1}^{K}
\Big\|
\Delta x_i^{(j)}(t_{n+s})-\Delta \hat{x}_i^{(j)}(t_{n+s})
\Big\|^2.
\end{aligned}
\label{eq:loss_clean}
\end{equation}
Here, $L_{\mathrm{1step}}$ enforces one-step accuracy; $L_{\mathrm{roll}}$ explicitly suppresses long-horizon drift under recursion;
$L_{\mathrm{msrp}}$ regularizes the learned dynamics by enforcing reciprocal consistency between forward and backward rollouts;
and $L_{\mathrm{bwd}}$ trains the backward model for the consistency regularization.
In implementation, these terms are combined as
\begin{equation}
L_{\mathrm{total}}=\lambda_1 L_{\mathrm{1step}}+\lambda_2 L_{\mathrm{roll}}+\lambda_3 L_{\mathrm{msrp}}+\lambda_4 L_{\mathrm{bwd}},
\end{equation}
where $\lambda_1,\lambda_2,\lambda_3,\lambda_4>0$ are tuned on a validation set.

\paragraph{Learned forward predictor used in control.}
After training, the forward predictor is
\begin{equation}
\Delta \hat{x}_i(t_{n+1})
=
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(
\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,
\Gamma_{i,n},\,\delta_n;\,\Theta_i^*
\Big),
\label{eq:pred_clean}
\end{equation}
and multi-step prediction is obtained by recursive rollout of \eqref{eq:pred_clean}.
This learned predictor is the internal model used by the MPC optimizer in the next section.

Finally, network parameters are optimized using Adam:
\begin{equation}
\Theta_{i,t+1} = \Theta_{i,t} - \alpha \frac{\hat{m}_{i,t}}{\sqrt{\hat{v}_{i,t}} + \varepsilon},
\end{equation}
where $\alpha$ is the learning rate (we use $\alpha$ to avoid conflict with other symbols),
$\hat{m}_{i,t}$ and $\hat{v}_{i,t}$ are bias-corrected moment estimates, and $\varepsilon>0$ is a small constant for numerical stability.
Figure~\ref{fig:rnn_logic} illustrates the overall structure.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.85]{picture/x6.pdf}
\caption{Logic diagram of the residual neural network.}
\label{fig:rnn_logic}
\end{figure}

%========================
\section{Nash Equilibrium-Based RNE-DMPC}
%========================

The five-stand tandem cold rolling system is strongly coupled through inter-stand tension propagation.
As a result, changes in control actions (roll gap and stand speed) at one stand can affect both upstream and downstream stands,
making centralized online optimization over all stands' decision variables computationally demanding.

To mitigate this issue, we decompose the global predictive-control problem into $N=5$ local subproblems associated with individual stands.
Each local controller optimizes its own decision variables while accounting for coupling via limited information exchange with neighboring controllers.
Motivated by game-theoretic coordination \citep{rawlings2008coordinating}, we formulate distributed coordination as a Nash-equilibrium-seeking iteration.
Based on the trained residual neural network surrogate model, we construct a Nash-equilibrium-based distributed MPC method (RNE-DMPC)
for coordinated thickness--tension regulation/tracking. The overall control structure is shown in Figure~\ref{4}.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x2.pdf}
\caption{Schematic diagram of the control architecture for a tandem cold rolling mill.}\label{4}
\end{figure*}

\subsection{Prediction model used in MPC and the prediction--control interface}
\label{subsec:interface_clean}

\paragraph{Key idea: prediction serves control through model constraints.}
At each sampling time $t_n$, MPC evaluates candidate actuator trajectories (encoded by $\Gamma_{i,n+s}$) by rolling out predictions of
thickness/tension deviations using the learned surrogate \eqref{eq:pred_clean}.
Therefore, the learned predictor directly provides the multi-step forecasts required to compute the MPC objective and enforce constraints.

\paragraph{Local polynomial parameterization over the horizon.}
Over each interval $[t_{n+s},t_{n+s+1}]$ with length $\delta_{n+s}$, the control increment trajectory of stand $i$ is
\begin{equation}
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\tau
+\Gamma_{i,n+s,2}\tau^2,\qquad \tau \in [0,\delta_{n+s}],
\label{eq:du_poly_mpc_clean}
\end{equation}
where $\Gamma_{i,n+s}\in\mathbb{R}^{p}$ with $p=6$.

\paragraph{Neural-network-based multi-step prediction inside MPC.}
Define the prediction horizon $N_p$ (number of future intervals predicted) and the control horizon $N_c$ (number of future intervals optimized),
with $N_c\le N_p$.
Given the current measured/estimated deviation states $\Delta x_i(t_n)$ and a candidate decision sequence
$\mathbf{\Gamma}_i(t_n)=\big[\Gamma_{i,n}^\top,\ldots,\Gamma_{i,n+N_c-1}^\top\big]^\top$,
stand $i$ predicts its deviation-state evolution by
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,
\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,
\delta_{n+s};\,
\Theta_i^*
\Big), \\
&\qquad s=0,\ldots,N_p-1,
\end{aligned}
\label{eq:rollout_mpc_clean}
\end{equation}
with initialization $\Delta \hat{x}_i(t_n)=\Delta x_i(t_n)$.
Here $\Delta \hat{x}_{Z_i}(t_{n+s})$ is obtained from the latest communication with neighbors during the Nash iteration.
Equation \eqref{eq:rollout_mpc_clean} is the explicit mathematical interface: \textbf{control decisions} $(\Gamma_{i,n+s})$
$\rightarrow$ \textbf{predicted thickness/tension deviations} $(\Delta \hat{x}_i)$ $\rightarrow$ \textbf{objective/constraints evaluation}.

\subsection{Local optimization problem (objective, reference meaning, constraints, and numerical solution)}
\label{subsec:local_opt_clean}

\paragraph{Decision variables.}
At time $t_n$, the local decision vector for stand $i$ is
\begin{equation}
\mathbf{\Gamma}_i(t_n)
\triangleq
\big[
\Gamma_{i,n}^\top,\,
\Gamma_{i,n+1}^\top,\,
\ldots,\,
\Gamma_{i,n+N_c-1}^\top
\big]^\top
\in \mathbb{R}^{pN_c}.
\label{eq:Gamma_seq_clean}
\end{equation}

\paragraph{Reference meaning (remove ambiguity of $\Delta x_{\mathrm{ref}}$).}
Because the deviation state is defined as $\Delta x_i(t)=x_i(t)-x_i^{\mathrm{ref}}(t)$ in \eqref{eq:dev_def} and \eqref{eq:xi_def_clean},
the desired regulation/tracking objective in deviation coordinates is always
\begin{equation}
\Delta x_i(t)\rightarrow 0.
\end{equation}
Therefore, the reference in deviation form is simply the zero vector, i.e.,
\begin{equation}
\Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d}.
\label{eq:dxref_zero_clean}
\end{equation}
Equivalently, one can view the cost as penalizing $(\hat x_i-x_i^{\mathrm{ref}})$ in absolute coordinates; in this paper we keep the deviation formulation.

\paragraph{Local objective (explicitly thickness+tension).}
Let $\Delta \hat{x}_i(t_{n+s})=[\Delta \hat h_i(t_{n+s}),\,\Delta \widehat T_{i-1}(t_{n+s}),\,\Delta \widehat T_i(t_{n+s})]^\top$.
The local cost is
\begin{equation}
\begin{aligned}
J_i
&=
\sum_{s=1}^{N_p}
\big\|
\Delta \hat{x}_i(t_{n+s}) - \Delta x_{i,\mathrm{ref}}(t_{n+s})
\big\|_{Q_i}^2
+
\sum_{s=0}^{N_c-1}
\big\|
\Gamma_{i,n+s}
\big\|_{R_i}^2,
\end{aligned}
\label{eq:Ji_clean}
\end{equation}
where $Q_i\in\mathbb{R}^{d\times d}$ weights thickness and tension deviations,
and $R_i\in\mathbb{R}^{p\times p}$ penalizes the polynomial-parameter magnitude to encourage smooth increments.
Using \eqref{eq:dxref_zero_clean}, the tracking term reduces to penalizing predicted deviation states directly.

\paragraph{Constraints.}
We enforce both absolute-input bounds and increment-trajectory bounds.

\emph{Absolute input bounds:}
\begin{equation}
u_{i,\min} \le u_i(t_{n+s}) \le u_{i,\max},
\qquad s=0,\ldots,N_p-1,
\label{eq:u_abs_clean}
\end{equation}
where $u_{i,\min},u_{i,\max}\in\mathbb{R}^{2}$ provide component-wise bounds for $(s_i,v_i)$.

\emph{Increment trajectory bounds for all $\tau$:}
\begin{equation}
\Delta u_{i,\min}
\le
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
\le
\Delta u_{i,\max},
\qquad \forall\tau\in[0,\delta_{n+s}],
\label{eq:du_traj_clean}
\end{equation}
where $\Delta u_{i,\min},\Delta u_{i,\max}\in\mathbb{R}^{2}$ are component-wise bounds for $(\Delta s,\Delta v)$.

\paragraph{Practical enforcement of \eqref{eq:du_traj_clean} (no nonstandard symbols).}
For a scalar quadratic $q(\tau)=a+b\tau+c\tau^2$ on $\tau\in[0,\delta]$, its extrema over the interval occur at
$\tau=0$, $\tau=\delta$, and possibly at the stationary point $\tau^\star=-b/(2c)$ if $c\neq 0$ and $\tau^\star\in[0,\delta]$.
Therefore, to enforce \eqref{eq:du_traj_clean} for the two-channel vector $\Delta u_{i,n+s}(\tau)=[\Delta s_{i,n+s}(\tau),\,\Delta v_{i,n+s}(\tau)]^\top$,
we check the above candidate points \emph{separately for each channel} using the corresponding coefficients in \eqref{eq:du_components_clean}.

\paragraph{Consistency between within-interval trajectory and discrete execution (interval average).}
To update the discrete absolute input and enforce \eqref{eq:u_abs_clean} consistently, we use the interval-averaged increment:
\begin{equation}
\Delta u_i(t_{n+s})
=
\frac{1}{\delta_{n+s}}\int_0^{\delta_{n+s}}\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\,d\tau
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}.
\label{eq:du_avg_clean}
\end{equation}
Then propagate the absolute input sequence:
\begin{equation}
\begin{aligned}
u_i(t_n) &= u_i(t_{n-1}) + \Delta u_i(t_n), \\
u_i(t_{n+s}) &= u_i(t_{n+s-1}) + \Delta u_i(t_{n+s}), \qquad s=1,\ldots,N_p-1.
\end{aligned}
\label{eq:u_prop_clean}
\end{equation}

\paragraph{Local optimization problem (solved at each Nash iteration).}
At Nash-iteration index $l$, subsystem $i$ solves
\begin{equation}
\mathbf{\Gamma}_i^{(l)}=\arg\min_{\mathbf{\Gamma}_i}\; J_i
\quad \text{s.t.}\quad
\eqref{eq:rollout_mpc_clean},\ \eqref{eq:u_abs_clean},\ \eqref{eq:du_traj_clean},\ \eqref{eq:u_prop_clean}.
\label{eq:local_prob_clean}
\end{equation}
Because the learned surrogate $\mathcal{N}_i(\cdot)$ is differentiable, \eqref{eq:local_prob_clean} is a differentiable nonlinear program (NLP),
which can be solved by standard gradient-based NLP solvers (e.g., SQP or interior-point methods) using automatic differentiation to compute gradients.

\subsection{Nash equilibrium coordination (where conflict comes from, how it is resolved, and how the solution is obtained)}
\label{subsec:nash_clean}

\paragraph{Why a Nash iteration is needed (explicit conflict explanation).}
Inter-stand tensions are shared coupling variables: the tension $T_i$ is influenced by both stand $i$ and stand $i+1$ (notably through their speed actions),
and changes in roll gap can also indirectly affect tensions via strip deformation and transport.
Therefore, purely independent local optimization can lead to conflicting actions: improving local thickness may worsen neighbor tensions, and vice versa.
To resolve this coupling conflict with limited communication, we adopt a Nash-equilibrium-seeking distributed best-response iteration.

\paragraph{Distributed best-response iteration.}
At iteration $l$, each stand computes its best response to the latest neighbor strategies and predictions.
The procedure is summarized in Table~\ref{tab:nash_iter_en}.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.12}
\setlength{\tabcolsep}{3.5pt}
\caption{Distributed Nash best-response iteration for RNE-DMPC (five-stand).}
\label{tab:nash_iter_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X}
\toprule
\textbf{Step} & \textbf{Description} \\
\midrule
A &
Initialize $l=1$ and initialize $\mathbf{\Gamma}_i^{(0)}$ for all subsystems (e.g., warm-start from previous time step). \\
B &
Using \eqref{eq:rollout_mpc_clean}, compute $\Delta \hat{x}_i^{(l)}(t_{n+s})$ for $s=1,\ldots,N_p$,
given $\mathbf{\Gamma}_i^{(l-1)}$ and the latest neighbor predictions
$\Delta \hat{x}_{Z_i}^{(l-1)}(t_{n+s})$. \\
C & Solve the local NLP \eqref{eq:local_prob_clean} to update $\mathbf{\Gamma}_i^{(l)}$ (best response). \\
D &
Broadcast $\mathbf{\Gamma}_i^{(l)}$ and predicted trajectories
$\Delta \hat{x}_i^{(l)}(t_{n+s})$ to the communication system. \\
E &
Update neighbor predictions $\Delta \hat{x}_{Z_i}^{(l)}(t_{n+s})$ using received information; re-generate predictions if needed. \\
F & Compute the maximum relative change $\varsigma^{(l)}$ as the convergence metric. \\
G &
If $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$;
otherwise set $l\leftarrow l+1$ and repeat Steps B--F. \\
\bottomrule
\end{tabularx}
\end{table}

\paragraph{Convergence metric.}
Define
\begin{equation}
\varsigma^{(l)}
\triangleq
\max_i
\frac{\left\|
\mathbf{\Gamma}_i^{(l)}-\mathbf{\Gamma}_i^{(l-1)}
\right\|_2}{
\left\|
\mathbf{\Gamma}_i^{(l-1)}
\right\|_2+\epsilon},
\label{eq:nash_metric_clean}
\end{equation}
where $\epsilon>0$ is a small constant to avoid division by zero.

\paragraph{Receding-horizon implementation (how the final control is applied).}
Only the first-interval parameters $\Gamma_{i,n}^*$ are applied.
The increment trajectory over $[t_n,t_{n+1}]$ is
\begin{equation}
\Delta u_{i,n}(\tau)=\Delta u_{i,n}(\tau;\Gamma_{i,n}^*),
\quad \tau\in[0,\delta_n].
\end{equation}
The discrete input increment applied for updating the absolute input is the interval average:
\begin{equation}
\Delta u_i(t_n)
=
\frac{1}{\delta_n}\int_0^{\delta_n}\Delta u_{i,n}(\tau)\,d\tau
=
\Gamma_{i,n,0}^*
+
\Gamma_{i,n,1}^*\frac{\delta_n}{2}
+
\Gamma_{i,n,2}^*
\frac{\delta_n^2}{3}.
\label{eq:apply_avg_clean}
\end{equation}
Then the absolute input is updated by
\begin{equation}
u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n),
\label{eq:apply_u_clean}
\end{equation}
which ensures smooth evolution of both roll gap and stand speed and avoids abrupt actuator changes.

\paragraph{Closed-loop prediction--control connection (complete loop).}
At each sampling instant $t_n$:
(i) measure/estimate the current deviation states $\Delta x_i(t_n)$ (thickness and tensions) and initialize neighbor information;
(ii) perform Nash best-response iterations; in each iteration solve \eqref{eq:local_prob_clean} using the learned predictor \eqref{eq:rollout_mpc_clean};
(iii) after convergence, apply $\Gamma_{i,n}^*$ by generating the within-interval increment trajectory and updating $u_i(t_n)$ via \eqref{eq:apply_avg_clean}--\eqref{eq:apply_u_clean};
(iv) move to $t_{n+1}$ and repeat.
In this way, the learned neural predictor supplies the multi-step forecasts needed by MPC, and the distributed optimization computes coordinated
gap/speed increment trajectories that regulate thickness and tensions while resolving coupling conflicts via Nash equilibrium.

The overall control flow chart is shown in Fig.~\ref{liu}.

\begin{figure}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x5.pdf}
\caption{The overall system control flow chart}\label{liu}
\end{figure}

%========================
% Optional: a compact symbol paragraph you can keep or move to Appendix
%========================
\paragraph{Summary of key symbols (for reviewer clarity).}
$i\in\{1,\dots,5\}$: stand index; $t_n$: sampling instant; $\delta_n$: sampling interval; $\tau$: within-interval time.
$h_i,T_i$: thickness and inter-stand tension; $h_i^{\mathrm{ref}},T_i^{\mathrm{ref}}$: references.
$\Delta h_i,\Delta T_i$: deviations; $\Delta x_i$: local deviation state in \eqref{eq:xi_def_clean}.
$u_i=[s_i,v_i]^\top$: actuators (gap and speed); $\Delta u_i(t_n)=u_i(t_n)-u_i(t_{n-1})$: discrete input increment.
$\Delta u_{i,n}(\tau;\Gamma_{i,n})$: within-interval increment trajectory; $\Gamma_{i,n}\in\mathbb{R}^{p}$: polynomial parameters ($p=6$).
$Z_i$: neighbor set; $\Delta x_{Z_i}$: neighbor stack.
$\mathcal{N}_i(\cdot)$: learned forward residual model; $\mathcal{B}_i(\cdot)$: backward model.
$K$: segment length; $J,J_K$: numbers of one-step and segment samples.
$N_p,N_c$: prediction and control horizons; $Q_i,R_i$: weights; $\epsilon,\varepsilon$: small constants.
这是我的论文第二到第四部分。老师说优化目标函数看不出来多个机架博弈;建立了优化目标求解方案是不是得改变。

思考

Offering revisions for multi-stand game clarity

The user is working on a technical manuscript and seeking advice. To clarify the game theory and Nash equilibrium aspects: I could suggest showing each stand as a player with strategies and costs depending on their own and neighboring states. Including coupling terms, consensus, or Nash-seeking methods might help refine the objective function.

Clarifying the game-theoretic nature of the model

The reviewer might not find the game aspect clear because the cost function, J_i, currently only includes local deviations and each stand's own strategy. To fix this, I could redefine J_i to include neighbor strategies, clearly showing the influence of neighbor states. I’d also make the shared inter-stand tension ΔT_i explicit.

Enhancing clarity on game structure and solution approach

The teacher might not find the game structure clear yet. To clarify, I could define global coupling weights or include cross-terms in the cost functions, like the differences between predicted tensions. I could also add coordination constraints or implement a Nash best response using methods like Gauss-Seidel.

Refining the optimization objective and game formulation

To highlight the multi-stand game, rather than change the solution itself, I could refine the formulation. I’d keep the objective but show dependencies between stands and include shared variable consistency. For clarity, I might add coupling terms, like penalties on tension mismatches between adjacent stands, using LaTeX for these modifications.

Clarifying cross-stand tensions and coordination

To handle the tension between stand i and i+1, I’d add a consensus penalty to ensure consistency, like including a term for the mismatch of predicted tensions. I could also consider treating this as a potential game or generalized Nash equilibrium. Two main options: A) local noncooperative NE or B) adding explicit coupling for a cooperative game element.

Clarifying NE and game formulation

To emphasize the game aspect, I should rewrite the optimization as a parametric game where each player's payoff depends on others through predictions and shared variables. Defining NE and showing the functional dependence of J_i on neighbors will highlight this. The algorithm will include best-response iterations. Also, add terms to penalize shared tensions or treat as a generalized Nash.

Thought for 56s

回答

你老师这句话的意思通常是:你现在写出来的 JiJ_i 看起来像“各机架各自做局部 MPC”,但“博弈”这一层(每个玩家的收益依赖别人的策略)在公式里没有被显式写出来
这不一定要求你推翻求解框架(Nash best-response 仍然可以用),但需要把“目标函数对他人决策的依赖”写得更明确,必要时再加一两个“共享变量一致性/耦合惩罚”项,让博弈冲突一眼可见。

下面给你两种改法(从“只改写法”到“改目标更明显”),按你论文工作量选。


1)为什么现在“看不出博弈”

你目前的目标

Ji=s=1NpΔx^i(tn+s)Qi2+s=0Nc1Γi,n+sRi2J_i=\sum_{s=1}^{N_p}\|\Delta \hat x_i(t_{n+s})\|_{Q_i}^2+\sum_{s=0}^{N_c-1}\|\Gamma_{i,n+s}\|_{R_i}^2

从表面看只含“自己的预测状态、自己的控制参数”。虽然你在预测模型里用了邻居预测 Δx^Zi\Delta \hat x_{Z_i},但你没有把它写成“由邻居的决策 ΓZi\mathbf\Gamma_{Z_i} 产生”,于是读者容易理解成:邻居轨迹是外部已知信号,而不是“其他玩家的策略”。

关键修正点:把 Δx^i()\Delta \hat x_i(\cdot) 明确写成 Δx^i(;Γi,Γi)\Delta \hat x_i(\cdot;\mathbf\Gamma_i,\mathbf\Gamma_{-i}) 或至少 Δx^i(;Γi,ΓZi)\Delta \hat x_i(\cdot;\mathbf\Gamma_i,\mathbf\Gamma_{Z_i})


2)最省事的改法:不改求解流程,只把“博弈依赖”写进定义(强烈建议先做这个)

(a)把预测显式写成“策略的函数”

把你 4.1 节的滚动预测式改成这种写法(含义不变,但读者立刻懂“互相影响”):

Δx^i(tn+s+1;Γi,ΓZi)=Δx^i(tn+s;Γi,ΓZi)+Ni ⁣(Δx^i(tn+s;),Δx^Zi(tn+s;ΓZi),Γi,n+s,δn+s).\Delta \hat x_i(t_{n+s+1};\mathbf\Gamma_i,\mathbf\Gamma_{Z_i}) = \Delta \hat x_i(t_{n+s};\mathbf\Gamma_i,\mathbf\Gamma_{Z_i}) + \mathcal N_i\!\Big( \Delta \hat x_i(t_{n+s};\cdot), \Delta \hat x_{Z_i}(t_{n+s};\mathbf\Gamma_{Z_i}), \Gamma_{i,n+s},\delta_{n+s} \Big).

并补一句定义邻居轨迹来自邻居策略:

Δx^Zi(tn+s;ΓZi)=col{Δx^k(tn+s;Γk)kZi}.\Delta \hat x_{Z_i}(t_{n+s};\mathbf\Gamma_{Z_i}) =\mathrm{col}\big\{\Delta \hat x_k(t_{n+s};\mathbf\Gamma_k)\mid k\in Z_i\big\}.

(b)把目标函数写成 payoff(收益)形式:Ji(Γi;Γi)J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i})

Ji(Γi;Γi)=s=1NpΔx^i(tn+s;Γi,Γi)Qi2+s=0Nc1Γi,n+sRi2.J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i}) = \sum_{s=1}^{N_p} \big\|\Delta \hat x_i(t_{n+s};\mathbf\Gamma_i,\mathbf\Gamma_{-i})\big\|_{Q_i}^2 + \sum_{s=0}^{N_c-1}\|\Gamma_{i,n+s}\|_{R_i}^2.

(c)给出 Nash 均衡的标准定义(这是“博弈”最关键的一行)

在 4.3 节加:

Γ={Γ1,,Γ5} 是 Nash 均衡当且仅当 i, ΓiargminΓiΩiJi(Γi;Γi).\mathbf\Gamma^*=\{\mathbf\Gamma_1^*,\ldots,\mathbf\Gamma_5^*\}\ \text{是 Nash 均衡当且仅当}\ \forall i,\ \mathbf\Gamma_i^*\in \arg\min_{\mathbf\Gamma_i\in\Omega_i} J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i}^*).

其中 Ωi\Omega_i 是你那些约束(输入上下界、增量轨迹约束、传播约束等)定义出来的可行域。

做完这三步,很多老师/审稿人就不会再说“看不出博弈”了,因为你把“他人的策略进入我的代价”写成了数学对象。


3)让“冲突/博弈”更一眼可见:加一个“共享张力一致性/耦合惩罚”项(推荐)

你系统里最典型的共享耦合量就是机架间张力 TiT_i
它同时出现在 stand ii 的状态里(ΔTi\Delta T_i)也出现在 stand i+1i+1 的状态里(ΔT(i+1)1=ΔTi\Delta T_{(i+1)-1}=\Delta T_i)。
这天然就是“两个玩家共同影响同一个量”,但你现在的写法没有把“共享”强调出来。

做法 A:一致性惩罚(consensus-like)

让每个机架在成本里额外惩罚“我预测的共享张力”和“邻居预测的共享张力”的差:

Jinew=Ji+μis=1NpΔT^i(i)(tn+s;Γi,Γi)ΔT^i(i+1)(tn+s;Γi+1,Γ(i+1))2.J_i^{\text{new}} = J_i + \mu_i \sum_{s=1}^{N_p} \big\| \Delta \widehat T_i^{(i)}(t_{n+s};\mathbf\Gamma_i,\mathbf\Gamma_{-i}) - \Delta \widehat T_i^{(i+1)}(t_{n+s};\mathbf\Gamma_{i+1},\mathbf\Gamma_{-(i+1)}) \big\|^2.
  • ΔT^i(i)\Delta \widehat T_i^{(i)}:由 stand ii 的预测轨迹读出来的 TiT_i
  • ΔT^i(i+1)\Delta \widehat T_i^{(i+1)}:由 stand i+1i+1 的预测轨迹读出来的同一个 TiT_i

这项一加,读者会立刻明白:同一根张力在两个玩家之间必须协调,否则成本上升 —— 博弈/协调味道非常强。

做法 B:把博弈升级成“广义 Nash”(GNE)也很加分

如果你把“共享张力一致性”不写成惩罚而写成约束(例如两边对 TiT_i 的预测必须一致/在通信一致变量上满足等式),那就是典型的 generalized Nash equilibrium(可行域与他人策略耦合)。这会更“理论味”,但写起来稍重。


4)“是不是得改变求解方案?”

多数情况下:不用推倒重来,你现在的 best-response Nash 迭代仍然适配上面两种改法。

但我建议你补一个非常实用的小改动,让“博弈求解”更像样、也更稳:

在 best-response 更新里加松弛(relaxation)

很多耦合系统直接用纯 best-response 会振荡。写成:

Γi(l)(1ω)Γi(l1)+ωBRi(Γi(l1)),ω(0,1].\mathbf\Gamma_i^{(l)} \leftarrow (1-\omega)\mathbf\Gamma_i^{(l-1)} + \omega\,\mathrm{BR}_i(\mathbf\Gamma_{-i}^{(l-1)}), \quad \omega\in(0,1].

并在文字里说:ω\omega 用于增强收敛性(工程上很常见)。


你可以直接照搬到论文里的“最小改动清单”

  1. Δx^i\Delta \hat x_i 写成 Δx^i(;Γi,ΓZi)\Delta \hat x_i(\cdot;\mathbf\Gamma_i,\mathbf\Gamma_{Z_i})
  2. JiJ_i 写成 Ji(Γi;Γi)J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i})
  3. 加一条 Nash 均衡定义式。
    4.(推荐)加一个共享张力一致性惩罚项,突出冲突来自 TiT_i 被两机架共同影响。
    5.(推荐)best-response 加松弛系数 ω\omega

如果你愿意,我也可以按你这段 LaTeX 的风格,把第 4 章(尤其 4.2–4.3)直接改写成“Game Formulation + NE Definition + Relaxed Best-response Algorithm”的可投版本,保证老师一眼看到“多个机架在博弈”。

问题

可以,帮我改吧

思考

Thought for 1m 21s

回答

下面给你一份可直接粘贴替换的 LaTeX(建议整体替换你第 4 章里 4.1–4.3 三个 \subsection)。改动目标是让审稿/老师一眼看到:

  1. 每个机架是一个玩家
  2. 每个玩家的代价显式依赖邻居策略Ji(Γi;Γi)J_i(\Gamma_i;\Gamma_{-i}));
  3. “冲突/耦合”通过共享张力 TiT_i 的“双边一致性(consensus)”项体现出来;
  4. 求解方案仍是 Nash best-response,但加了常见的松弛更新 ω\omega,理论和工程上都更稳,也更像“博弈求解”。

你不需要改你前面 RNN 预测器的结构;这里只是把“控制—预测—博弈—求解”在数学上写清楚、写得更像 game。


latex
%======================== \subsection{Prediction model used in MPC and the prediction--control interface (game-coupled form)} \label{subsec:interface_clean} \paragraph{Players and strategies.} At each sampling instant $t_n$, the five-stand mill is viewed as a finite-horizon dynamic game with player set $\mathcal{I}=\{1,2,3,4,5\}$. Player $i$ chooses its local polynomial-parameter sequence (strategy) \begin{equation} \mathbf{\Gamma}_i(t_n) = \big[ \Gamma_{i,n}^\top,\, \Gamma_{i,n+1}^\top,\, \ldots,\, \Gamma_{i,n+N_c-1}^\top \big]^\top\in\mathbb{R}^{pN_c}. \end{equation} Let $\mathbf{\Gamma}(t_n)=\mathrm{col}\{\mathbf{\Gamma}_i(t_n)\}_{i=1}^{5}$ denote the joint strategy, $\mathbf{\Gamma}_{-i}$ the strategies of all players except $i$, and $\mathbf{\Gamma}_{Z_i}$ the strategies of neighboring players in $Z_i$. \paragraph{Key idea: prediction depends on both own and neighbors' strategies.} The learned predictor provides multi-step forecasts used by each player to evaluate its objective and constraints. Crucially, due to tension propagation and transport coupling, the predicted trajectory of player $i$ is not only a function of $\mathbf{\Gamma}_i$ but also of neighbors' strategies $\mathbf{\Gamma}_{Z_i}$ through the neighbor states. 
\paragraph{Neural-network-based multi-step prediction inside MPC (explicit strategy dependence).} Given the current measured/estimated deviation states $\Delta x_i(t_n)$ and a candidate strategy profile $(\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})$, player $i$ predicts \begin{equation} \begin{aligned} \Delta \hat{x}_i(t_{n+s+1};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) &= \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) + \mathcal{N}_i\!\Big( \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}),\, \Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}),\\ &\qquad\qquad\qquad \Gamma_{i,n+s},\, \delta_{n+s};\, \Theta_i^* \Big),\qquad s=0,\ldots,N_p-1, \end{aligned} \label{eq:rollout_mpc_game} \end{equation} with initialization $\Delta \hat{x}_i(t_n;\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})=\Delta x_i(t_n)$. Here \begin{equation} \Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}) = \mathrm{col}\big\{ \Delta \hat{x}_k(t_{n+s};\mathbf{\Gamma}_k)\ \big|\ k\in Z_i \big\} \end{equation} is obtained from the latest communicated neighbor predictions during the Nash iteration. \paragraph{Interface-tension extraction (to expose coupling explicitly).} Recall $\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top$. Define two selection vectors \begin{equation} e^- \triangleq [0\ \ 1\ \ 0],\qquad e^+ \triangleq [0\ \ 0\ \ 1], \end{equation} so that $e^- \Delta x_i=\Delta T_{i-1}$ (upstream tension) and $e^+ \Delta x_i=\Delta T_i$ (downstream tension). The shared inter-stand tension between stand $i$ and $i{+}1$ is $\Delta T_i$, which appears as $e^+\Delta x_i$ in player $i$ and as $e^-\Delta x_{i+1}$ in player $i{+}1$. 
%======================== \subsection{Local optimization problem (payoff with explicit game coupling)} \label{subsec:local_opt_clean} \paragraph{Reference meaning.} Because $\Delta x_i(t)=x_i(t)-x_i^{\mathrm{ref}}(t)$, the deviation-coordinate target is \begin{equation} \Delta x_i(t)\rightarrow 0,\qquad \Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d}. \end{equation} \paragraph{Coupling-aware local payoff (game form).} Player $i$ minimizes a payoff that explicitly depends on other players' strategies: \begin{equation} J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}) = J_i^{\mathrm{trk}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}) + J_i^{\mathrm{act}}(\mathbf{\Gamma}_i) + J_i^{\mathrm{ten}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}), \label{eq:Ji_game_def} \end{equation} where \begin{equation} J_i^{\mathrm{trk}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}) = \sum_{s=1}^{N_p} \big\| \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) \big\|_{Q_i}^2, \qquad J_i^{\mathrm{act}}(\mathbf{\Gamma}_i) = \sum_{s=0}^{N_c-1} \big\| \Gamma_{i,n+s} \big\|_{R_i}^2. \label{eq:Ji_trk_act} \end{equation} \paragraph{Explicit tension-consensus (conflict) term.} To make the inter-stand game coupling visible and to prevent conflicting actions on shared tensions, we add a consensus penalty on the shared tension forecasts across neighboring players: \begin{equation} \begin{aligned} J_i^{\mathrm{ten}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{Z_i}) &= \mu_i^-\sum_{s=1}^{N_p} \Big\| e^- \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) - e^+ \Delta \hat{x}_{i-1}(t_{n+s};\mathbf{\Gamma}_{i-1}) \Big\|^2 \\ &\quad + \mu_i^+\sum_{s=1}^{N_p} \Big\| e^+ \Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i}) - e^- \Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma}_{i+1}) \Big\|^2, \end{aligned} \label{eq:tension_consensus_cost} \end{equation} with boundary convention $\Delta \hat{x}_0\equiv 0$ and $\Delta \hat{x}_6\equiv 0$ so the formula holds for all $i$. 
The weights $\mu_i^-,\mu_i^+\ge 0$ tune the coupling strength; when $\mu_i^\pm=0$, the formulation reduces to the uncoupled local MPC cost. Importantly, \eqref{eq:tension_consensus_cost} makes the ``conflict'' explicit: the same shared tension $\Delta T_i$ is simultaneously influenced by the decisions of players $i$ and $i{+}1$. \paragraph{Constraints and feasible set.} Let $\Omega_i(t_n)$ denote the local feasible set induced by the prediction model and constraints: \begin{equation} \Omega_i(t_n)=\Big\{\mathbf{\Gamma}_i\ \Big|\ \eqref{eq:rollout_mpc_game},\ \eqref{eq:u_abs_clean},\ \eqref{eq:du_traj_clean},\ \eqref{eq:u_prop_clean}\ \text{hold over the horizon} \Big\}. \end{equation} \paragraph{Local best-response problem (solved per Nash iteration).} At Nash-iteration index $l$, player $i$ solves the best-response NLP: \begin{equation} \mathrm{BR}_i\big(\mathbf{\Gamma}_{-i}^{(l-1)}\big) = \arg\min_{\mathbf{\Gamma}_i\in\Omega_i(t_n)}\ J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^{(l-1)}), \label{eq:best_response_problem} \end{equation} where the neighbor predictions in \eqref{eq:rollout_mpc_game} and \eqref{eq:tension_consensus_cost} are treated as fixed signals generated by $\mathbf{\Gamma}_{Z_i}^{(l-1)}$. %======================== \subsection{Nash equilibrium coordination and relaxed best-response iteration} \label{subsec:nash_clean} \paragraph{Game definition and Nash equilibrium.} At time $t_n$, the distributed MPC coordination defines a finite-horizon game $\mathcal{G}(t_n)=\langle \mathcal{I},\{\Omega_i(t_n)\}_{i\in\mathcal{I}},\{J_i(\cdot;\cdot)\}_{i\in\mathcal{I}}\rangle$. A joint strategy $\mathbf{\Gamma}^*(t_n)=\mathrm{col}\{\mathbf{\Gamma}_i^*(t_n)\}$ is a Nash equilibrium if \begin{equation} \forall i\in\mathcal{I},\qquad \mathbf{\Gamma}_i^*(t_n)\in \arg\min_{\mathbf{\Gamma}_i\in\Omega_i(t_n)} J_i\big(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^*(t_n)\big). 
\label{eq:NE_definition} \end{equation} Because $J_i$ depends on neighbors through \eqref{eq:rollout_mpc_game} and \eqref{eq:tension_consensus_cost}, \eqref{eq:NE_definition} explicitly represents the multi-stand game coupling. \paragraph{Relaxed best-response (to improve convergence).} Pure best-response updates may oscillate in strongly coupled mills. We therefore use a relaxed update with $\omega\in(0,1]$: \begin{equation} \mathbf{\Gamma}_i^{(l)} = (1-\omega)\mathbf{\Gamma}_i^{(l-1)} + \omega\, \mathrm{BR}_i\big(\mathbf{\Gamma}_{-i}^{(l-1)}\big). \label{eq:relaxed_BR} \end{equation} \begin{table}[t] \centering \small \renewcommand{\arraystretch}{1.12} \setlength{\tabcolsep}{3.5pt} \caption{Relaxed distributed Nash best-response iteration for RNE-DMPC (five-stand).} \label{tab:nash_iter_en} \begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X} \toprule \textbf{Step} & \textbf{Description} \\ \midrule A & Initialize $l=1$ and initialize $\mathbf{\Gamma}_i^{(0)}$ for all players (warm-start from the previous sampling time). \\ B & Given $\mathbf{\Gamma}^{(l-1)}$, each player $i$ performs forward rollout \eqref{eq:rollout_mpc_game} to obtain $\Delta \hat{x}_i^{(l-1)}(t_{n+s})$ for $s=1,\ldots,N_p$. \\ C & Each player $i$ solves the best-response NLP \eqref{eq:best_response_problem} to obtain $\widetilde{\mathbf{\Gamma}}_i^{(l)}=\mathrm{BR}_i(\mathbf{\Gamma}_{-i}^{(l-1)})$. \\ D & Apply relaxation: $\mathbf{\Gamma}_i^{(l)}=(1-\omega)\mathbf{\Gamma}_i^{(l-1)}+\omega\,\widetilde{\mathbf{\Gamma}}_i^{(l)}$. \\ E & Broadcast $\mathbf{\Gamma}_i^{(l)}$ and updated predictions $\Delta \hat{x}_i^{(l)}(t_{n+s})$ to neighbors. \\ F & Compute the maximum relative change $\varsigma^{(l)}$ using \eqref{eq:nash_metric_clean}. \\ G & If $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$; otherwise set $l\leftarrow l+1$ and repeat Steps B--F. 
\\ \bottomrule \end{tabularx} \end{table} \paragraph{Receding-horizon implementation.} After convergence, only the first-interval parameters $\Gamma_{i,n}^*$ are applied. The within-interval increment trajectory and the interval-averaged increment are computed by \eqref{eq:du_poly_mpc_clean} and \eqref{eq:apply_avg_clean}, and the absolute input is updated by \eqref{eq:apply_u_clean}. This preserves smooth actuator evolution while coordinating shared tensions through the Nash game \eqref{eq:NE_definition}.

你改完后,老师那句“看不出来多个机架博弈”基本就能对上了

因为现在你有三处“铁证”:

  • Ji(Γi;Γi)J_i(\mathbf\Gamma_i;\mathbf\Gamma_{-i}):目标函数显式依赖他人策略;
  • 共享张力一致性项 \eqref{eq:tension_consensus_cost}:冲突来自同一 ΔTi\Delta T_i 被两边共同作用;
  • Nash 定义 \eqref{eq:NE_definition} + relaxed BR 更新 \eqref{eq:relaxed_BR}:求解方案与博弈目标严格一致。

如果你还想再“更博弈一点”,我也可以把 4.2 里“约束集合”升级成**广义 Nash(GNE)**版本(共享张力做等式耦合约束而不是惩罚项),但那会更重、篇幅更长;按你老师反馈,先用上面这个版本通常已经够用了。

问题

d为什么等于3

思考

Thought for 5s

回答

因为你在文中把每个机架 ii 的“局部偏差状态”定义成了 3 个量的列向量

Δxi(t)[Δhi(t)ΔTi1(t)ΔTi(t)]\Delta x_i(t)\triangleq \begin{bmatrix} \Delta h_i(t)\\ \Delta T_{i-1}(t)\\ \Delta T_i(t) \end{bmatrix}

所以它的维度自然就是

d=dim(Δxi)=3.d=\dim(\Delta x_i)=3.

这 3 个量分别代表什么?

  • Δhi\Delta h_i:机架 ii 出口厚度偏差(本机架最核心的质量指标)
  • ΔTi1\Delta T_{i-1}:机架 ii 上游张力偏差(来自 i1i-1ii 之间的张力)
  • ΔTi\Delta T_i:机架 ii 下游张力偏差(来自 iii+1i+1 之间的张力)

在五机架串联系统里,耦合主要就是通过相邻机架间的张力传播,所以“本机架厚度 + 上下游两侧张力”是一个很典型、也很直接的局部建模选取。

那机架 1 和 5 为什么也能用 d=3d=3

因为你采用了边界统一约定:

ΔT0(t)0,ΔT5(t)0\Delta T_0(t)\equiv 0,\qquad \Delta T_5(t)\equiv 0
  • 对机架 1:Δx1=[Δh1, ΔT0(=0), ΔT1]\Delta x_1=[\Delta h_1,\ \Delta T_0(=0),\ \Delta T_1]^\top
  • 对机架 5:Δx5=[Δh5, ΔT4, ΔT5(=0)]\Delta x_5=[\Delta h_5,\ \Delta T_4,\ \Delta T_5(=0)]^\top

这样做的目的就是:所有机架的状态维数一致,方便

  • 数据集结构统一(网络输入输出维数固定)
  • 网络结构统一(不用为边界机架单独设计)
  • MPC/DMPC公式更整齐

可以不是 3 吗?

完全可以。d=3d=3 是你“状态选取”的设计选择,不是物理定律要求。
如果你希望更高保真,可以把局部状态扩成比如:

  • 加入口厚度、压下量/轧制力、摩擦系数估计、机架速度偏差、积分误差等
    dd 就会变大,但训练数据需求和优化复杂度也会上去。

如果你把你论文里“为什么选这三个状态”的那段(或老师具体意见)也贴出来,我可以帮你补一段论文风格的解释文字,放在符号定义后面,专门回答“为何 d=3d=3、为何足够描述主要耦合”。

问题

有没有用多项式参数化应用在五机架冷轧的论文呢

问题

有没有用多项式参数化应用在五机架冷轧的论文呢

思考

Explaining basis-function parameterization and polynomial relation

I didn’t find direct references to polynomial parameterization, but here's a summary:

  • For cold rolling, basis-function parameterization like Laguerre functions are used to reduce decision variables. It’s a form of parameterization similar to polynomials in method.

  • For exact polynomial use, PFC frameworks often use polynomial basis functions, as seen in the references to Haber et al and Friebel et al.

Citing sources and addressing the five-stand question

I’ll mention CAISA publications or dblp listings and cite. A relevant example is "Predictive Functional Control for Tension of Cold Continuous Rolling Mill" (2013) which applies PFC—cited. For five-stand specifics, I’ll note the difficulty confirming the exact number but can mention Nash equilibrium and data-driven MPC strategies, though these may or may not involve polynomial parameterization.

Crafting a response in Chinese and citing references

I’ll suggest citing Hu et al. as the closest match to five-stand cold rolling. If an exact reference isn't found, I can use examples like Laguerre-function parameterization to support the idea of functional parameterization. I'll recommend citing the key works and propose a written paragraph to blend with their paper.

Summing up sources and citations

I’ll recommend using the J Process Control 2021 paper that discusses the use of Laguerre-function in DMPC and basis function parameterization. Then, I can suggest referencing works on both polynomial and basis function parameterization, including PFC and its application to tandem cold rolling, with relevant citations like Friebel et al., Haber-Rossiter-Zabet, and Zhi 2013.

Thought for 3m 34s

回答

有,但在冷轧五机架/连轧文献里,“把未来控制轨迹用多项式(如二次/三次)参数化”这种表述并不算主流;更常见的写法是把未来控制序列用**基函数展开(basis-function parameterization)**来降维——其中 Laguerre 基函数在“连轧厚度–张力 DMPC/博弈”方向里出现得比较明确,你的二次多项式其实可以被看作“基函数参数化”的一种特例(基函数选 {1,τ,τ2}\{1,\tau,\tau^2\})。

下面给你几篇可以拿来支撑“参数化控制轨迹(而不是逐点优化)”、且与“连轧/多机架/厚度张力控制”贴得比较近的论文/资料(都能作为你论文里“方法依据/相关工作”的引用):


1) 连轧(厚度–张力)DMPC/博弈里用“基函数参数化”(Laguerre)

  • Hu Y., Sun J., Peng W., Zhang D. 等:Nash equilibrium-based distributed predictive control strategy for thickness and tension control on tandem cold rolling system,Journal of Process Control,97:92–102,2021。该文明确是“连轧厚度+张力、Nash/DMPC”,并提到为降低计算复杂度引入 Laguerre 函数做控制参数化。 (J-GLOBAL)

你可以在文中这样用它来“背书”你的二次多项式:
“类似于连轧 DMPC 中常用的基函数参数化(如 Laguerre 展开)以降低在线优化维数,我们采用低阶多项式基 {1,τ,τ2}\{1,\tau,\tau^2\} 对区间内增量轨迹进行参数化……”。


2) 连轧(厚度–张力)DMPC 方向的对照文献(不一定多项式,但同一问题域)

  • Hu Y., Sun J. 等:Distributed model predictive control based on neighborhood optimization for thickness and tension control system in tandem cold rolling mill(ISA Transactions,DOI: 10.1016/j.isatra.2021.12.030)。这是同一类“连轧厚度–张力 + 分布式 MPC”问题域的代表文献,可作为你方法章节的背景对照。 (Europe PMC)

3) 若你想**正面引用“多项式/基函数参数化”**这件事:PFC(Predictive Functional Control)

PFC 的经典表述就是:操纵量(MV)用若干“基函数”加权求和表示;并且当参考/扰动用多项式叠加表示时,MV 也用类似的“基本函数”(可以是多项式)来表示——这正好能给你“用多项式参数化轨迹”提供方法论支撑:

  • Haber, Rossiter, Zabet 的 PFC 教程稿里明确写到:MV 是“weighted basis functions”的和;且当参考是“sum of polynomial functions”时,MV 由类似基本函数构成,需要最小化二次型误差。 (White Rose Research Online)

PFC 在“连轧/多机架冷轧”上的直接应用

  • Friebel, Zabet, Haber, Jelali (2015)Predictive functional control of tandem cold metal rolling,IEEE CCA 2015(会议论文,DOI: 10.1109/CCA.2015.7320649)。这是“tandem cold rolling”场景下用 PFC(本质是基函数/参数化)的一篇直接应用文献。 (DBLP)
  • Zhi (2013)Predictive Functional Control for Tension of Cold Continuous Rolling Mill(Springer 会议论文),是冷连轧张力控制里用 PFC 的例子。 (Springer Nature)

我这次检索到的“空缺点”(避免你引用时踩坑)

我没有检索到一篇公开可直接读到全文、并明确写成“five-stand tandem cold rolling + MPC + within-interval quadratic/cubic polynomial parameterization of roll gap/speed”这种一模一样表述的论文。现有更常见的是:

  • Laguerre/正交基函数给控制序列降维(连轧 DMPC/博弈里有明确例子);(ScienceDirect)
  • 或者直接用分段常值/分段线性(本质上也是“0/1 次多项式”)。

如果你愿意,我也可以顺手把你论文里“多项式参数化”的动机段落改成更像“站得住脚的引用链”写法:
“连轧在线优化通常采用基函数参数化降维(Laguerre/PFC),本文选用低阶多项式基函数实现区间内平滑与约束可检验”——把上面这些引用自然嵌进去即可。

问题

We keep the same constraints as in \eqref{eq:u_abs_clean}--\eqref{eq:u_prop_clean}.
Collect them into a compact feasible set:
\begin{equation}
\Omega_i \triangleq
\Big{\mathbf{\Gamma}_i\ \Big|\
\eqref{eq:rollout_mpc_game}\ \text{holds and}\
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}
\Big}.
\end{equation}这部分是要什么公式,为什么显示出来都是问号

思考

Clarifying LaTeX equation issues

It looks like the user is encountering question marks due to unresolved references in their LaTeX, which often happens when equation labels are missing or not defined correctly. They might need to label equations and compile twice, and if using Overleaf, remember it also requires two runs. Beyond that, if integrating constraints like gi(Γi;Γi)0g_i(\Gamma_i;\Gamma_{-i}) \leq 0, I can suggest a more compact representation for their feasible set.

Addressing LaTeX formula and reference issues

If they're using a Chinese-language environment or haven't compiled with pdflatex twice, missing references could happen. Using \eqref without the amsmath package could also cause issues, but they likely included it. As for the formula: it's about defining the set Ωi(tn)\Omega_i(t_n), so no extra formulas are needed, but you could explicitly expand constraints. The question marks are often due to unresolved references, so compiling twice or ensuring labels are correctly placed can fix this.

Thought for 23s

回答

这段 不需要“额外某个新公式”,它本质上是在定义可行域集合 Ωi\Omega_i:把你前面已经写过的预测方程和约束(输入上下界、增量轨迹上下界、输入递推)统一打包成一个集合,方便后面写成 minΓiΩiJi()\min_{\mathbf\Gamma_i\in\Omega_i} J_i(\cdot)


为什么显示成问号(??)

LaTeX 里 \eqref{...} 显示 ?? / 问号,几乎都是交叉引用没解析成功,常见原因就这几类:

  1. 没有编译两遍
    交叉引用需要至少 pdflatex 编译两次(或用 latexmk 自动多次编译)。第一次生成 .aux,第二次才能把编号填回去。

  2. label 名字对不上/写错了
    比如你引用了 eq:rollout_mpc_game,但前面实际 \label{eq:rollout_mpc_game} 不存在,或者少了一个下划线/多了空格。

  3. \label 放的位置不对
    最好确保写法是:

latex
\begin{equation} ...公式... \label{eq:rollout_mpc_game} \end{equation}

\label 要放在 equation 环境里面(通常放在公式后面最稳)。

  1. 引用的公式在另一个文件但没被编译进来
    比如你用 \includeonly{...} 或者分文件编译时,含 label 的那部分没被编译。

  2. 辅助文件坏了/缓存没更新
    Overleaf 可以点 Recompile from scratch;本地就删掉 .aux .toc .lof .lot .out 再编译两遍。


这段“要什么公式”才更规范(两种写法)

写法 A:你现在这样“用 \eqref 打包”是对的(推荐)

你这段集合定义没问题,前提是这些 label 都能找到并成功编译。

写法 B:如果你不想依赖 \eqref(避免问号),就把约束展开写出来

比如写成(示意,按你文中的约束原样展开):

latex
\begin{equation} \Omega_i \triangleq \left\{\mathbf{\Gamma}_i\ \middle|\ \begin{aligned} &\Delta \hat{x}_i(t_{n+s+1})= \Delta \hat{x}_i(t_{n+s})+\mathcal N_i(\cdot),\ s=0,\ldots,N_p-1,\\ &u_{i,\min}\le u_i(t_{n+s})\le u_{i,\max},\ s=0,\ldots,N_p-1,\\ &\Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}, \ \forall\tau\in[0,\delta_{n+s}],\\ &u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s}),\ s=0,\ldots,N_p-1 \end{aligned} \right\}. \end{equation}

这样即使引用编号出问题,也不会出现 ??


你现在最该做的快速排查

  • 确认你前面那几个公式真的有
    \label{eq:rollout_mpc_game}\label{eq:u_abs_clean}\label{eq:du_traj_clean}\label{eq:u_prop_clean}
  • 清理辅助文件后 编译两遍
  • 若仍然 ??:把其中一个 label(比如 eq:rollout_mpc_game)在全文里搜一下,看是不是拼写不一致或重复定义

如果你把那四个被引用公式的 \begin{equation}...\label{...}...\end{equation} 原文各贴一小段,我可以直接帮你定位到底是哪一个 label 出错。

问题

\eqref{eq:rollout_mpc_game}\ \text{holds and}\
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}这几个公式标号前面没有么

思考

Thought for 7s

回答

对,这些 \eqref{...} 必须在前文真的存在对应的 \label{...},否则就会显示 ??(你说的“问号”)。

1)你这几个 label 应该在前面哪里出现?

  • eq:u_abs_cleaneq:du_traj_cleaneq:u_prop_clean
    这三个在你原稿里确实已经有(你贴的第 4.2 小节里就有:绝对输入约束、增量轨迹约束、输入递推)。
  • eq:rollout_mpc_game
    这个很可能是我给你改写时新增的“game-coupled form”的编号名。如果你前面实际还叫 eq:rollout_mpc_clean,那这里引用就会找不到,直接 ??

✅ 所以最常见的情况是:你文里只有

latex
\label{eq:rollout_mpc_clean}

但你在集合里引用了

latex
\eqref{eq:rollout_mpc_game}

——名字不一致就会问号。


2)怎么确认是不是“前面没有这个标号”?

在 LaTeX 源码里全文搜索:

  • \label{eq:rollout_mpc_game}
    • 找不到:那就说明前面确实没有这个标号(所以一定 ??)。
  • 同理搜 \label{eq:u_abs_clean} 等。

3)解决办法(任选其一)

办法 A(最简单):把引用名改回你文里已有的 label
如果你预测式仍然用的是 eq:rollout_mpc_clean,那就把集合里这一句改成:

latex
\eqref{eq:rollout_mpc_clean}\ \text{holds and}\ \eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}

办法 B:在你的预测公式下面补上对应 label
确保预测式写成这样(label 名要和你引用的一致):

latex
\begin{equation} ... % 你的预测滚动方程 \label{eq:rollout_mpc_game} \end{equation}

4)别忘了:编译至少两遍

即使 label 都对,第一次编译也可能还是 ??,第二次才会正常。
如果你改完仍旧 ??,通常是:

  • label 名字拼写不一致 / 多了空格 / 少了下划线
  • 或者你在分文件编译,含 \label 的那一章没被编译进来
  • 或者辅助文件没更新(删 .aux 再编译两遍)

如果你把你“滚动预测那条公式”的 \begin{equation}...\label{...} 那一小段贴出来(只要那几行),我可以直接告诉你:你现在应该引用 eq:rollout_mpc_clean 还是把它改成 eq:rollout_mpc_game

问题

那对应的,\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}这三个公式在你现在新改的里面叫什么

思考

Thought for a second

回答

在我给你“新改的那版”里,这三个约束我没有改名——也就是说:

  • 绝对输入约束仍然叫 \label{eq:u_abs_clean}
  • 区间内增量轨迹约束仍然叫 \label{eq:du_traj_clean}
  • 输入递推/传播仍然叫 \label{eq:u_prop_clean}

也就是你在可行域里写的

latex
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}

理论上是不用动的,前提是你原文那三条约束下面确实分别有对应的 \label{...},而且拼写一致。


这三个分别对应你原文哪里(方便你核对)

  • eq:u_abs_clean:你原文的 “Absolute input bounds” ui,minui(tn+s)ui,maxu_{i,\min}\le u_i(t_{n+s})\le u_{i,\max}
  • eq:du_traj_clean:你原文的 “Increment trajectory bounds for all τ\tau Δui,minΔui,n+s(τ;Γi,n+s)Δui,max\Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}
  • eq:u_prop_clean:你原文的 “propagate the absolute input sequence” ui(tn+s)=ui(tn+s1)+Δui(tn+s)u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s})

你现在真正容易出问题的是哪个?

大概率是 eq:rollout_mpc_game(新加/新命名的),而不是这三个老标签。

如果你前面预测滚动方程还是原来的 \label{eq:rollout_mpc_clean},那可行域里就把 eq:rollout_mpc_game 改回 eq:rollout_mpc_clean 就行。

把你滚动预测那条公式附近的 \label{...} 那两三行贴出来,我可以直接告诉你该引用哪个名字,避免你来回试。

问题

%========================
\section{Nash Equilibrium-Based RNE-DMPC}
%========================

The five-stand tandem cold rolling system is strongly coupled through inter-stand tension propagation.
As a result, changes in control actions (roll gap and stand speed) at one stand can affect both upstream and downstream stands,
making centralized online optimization over all stands' decision variables computationally demanding.

To mitigate this issue, we decompose the global predictive-control problem into N=5N=5 local subproblems associated with individual stands.
Each local controller optimizes its own decision variables while accounting for coupling via limited information exchange with neighboring controllers.
Motivated by game-theoretic coordination \citep{rawlings2008coordinating}, we formulate distributed coordination as a Nash-equilibrium-seeking iteration.
Based on the trained residual neural network surrogate model, we construct a Nash-equilibrium-based distributed MPC method (RNE-DMPC)
for coordinated thickness--tension regulation and tracking. The overall control structure is shown in Figure~\ref{4}.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x2.pdf}
\caption{Schematic diagram of the control architecture for a tandem cold rolling mill.}\label{4}
\end{figure*}

At sampling time tnt_n, stand ii chooses the polynomial-parameter sequence
Γi(tn)RpNc\mathbf{\Gamma}_i(t_n)\in\mathbb{R}^{pN_c}, where p=6p=6.
Let Γ(tn)col{Γ1(tn),,Γ5(tn)}\mathbf{\Gamma}(t_n)\triangleq \mathrm{col}\{\mathbf{\Gamma}_1(t_n),\ldots,\mathbf{\Gamma}_5(t_n)\}
denote the joint strategy profile, and let Γi(tn)\mathbf{\Gamma}_{-i}(t_n) denote the collection of all strategies except stand ii.

Given the current measured/estimated deviation state Δxi(tn)\Delta x_i(t_n) and the strategies
(Γi(tn),ΓZi(tn))(\mathbf{\Gamma}_i(t_n),\mathbf{\Gamma}_{Z_i}(t_n)),
the multi-step prediction used by stand ii is written explicitly as
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
&=
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s};\cdot),\,
\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}),\\
&\qquad
\Gamma_{i,n+s},\,
\delta_{n+s};\Theta_i^*
\Big),
\end{aligned}
\label{eq:rollout_mpc_game}
\end{equation}
for s=0,,Np1s=0,\ldots,N_p-1, with initialization Δx^i(tn;)=Δxi(tn)\Delta \hat{x}_i(t_n;\cdot)=\Delta x_i(t_n).
Here the neighbor stack Δx^Zi(tn+s;ΓZi)\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}) is generated from neighbors' strategies via the same learned predictors.

Over [tn+s,tn+s+1][t_{n+s},t_{n+s+1}] with length δn+s\delta_{n+s},
\begin{equation}
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\tau
+\Gamma_{i,n+s,2}\tau^2,\qquad \tau \in [0,\delta_{n+s}],
\end{equation}
and the interval-averaged increment is
\begin{equation}
\Delta u_i(t_{n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}.
\label{eq:du_avg_clean}
\end{equation}

\begin{remark}
Because inter-stand tension TiT_i is jointly affected by the adjacent stands ii and i+1i+1,
the predicted evolution of Δxi\Delta x_i depends on neighbors' future actions,
hence the MPC problems are not independent but form a coupled dynamic game.
\end{remark}

At time tnt_n, the local strategy of stand ii is
\begin{equation}
\mathbf{\Gamma}_i(t_n)
=
\mathrm{col}\{\Gamma_{i,n},\Gamma_{i,n+1},\ldots,\Gamma_{i,n+N_c-1}\}
\in \mathbb{R}^{pN_c}.
\end{equation}

In deviation coordinates, the regulation/tracking objective is Δxi(t)0\Delta x_i(t)\rightarrow 0, i.e.
\begin{equation}
\Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d},\qquad d=3.
\end{equation}

Recall Δxi=[Δhi,ΔTi1,ΔTi]\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top.
Define the row selectors
\begin{equation}
C^- \triangleq [0\ \ 1\ \ 0]\in\mathbb{R}^{1\times 3},\qquad
C^+ \triangleq [0\ \ 0\ \ 1]\in\mathbb{R}^{1\times 3},
\end{equation}
so that CΔxi=ΔTi1C^- \Delta x_i=\Delta T_{i-1} (upstream interface) and C+Δxi=ΔTiC^+\Delta x_i=\Delta T_i (downstream interface).

For the interface between stands ii and i+1i+1,
stand ii's prediction provides C+Δx^iC^+\Delta \hat{x}_i, while stand i+1i+1's prediction provides CΔx^i+1C^- \Delta \hat{x}_{i+1}.
Their mismatch measures coupling inconsistency:
\begin{equation}
e_{i}^{\mathrm{sh}}(t_{n+s};\mathbf{\Gamma})
\triangleq
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma}),\qquad i=1,\ldots,4.
\label{eq:shared_tension_mismatch}
\end{equation}

We define the stage cost of stand ii as a function of all players' strategies:
\begin{equation}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})
=
\sum_{s=1}^{N_p}
\left\|
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
\right\|_{Q_i}^{2}
+
\sum_{s=0}^{N_c-1}
\left\|\Gamma_{i,n+s}\right\|_{R_i}^{2}
+
J_i^{\mathrm{cpl}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}),
\label{eq:Ji_game}
\end{equation}
where Qi0Q_i\succeq 0 weights thickness and tension deviations, and Ri0R_i\succeq 0 penalizes actuation magnitudes.

The coupling term JicplJ_i^{\mathrm{cpl}} explicitly reflects the game/coordination requirement on shared tensions.
A simple and effective choice is to penalize the interface mismatches adjacent to stand ii:
\begin{equation}
\begin{aligned}
J_i^{\mathrm{cpl}}
&=
\mu_i\sum_{s=1}^{N_p}
\Big(
\mathbb{I}_{\{i\ge 2\}}
\big|
C^-\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^+\Delta \hat{x}_{i-1}(t_{n+s};\mathbf{\Gamma})
\big|^2 \\
&\qquad\quad
+
\mathbb{I}_{\{i\le 4\}}
\big|
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma})
\big|^2
\Big),
\end{aligned}
\label{eq:coupling_cost}
\end{equation}
with μi>0\mu_i>0 and indicator I{}\mathbb{I}_{\{\cdot\}}.
This term makes the coupling conflict explicit: unilateral actions that locally reduce thickness error may worsen shared-tension
compatibility and thus increase JiJ_i, and also affect neighbors' objectives.

We keep the same constraints as in \eqref{eq:u_abs_clean}--\eqref{eq:u_prop_clean}.
Collect them into a compact feasible set:
\begin{equation}
\Omega_i \triangleq
\Big\{\mathbf{\Gamma}_i\ \Big|\
\eqref{eq:rollout_mpc_game}\ \text{holds and}\
\eqref{eq:u_abs_clean},\eqref{eq:du_traj_clean},\eqref{eq:u_prop_clean}\ \text{are satisfied}
\Big\}.
\end{equation}

\paragraph{Local best-response problem (player ii).}
Given neighbors' current strategies (or the communicated predicted trajectories induced by them),
stand ii solves the differentiable NLP:
\begin{equation}
\mathbf{\Gamma}_i^{\mathrm{BR}}
=
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}\
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}).
\label{eq:local_BR}
\end{equation}
Because the learned surrogate is differentiable, \eqref{eq:local_BR} can be solved by standard gradient-based NLP solvers
(e.g., SQP or interior-point methods with automatic differentiation).

%========================
\subsection{Nash equilibrium coordination and relaxed best-response solution}
\label{subsec:nash_clean}
%========================

\paragraph{Dynamic game definition (five-stand).}
At each sampling time tnt_n, the distributed MPC coordination induces a finite-horizon dynamic game:
players are stands i=1,,5i=1,\ldots,5; strategy sets are Ωi\Omega_i; and payoff (cost) functions are Ji(Γi;Γi)J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})
defined in \eqref{eq:Ji_game}--\eqref{eq:coupling_cost}.

\paragraph{Nash equilibrium (NE).}
A joint strategy profile Γ=col{Γ1,,Γ5}\mathbf{\Gamma}^*=\mathrm{col}\{\mathbf{\Gamma}_1^*,\ldots,\mathbf{\Gamma}_5^*\}
is a Nash equilibrium if
\begin{equation}
\forall i\in\{1,\ldots,5\},\qquad
\mathbf{\Gamma}_i^*\in
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^*).
\label{eq:NE_def}
\end{equation}
This definition explicitly characterizes the strategic coupling:
each player's optimal decision depends on neighbors' decisions through the shared-tension dynamics and the coupling term.

\paragraph{Relaxed distributed best-response iteration (improved robustness).}
To compute an NE online with limited communication, we employ a relaxed best-response iteration.
Let ll denote the Nash-iteration index.
Given Γ(l1)\mathbf{\Gamma}^{(l-1)}, each stand computes a best response ΓiBR,(l)\mathbf{\Gamma}_i^{\mathrm{BR},(l)}
by solving \eqref{eq:local_BR}, and then updates with relaxation:
\begin{equation}
\mathbf{\Gamma}_i^{(l)}
=
(1-\omega)\mathbf{\Gamma}_i^{(l-1)}
+
\omega\,\mathbf{\Gamma}_i^{\mathrm{BR},(l)},
\qquad \omega\in(0,1].
\label{eq:relaxed_BR}
\end{equation}
The relaxation factor ω\omega mitigates oscillations caused by strong coupling and improves practical convergence.

\paragraph{Convergence metric (unchanged).}
\begin{equation}
\varsigma^{(l)}
=
\max_i
\frac{\left\|
\mathbf{\Gamma}_i^{(l)}-\mathbf{\Gamma}_i^{(l-1)}
\right\|_2}{
\left\|
\mathbf{\Gamma}_i^{(l-1)}
\right\|_2+\epsilon},
\end{equation}
with ϵ>0\epsilon>0 small.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.12}
\setlength{\tabcolsep}{3.5pt}
\caption{Relaxed distributed Nash best-response iteration for RNE-DMPC (five-stand).}
\label{tab:nash_iter_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X}
\toprule
\textbf{Step} & \textbf{Description} \\
\midrule
A &
Initialize $l=1$ and warm-start $\mathbf{\Gamma}_i^{(0)}$ (e.g., from the previous sampling time). \\

B &
Communicate $\mathbf{\Gamma}_i^{(l-1)}$ (or the induced predicted trajectories) among neighbors; form $\mathbf{\Gamma}_{Z_i}^{(l-1)}$. \\

C &
Given $\mathbf{\Gamma}_{-i}^{(l-1)}$, solve the best-response NLP \eqref{eq:local_BR} to obtain $\mathbf{\Gamma}_i^{\mathrm{BR},(l)}$. \\

D &
Update the relaxed strategy using \eqref{eq:relaxed_BR} and compute the induced predictions
$\Delta \hat{x}_i^{(l)}(\cdot;\mathbf{\Gamma}^{(l)})$ via \eqref{eq:rollout_mpc_game}. \\

E &
Broadcast $\mathbf{\Gamma}_i^{(l)}$ and the predicted interface quantities needed in \eqref{eq:coupling_cost}
(e.g., $C^\pm\Delta \hat{x}_i^{(l)}$) to neighbors. \\

F &
Compute $\varsigma^{(l)}$; if $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$; otherwise $l\leftarrow l+1$ and repeat. \\
\bottomrule
\end{tabularx}
\end{table}

\paragraph{Receding-horizon implementation (unchanged).}
After convergence, only the first-interval parameters Γi,n\Gamma_{i,n}^* are applied.
The within-interval increment is Δui,n(τ;Γi,n)\Delta u_{i,n}(\tau;\Gamma_{i,n}^*) and the discrete applied increment uses the interval average:
\begin{equation}
\Delta u_i(t_n)
=
\Gamma_{i,n,0}^*
+
\Gamma_{i,n,1}^*\frac{\delta_n}{2}
+
\Gamma_{i,n,2}^*
\frac{\delta_n^2}{3},
\end{equation}
followed by ui(tn)=ui(tn1)+Δui(tn)u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n).

\paragraph{Remark (what makes the ``game'' visible).}
Equations \eqref{eq:Ji_game}--\eqref{eq:coupling_cost} and the NE definition \eqref{eq:NE_def} explicitly show that each player's objective
depends on others' strategies through (i) the coupled prediction dynamics and (ii) the shared-tension interface mismatch penalty,
thus making the multi-stand strategic interaction and coordination mechanism transparent.这里面有的公式标号没有这个公式,怎么解决

思考

Providing guidance on LaTeX compilation issues

To resolve the issue with missing equation labels, first, compile twice, ensure each label is unique, and check for any previous references in the document. If the label uses \mathbf{\Gamma} or \mathbb{I}, ensure the required packages are included. For missing labels, like eq:u_prop_clean, verify prior definitions and address any range mismatches.

Fixing undefined label issues in LaTeX

If the equations aren't being repeated, referencing should work as long as the labels were defined earlier. If the user sees question marks, it's likely that some labels were not compiled because they are in another file. To fix this, either include the file or ensure the labels are correctly defined. Numeric labels like \label{4} can be confusing, so it's better to use more descriptive names like \label{eq:du_avg_clean}. Some equations may be missing labels, so check prior definitions.

Identifying missing labels and giving fix instructions

Some equations, like eq:u_abs_clean, might be missing labels because they were defined in a previous chapter. If files are split, make sure the order is correct, or cross-references may fail. I suggest using \Cref for better formatting, and adding a label for the convergence metric. To fix:

  1. Search for \eqref{eq:u_abs_clean} and make sure each label matches its equation.
  2. Verify labels are unique.
  3. Compile twice to ensure proper referencing.

Thought for 59s

回答

你这段里出现“有的公式标号没有这个公式/显示??”,本质原因只有一个:你引用的 label(比如 eq:u_abs_clean)在当前编译出来的文档里没有成功定义(或定义没被编译进来),所以 LaTeX 只能给 ??

结合你贴的代码,最常见的情况是下面几种:


1)你引用的这三个 label 在“新文件/新章节”里其实没被编译进来

你在这里引用了:

  • eq:u_abs_clean
  • eq:du_traj_clean
  • eq:u_prop_clean

但这三个约束不在你贴的这一段里,而是在前面章节(你原稿 4.2)里。

如果你现在是分文件写作(比如 main.tex + chap4.tex),就会出现这种情况:

  • 你在 chap4.tex 引用 eq:u_abs_clean
  • main.tex 里并没有 \include{chapX} 把包含这些 \label{eq:u_abs_clean} 的文件编进来
    或者你用了 \includeonly{chap4} 导致前面的章节没编译

✅ 解决:

  • 确认包含那三个约束的章节文件确实被 \include{...} / \input{...} 进主文件,并且不在 \includeonly 排除名单里。

2)label 名字在你前文里改过,但这里仍用旧名字

比如你前面其实写的是:

latex
\label{eq:u_abs}

但这里引用的是 eq:u_abs_clean,那就必然 ??

✅ 解决:

  • 全文搜索 \label{eq:u_abs_clean}\label{eq:du_traj_clean}\label{eq:u_prop_clean}
    找不到就说明“前面没有这个标号”,把这里的 \eqref{...} 改成你真实存在的 label 名。

3)编译次数不够 / 辅助文件没更新

交叉引用需要至少编译两遍(第一次生成 .aux,第二次回填编号)。

✅ 解决:

  • 连续编译两次(Overleaf 也一样)
  • 还不行就“从头编译”:删除 .aux/.out/.toc 等或 Overleaf 选择 Recompile from scratch

4)最稳的“自检方法”

看编译日志(log)里会有这种提示:

LaTeX Warning: Reference 'eq:u_abs_clean' on page ... undefined

这会直接告诉你到底哪个 label 不存在


5)如果你想让这一节完全自洽(不依赖前文),可以把三条约束在这里再写一遍并打 label

但注意:不要和前文重复 label,否则会变成 “Label multiply defined”。

如果你确实要在这一节重写约束,建议改成新名字,例如:

latex
\begin{equation} u_{i,\min} \le u_i(t_{n+s}) \le u_{i,\max},\ s=0,\ldots,N_p-1. \label{eq:u_abs_game} \end{equation} \begin{equation} \Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}, \ \forall\tau\in[0,\delta_{n+s}]. \label{eq:du_traj_game} \end{equation} \begin{equation} u_i(t_{n+s}) = u_i(t_{n+s-1}) + \Delta u_i(t_{n+s}),\ s=0,\ldots,N_p-1. \label{eq:u_prop_game} \end{equation}

然后把你可行域集合里的引用同步改成 ..._game


你这份代码里,eq:rollout_mpc_game 已经定义了

你贴的滚动预测式下面有:

latex
\label{eq:rollout_mpc_game}

所以它不会是 ?? 的来源;真正容易 ?? 的就是那三个“前文约束”的 label。


如果你把那三条约束在你全文里实际写的那几行(含 \label{...})贴出来,我可以直接告诉你:这里应该引用什么名字、或你主文件哪里没 include 进来。

问题

这个约束条件重新在这写一下呗,新版前面好像没写

思考

Thought for 9s

回答

可以的。你现在这节里既然“新版前面没写约束”,那就把约束在这一节重新写一遍并重新打 label(不要再用 ..._clean,避免和旧稿/别处冲突)。下面这段你可以直接粘贴到你那句 “We keep the same constraints …” 的位置(或者紧跟在 \eqref{eq:coupling_cost} 后面)。

latex
\paragraph{Constraints and feasible set.} We enforce the same types of constraints as in the practical mill: absolute-input bounds and within-interval increment bounds. To keep this section self-contained, we restate them here. \emph{Absolute input bounds (roll gap and speed):} \begin{equation} u_{i,\min}\le u_i(t_{n+s})\le u_{i,\max}, \qquad s=0,\ldots,N_p-1, \label{eq:u_abs_game} \end{equation} where $u_i(t)=[s_i(t),\,v_i(t)]^\top$. \emph{Within-interval increment-trajectory bounds (for all $\tau$):} \begin{equation} \Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max}, \qquad \forall\tau\in[0,\delta_{n+s}],\ s=0,\ldots,N_p-1, \label{eq:du_traj_game} \end{equation} where $\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})$ is given by the polynomial parameterization. \emph{Consistency with discrete execution (interval average and propagation):} Define the interval-averaged increment \begin{equation} \Delta u_i(t_{n+s}) = \frac{1}{\delta_{n+s}}\int_{0}^{\delta_{n+s}}\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\,d\tau = \Gamma_{i,n+s,0} +\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2} +\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}, \label{eq:du_avg_game} \end{equation} and propagate the absolute input along the horizon by \begin{equation} u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n),\qquad u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s}),\ s=1,\ldots,N_p-1, \label{eq:u_prop_game} \end{equation} where $u_i(t_{n-1})$ is the applied (measured) input from the previous sampling instant. \emph{Compact feasible set:} \begin{equation} \Omega_i \triangleq \Big\{\mathbf{\Gamma}_i\ \Big|\ \eqref{eq:rollout_mpc_game}\ \text{holds and}\ \eqref{eq:u_abs_game},\eqref{eq:du_traj_game},\eqref{eq:u_prop_game}\ \text{are satisfied} \Big\}. \label{eq:Omega_i_game} \end{equation}

你这样改的好处

  • 这一节自洽:不再依赖“前面某处的 eq:u_abs_clean 等 label”,也就不会出现 ??
  • label 全是新的:eq:u_abs_game / eq:du_traj_game / eq:u_prop_game,不怕和旧稿重复。

最后提醒一句:改完后至少编译两遍(或 Overleaf “Recompile from scratch”),引用号才能稳定显示。

问题

State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf

%========================
\section{Construction of Residual Neural Network}
%========================

Given the dataset, the neural network model is trained to learn a stand-wise, control-dependent one-step evolution law of deviation states:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\,\Theta_i\Big),
\label{eq:learned_dyn_clean}
\end{equation}
where Ni()\mathcal{N}_i(\cdot) outputs the one-step deviation-state change and Θi\Theta_i are trainable parameters.

\begin{remark}
If Ni\mathcal{N}_i does not take control information as input (here Γi,n\Gamma_{i,n} and δn\delta_n),
the predictor becomes an autoregressive model that only reproduces trajectories under the training input patterns
and cannot answer the counterfactual question: ``what will happen if we choose a different roll gap and speed trajectory?''
Since MPC optimizes over candidate decisions, a control-dependent predictor \eqref{eq:learned_dyn_clean} is necessary
to evaluate the predicted thickness and tension behavior under different candidate actuator trajectories.
\end{remark}

Let d=3d=3 (state dimension), Zi|Z_i| be the number of neighbors of stand ii in \eqref{eq:Zi_clean}, and p=6p=6 in \eqref{eq:Gamma_clean}.
Define the input vector
\begin{equation}
X_{i,\text{in}} \triangleq
\big[
\Delta x_i(t_n)^\top,\,
\Delta x_{Z_i}(t_n)^\top,\,
\Gamma_{i,n}^\top,\,
\delta_n
\big]^\top
\in \mathbb{R}^{d(1+|Z_i|)+p+1}.
\label{eq:X_in_clean}
\end{equation}
The network mapping is
\begin{equation}
\mathcal{N}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d}.
\end{equation}

To improve training stability and long-horizon rollout robustness, we use a residual form.
Let I^iRd×(d(1+Zi)+p+1)\hat{I}_i\in\mathbb{R}^{d\times(d(1+|Z_i|)+p+1)} be a selection matrix extracting the local state block:
\begin{equation}
\hat{I}_i = [I_d,\ 0_{d\times(d|Z_i|+p+1)}].
\label{eq:Ihat_clean}
\end{equation}
Then the one-step predictor is written as
\begin{equation}
X_{i,\text{out}} = \hat{I}_i X_{i,\text{in}} + \mathcal{N}_i(X_{i,\text{in}}; \Theta_i),
\label{eq:res_predict_clean}
\end{equation}
where Xi,outX_{i,\text{out}} represents the predicted Δxi(tn+1)\Delta x_i(t_{n+1}).
This structure implements a baseline-plus-correction interpretation:
the shortcut propagates the current deviation state Δxi(tn)\Delta x_i(t_n), while the network learns the correction capturing
unmodeled nonlinearities and inter-stand coupling (via ΔxZi\Delta x_{Z_i}) under varying operating conditions.

To improve robustness when δn\delta_n varies, we introduce an auxiliary branch inside Ni\mathcal{N}_i:
\begin{equation}
\mathcal{N}_i(X_{i,\text{in}};\Theta_i)\triangleq
\psi_i(X_{i,\text{in}};\Theta_{\psi_i}) + \rho_i(X_{i,\text{in}};\theta_i),
\label{eq:aux_clean}
\end{equation}
where ψi()\psi_i(\cdot) is a lightweight feedforward branch that captures low-frequency/scale effects strongly related to δn\delta_n,
and ρi()\rho_i(\cdot) captures the remaining nonlinear coupling corrections.
When ψi()0\psi_i(\cdot)\equiv 0, the model reduces to a standard residual network.

For the jj-th sample in \eqref{eq:S_i_clean}, define
\begin{equation}
X_{i,\text{in}}^{(j)} =
\big[
\Delta x_i^{(j)}(t_n),\ \Delta x_{Z_i}^{(j)}(t_n),\
\Gamma_{i,n}^{(j)},\ \delta_n^{(j)}
\big]^{\top},
\end{equation}
and the supervised residual target
\begin{equation}
\Delta r_i^{(j)}=\Delta x_i^{(j)}(t_{n+1})-\Delta x_i^{(j)}(t_n).
\label{eq:target_clean}
\end{equation}

To suppress accumulation drift induced by long-horizon recursion and to improve long-term predictive stability,
we train the forward predictor jointly with an auxiliary backward residual model
and impose a multi-step reciprocal-consistency regularization over a KK-step segment from Si(K)S_i^{(K)}.

Construct a backward residual network
\begin{equation}
\mathcal{B}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d},
\end{equation}
parameterized by Θˉi\bar{\Theta}_i. For the backward step associated with interval [tn,tn+1][t_n,t_{n+1}], define
\begin{equation}
\begin{aligned}
X_{i,\mathrm{in}}^{b}
&=
\big[
\Delta x_i(t_{n+1}),\ \Delta x_{Z_i}(t_{n+1}),\
\Gamma_{i,n},\ \delta_n
\big]^{\top},\\
X_{i,\mathrm{out}}^{b}
&=
\hat{I}_i X_{i,\mathrm{in}}^{b} + \mathcal{B}_i(X_{i,\mathrm{in}}^{b};\bar{\Theta}_i),
\end{aligned}
\label{eq:back_clean}
\end{equation}
where Xi,outbX_{i,\mathrm{out}}^{b} represents the backward estimate of Δxi(tn)\Delta x_i(t_n).
The supervised backward residual target is
\begin{equation}
\Delta r_i^{b}=\Delta x_i(t_n)-\Delta x_i(t_{n+1}).
\end{equation}

Given a segment sample Wi,nSi(K)\mathcal{W}_{i,n}\in S_i^{(K)}, initialize
\begin{equation}
\Delta \hat{x}_i(t_n)=\Delta x_i(t_n),
\end{equation}
and recursively apply the forward predictor for KK steps:
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,
\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,\delta_{n+s};\,\Theta_i
\Big),\\
&\qquad s=0,\ldots,K-1.
\end{aligned}
\label{eq:fwd_roll_clean}
\end{equation}

Set the terminal condition
\begin{equation}
\Delta \bar{x}_i(t_{n+K})=\Delta \hat{x}_i(t_{n+K}),
\end{equation}
and roll back using Bi\mathcal{B}_i:
\begin{equation}
\begin{aligned}
\Delta \bar{x}_i(t_{n+s})
&=
\hat{I}_i X_{i,\mathrm{in}}^{b}(t_{n+s})
+
\mathcal{B}_i\!\Big(X_{i,\mathrm{in}}^{b}(t_{n+s});\,\bar{\Theta}_i\Big),
\quad s=K-1,\ldots,0,
\end{aligned}
\label{eq:bwd_roll_clean}
\end{equation}
where
\begin{equation}
X_{i,\mathrm{in}}^{b}(t_{n+s})=
\big[
\Delta \bar{x}_i(t_{n+s+1}),\ \Delta \hat{x}_{Z_i}(t_{n+s+1}),\
\Gamma_{i,n+s},\ \delta_{n+s}
\big]^{\top}.
\end{equation}

Define the multi-step reciprocal prediction error
\begin{equation}
E_i(t_n)
=
\sum_{s=0}^{K}
\left\|
\Delta \hat{x}_i(t_{n+s})-\Delta \bar{x}_i(t_{n+s})
\right\|^2.
\end{equation}

We jointly minimize:
\begin{equation}
\begin{aligned}
L_{\mathrm{1step}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s+1})-\Delta x_i^{(j)}(t_{n+s})\big)
-\mathcal{N}_i\!\left(
X_{i,\mathrm{in}}^{(j)}(t_{n+s});\Theta_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{bwd}}(\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s})-\Delta x_i^{(j)}(t_{n+s+1})\big)
-\mathcal{B}_i\!\left(
X_{i,\mathrm{in}}^{b,(j)}(t_{n+s});\bar{\Theta}_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{msrp}}(\Theta_i,\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K} E_i^{(j)}(t_n),\\[2mm]
L_{\mathrm{roll}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\sum_{s=1}^{K}
\Big\|
\Delta x_i^{(j)}(t_{n+s})-\Delta \hat{x}_i^{(j)}(t_{n+s})
\Big\|^2.
\end{aligned}
\label{eq:loss_clean}
\end{equation}
Here, L1stepL_{\mathrm{1step}} enforces one-step accuracy; LrollL_{\mathrm{roll}} explicitly suppresses long-horizon drift under recursion;
LmsrpL_{\mathrm{msrp}} regularizes the learned dynamics by enforcing reciprocal consistency between forward and backward rollouts;
and LbwdL_{\mathrm{bwd}} trains the backward model for the consistency regularization.
In implementation, these terms are combined as
\begin{equation}
L_{\mathrm{total}}=\lambda_1 L_{\mathrm{1step}}+\lambda_2 L_{\mathrm{roll}}+\lambda_3 L_{\mathrm{msrp}}+\lambda_4 L_{\mathrm{bwd}},
\end{equation}
where λ1,λ2,λ3,λ4>0\lambda_1,\lambda_2,\lambda_3,\lambda_4>0 are tuned on a validation set.

After training, the forward predictor is
\begin{equation}
\Delta \hat{x}_i(t_{n+1})
=
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(
\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,
\Gamma_{i,n},\,\delta_n;\,\Theta_i^*
\Big),
\label{eq:pred_clean}
\end{equation}
and multi-step prediction is obtained by recursive rollout of \eqref{eq:pred_clean}.
This learned predictor is the internal model used by the MPC optimizer in the next section.

Finally, network parameters are optimized using Adam:
\begin{equation}
\Theta_{i,t+1} = \Theta_{i,t} - \alpha \frac{\hat{m}_{i,t}}{\sqrt{\hat{v}_{i,t}} + \varepsilon},
\end{equation}
where α\alpha is the learning rate (we use α\alpha to avoid conflict with other symbols),
m^i,t\hat{m}_{i,t} and v^i,t\hat{v}_{i,t} are bias-corrected moment estimates, and ε>0\varepsilon>0 is a small constant for numerical stability.
Figure~\ref{fig:rnn_logic} illustrates the overall structure.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.85]{picture/x6.pdf}
\caption{Logic diagram of the residual neural network.}
\label{fig:rnn_logic}
\end{figure}这是我目前的第三部分。老师说在写一点,就是把网络结构和机理模型对比一下,在机理模型的角度能匹配上,能更好的说明可解释性的问题。可以仿照我发你的这篇文章写

思考

Adding to Section 3 with mechanism interpretation

To expand Section 3, I’ll compare the neural network with the mechanistic model for interpretability. Referencing the aero-engine paper, I’ll include the evolution operator, ResNet as a time integrator, and the residual network resembling mechanistic dynamics. The auxiliary branch will align with gResNet, ensuring stability with MSRP.

Adding mechanism-consistent sections to the paper

I'll create two subsections: one for "Mechanism-consistent interpretation and explainability" and another linking to the evolution operator of the mechanistic model. I’ll include equations to show the relation between continuous dynamics and the ResNet structure. The notation will align with earlier mappings, ensuring precision with inputs and increments.

Linking evolution operator and network proxy

We can explain that since the input within each interval is parameterized by Γ and δ, the evolution operator depends on these values. The network uses them as proxies. We can also draw parallels from the aero-engine paper and define the increment φ_Δ. Text will reference key concepts and include citations.

Guidance for adding mechanism-consistent interpretation

You can insert the following into Section 3, possibly after eq:res_predict_clean or eq:aux_clean. It provides a LaTeX code snippet for the explanation of the mechanism-consistent interpretation and its explainability. The network's output is physically meaningful, representing increments in thickness and tensions. ResNet functions as a time integrator similar to Euler/Runge-Kutta. However, be sure to adjust formatting based on your existing document.

Crafting mechanism-consistent interpretation in LaTeX

The snippet should describe how the network approximates increments akin to the evolution operator, using integral notation. It links residual networks to proposed dynamics and auxiliary operators. The interpretation connects physically meaningful outputs (increments) ensuring consistency and invertibility. We can cite portions from the provided PDF for justification.

Thought for 2m 31s

回答

可以。你这第三部分已经把“残差网络 + 前向/后向 + MSRP”写得很完整了;老师要你补的其实是再加一小节,把你这个网络结构从“机理模型(连续时间动力学)→演化算子→残差网络时间积分器”的角度串起来,让读者觉得它不是“黑盒拟合”,而是在结构上能对上机理

下面我给你一段可直接粘贴进第 3 节的新小节(建议放在你式 \eqref{eq:res_predict_clean} 和 \eqref{eq:aux_clean} 后面,作为一个新 \subsection 或 \paragraph)。写法是仿照你发的那篇 aero-engine 文章里“ResNet ≈ 演化算子/时间积分器、残差≈增量、gResNet 用辅助算子处理较大时间步长、MSRP 用前后向一致性稳定长预测”的逻辑来写的:ResNet 作为时间积分器的解释、残差对应增量/积分项、gResNet 的辅助算子思想 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf),以及 MSRP 前后向回环误差的定义 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)。


latex
%======================== \subsection{Mechanism-consistent interpretation and explainability of the residual network} \label{subsec:mech_interp} \paragraph{Mechanistic viewpoint: coupled continuous-time dynamics and evolution operator.} Although the five-stand cold rolling mill involves complex rolling deformation and tension-transport coupling, its stand-wise deviation dynamics can be abstractly described by a coupled nonlinear ODE: \begin{equation} \frac{d}{dt}\Delta x_i(t) = f_i\!\Big(\Delta x_i(t),\,\Delta x_{Z_i}(t),\,u_i(t),\,d_i(t)\Big), \qquad i=1,\ldots,5, \label{eq:mech_ode} \end{equation} where $f_i(\cdot)$ summarizes the mechanistic effects (plastic deformation, strip elasticity, friction, transport, etc.). For a given sampling interval length $\delta_n$ and a within-interval input trajectory $u_i(t_n+\tau)$, the state transition over one interval can be written through an evolution operator: \begin{equation} \Delta x_i(t_{n+1})=\Phi_{i,\delta_n}\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,u_i([t_n,t_{n+1}]),\,d_i([t_n,t_{n+1}])\Big). \label{eq:evolution_operator_mill} \end{equation} By the fundamental theorem of calculus, \eqref{eq:mech_ode} implies the increment form \begin{equation} \Delta x_i(t_{n+1}) = \Delta x_i(t_n) + \underbrace{\int_{0}^{\delta_n} f_i\!\Big(\Delta x_i(t_n+\tau),\,\Delta x_{Z_i}(t_n+\tau),\,u_i(t_n+\tau),\,d_i(t_n+\tau)\Big)\,d\tau}_{\triangleq\ \varphi_{i,n}}, \label{eq:increment_integral} \end{equation} where $\varphi_{i,n}$ is the one-interval state increment generated by the mechanistic dynamics. \paragraph{Why the residual structure is interpretable (network output $\approx$ mechanistic increment).} Our learned model \eqref{eq:learned_dyn_clean} adopts the same increment form as \eqref{eq:increment_integral}: \begin{equation} \Delta x_i(t_{n+1}) \approx \Delta x_i(t_n) + \mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\Theta_i\Big). 
\end{equation} Here, $\mathcal{N}_i(\cdot)$ plays the role of a data-driven approximation of the integral increment $\varphi_{i,n}$, i.e., it approximates the accumulated effect of the mechanistic dynamics over $[t_n,t_{n+1}]$. This is consistent with the well-known interpretation that a residual network behaves like a one-step time integrator: the identity (shortcut) path propagates the current state, while the residual branch represents the increment over the time lag. \paragraph{Control-trajectory parameterization matches the integral dependence.} Inside each interval, we do not optimize point-wise $u_i(t)$ but parameterize the increment trajectory by $\Delta u_{i,n}(\tau;\Gamma_{i,n})$. Hence, the mechanistic increment $\varphi_{i,n}$ in \eqref{eq:increment_integral} depends on the \emph{whole} within-interval trajectory. Feeding $(\Gamma_{i,n},\delta_n)$ into $\mathcal{N}_i$ is therefore a compact way to represent how different candidate gap/speed trajectories change the integral effect and thus the next thickness--tension state. \paragraph{Auxiliary branch as a learnable ``coarse physics'' predictor.} When $\delta_n$ varies and is not very small, directly learning $\varphi_{i,n}$ may be harder. Motivated by the generalized residual idea (output = auxiliary prediction + residual correction), we decompose the increment predictor into two parts in \eqref{eq:aux_clean}: \begin{equation} \mathcal{N}_i(\cdot)=\psi_i(\cdot)+\rho_i(\cdot). \end{equation} Conceptually, $\psi_i(\cdot)$ captures low-frequency / scale effects strongly related to $\delta_n$ (e.g., coarse drift dominated by interval length or operating regime), while $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections. This provides a mechanism-consistent interpretation: a baseline increment (coarse integrator) plus a residual correction that compensates unmodeled nonlinearities and inter-stand coupling. 
\paragraph{Backward model and reciprocal consistency as an approximate invertibility constraint.} In mechanistic dynamics, evolving forward by $\delta_n$ and then evolving backward by the same lag should approximately return to the initial state (up to disturbances and numerical errors). Therefore, we introduce a backward residual model and a multi-step reciprocal-consistency loss. This can be interpreted as enforcing a data-driven approximate inverse of the learned evolution operator, which suppresses long-horizon drift and improves rollout stability. \begin{remark} The above interpretation links the proposed residual network to the mechanistic state transition: the shortcut corresponds to state propagation, and the learned residual corresponds to the integrated effect of nonlinear rolling and tension coupling over one sampling interval. Hence the network is not an arbitrary black box but a structured approximation of the mill evolution operator. \end{remark}

你需要做的两点小改动/提示

  1. 你文里已经有 Δui,n(τ;Γi,n)\Delta u_{i,n}(\tau;\Gamma_{i,n}) 的定义,所以在上面“Control-trajectory parameterization”那段不会冲突;如果你愿意更严谨,可在 \eqref{eq:increment_integral} 里把 ui(tn+τ)u_i(t_n+\tau) 写成 ui(tn)+Δui,n(τ;Γi,n)u_i(t_n)+\Delta u_{i,n}(\tau;\Gamma_{i,n})(可选)。
  2. aero-engine 那篇文章里对应的“演化算子—残差网络—增量—gResNet—MSRP”逻辑来源分别在:演化算子定义 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)、ResNet 输出与残差 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)、增量 φΔ\varphi_\Delta 与 gResNet 辅助算子 η\eta (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)、MSRP 前后向回环误差 (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)。你可以在论文里按需要加一两句 “inspired by …” 并配上对应参考文献(不一定要引用整篇,只要引用 ResNet-as-integrator / gResNet / MSRP 的那几篇也行)。

如果你把你论文参考文献里准备怎么引用那篇 aero-engine(或你想改成引用它的[27][32]那条链),我也可以帮你把这段里该插的 \cite{...} 一并补到位,保证格式统一、审稿读起来更顺。

问题

用中文写一遍

思考

Preparing Chinese version of mechanism-consistent interpretation

I need to create the Chinese version of a subsection similar to the one in the paper. It will include evolution operator definitions, ResNet as a time integrator, gResNet with auxiliary operators, and MSRP operators. I'll ensure the formulas label properly in LaTeX, using Chinese text and citing supporting sources. I also plan to relate this to cold rolling mechanistic models.

Crafting the final snippet with citations

I need to mention the evolution operator as a forward time integrator and connect it to the residual network. For gResNet, I'll explain the auxiliary operator. Then, I'll discuss MSRP, including forward and backward prediction. I'll weave in citations properly, ensuring they’re not in code fences and match the exact format. Since it's LaTeX, I'll include the code block separately from the citations to follow the rules.

Planning the LaTeX snippet with citations

I can’t put citations inside a LaTeX code block, so I should explain the Chinese text along with the citations outside the code block. The LaTeX snippet itself will remain without citations. I should provide a short explanation for the user on where to insert the snippet, and cite the paper afterward. This approach keeps everything in line with guidelines.

Thought for 45s

回答

下面给你一段中文 + 可直接粘贴的 LaTeX 小节,用“机理模型 → 演化算子(时间积分器)→ 残差网络增量”的逻辑,把你第 3 部分的网络结构和机理模型对上,从而更好解释“可解释性”。写法思路参考你给的那篇文章:先给“演化算子定义”,再说明“ResNet 可看作时间积分器/学习增量”,再说明“广义残差(辅助算子)对应你这里的辅助分支”,最后用“前后向 reciprocal/MSRP”解释长时预测稳定性。对应依据在该文对演化算子、ResNet 增量形式、gResNet 辅助算子、MSRP 的描述里都有明确表述。(State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf) (State_Baseline_Modeling_and_Fault_Detection_for_Aero-Engines_With_Unknown_Performance_Degradation_Based_on_Deep_Neural_Networks.pdf)

latex
%======================== \subsection{残差网络的机理一致性解释与可解释性} \label{subsec:mech_interp_cn} %======================== \paragraph{机理视角:五机架耦合动力学与“演化算子”。} 尽管五机架冷轧过程包含复杂的变形、摩擦与张力传递机理,但从系统论角度, 其机架 $i$ 的偏差状态可抽象为一个耦合非线性动力学方程: \begin{equation} \frac{d}{dt}\Delta x_i(t)= f_i\!\Big(\Delta x_i(t),\,\Delta x_{Z_i}(t),\,u_i(t),\,d_i(t)\Big),\qquad i=1,\ldots,5, \label{eq:mech_ode_mill} \end{equation} 其中 $f_i(\cdot)$ 汇总了轧制变形、材料弹性、运输与张力传播等机理效应。 对给定采样间隔长度 $\delta_n$ 与区间内输入轨迹 $u_i(t_n+\tau)$系统从 $t_n$$t_{n+1}$ 的状态映射可用“演化算子/状态转移算子”表示为 \begin{equation} \Delta x_i(t_{n+1})= \Phi_{i,\delta_n}\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,u_i([t_n,t_{n+1}]),\,d_i([t_n,t_{n+1}])\Big). \label{eq:evolution_operator_mill} \end{equation} 从微分方程的基本积分关系出发,可得一步增量形式 \begin{equation} \Delta x_i(t_{n+1}) = \Delta x_i(t_n) + \underbrace{\int_{0}^{\delta_n} f_i\!\Big(\Delta x_i(t_n+\tau),\,\Delta x_{Z_i}(t_n+\tau),\,u_i(t_n+\tau),\,d_i(t_n+\tau)\Big)\,d\tau}_{\triangleq\ \varphi_{i,n}}, \label{eq:increment_integral_mill} \end{equation} 其中 $\varphi_{i,n}$ 表示在一个采样区间内由机理动力学累积得到的状态增量。 \paragraph{残差结构的可解释性:网络输出近似“机理增量”。} 本文的学习模型采用 \begin{equation} \Delta x_i(t_{n+1}) \approx \Delta x_i(t_n) + \mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\Theta_i\Big), \end{equation} 其结构与 \eqref{eq:increment_integral_mill} 完全同形:快捷支路传递当前状态, 而网络分支输出对应“一步增量” $\varphi_{i,n}$ 的数据驱动近似。 因此该残差网络并非任意黑盒映射,而是在结构上逼近机理系统的区间演化算子: 学习的是“从 $t_n$$t_{n+1}$ 的累积效应”,而不是直接拟合状态本身。 \paragraph{控制轨迹参数化与机理积分依赖的一致性。} 机理增量 $\varphi_{i,n}$ 本质依赖于区间内的整个输入轨迹 $u_i(t_n+\tau)$本文将区间内增量轨迹参数化为 $\Delta u_{i,n}(\tau;\Gamma_{i,n})$并把 $(\Gamma_{i,n},\delta_n)$ 作为网络输入的一部分, 等价于用低维参数对“输入轨迹对积分增量的影响”进行编码, 从而支持 MPC 在优化不同候选轨迹时进行“反事实预测”(不同 $\Gamma$ 对应不同演化结果)。 \paragraph{辅助分支的机理含义:粗尺度增量 + 残差修正。} 当采样间隔 $\delta_n$ 变化或不够小,直接学习 $\varphi_{i,n}$ 的难度增大。 因此本文采用 \begin{equation} \mathcal{N}_i(\cdot)=\psi_i(\cdot)+\rho_i(\cdot), \end{equation} 可解释为:$\psi_i(\cdot)$ 学习与 $\delta_n$ 强相关的“粗尺度/低频”增量(类似一个可学习的基准积分器), $\rho_i(\cdot)$ 再对剩余的耦合非线性与未建模效应做残差补偿。 
该分解使得网络内部具有“基准预测 + 误差修正”的结构化意义,有助于解释与泛化。 \paragraph{前后向模型与互逆一致性:抑制长时递推漂移。} 在理想机理动力学下,若忽略扰动与数值误差,前向推进再按同一时间尺度回推应近似回到初值。 因此本文引入后向残差模型并施加多步“前后向一致性/互逆一致性”约束, 可解释为:在数据驱动框架下对演化算子施加近似可逆(或近似互逆)正则, 从而抑制长时递推误差累积并提升多步预测稳定性。 \begin{remark} 综上,本文残差网络的快捷支路与“状态传递”一致,网络分支对应“区间内机理效应的累积增量”, 辅助分支对应“粗尺度积分效应”,前后向一致性约束对应“演化算子互逆结构”的稳定性要求。 因此,该网络结构在机理层面具有明确的对应关系与可解释性。 \end{remark}

如果你希望“对比更像老师想要的那种”,我还能再给你补一段对照段落
把“传统机理模型离散化(如欧拉/RK4)的一步形式”写成 Δxn+1=Δxn+δnf()\Delta x_{n+1}=\Delta x_n + \delta_n f(\cdot)Δxn+1=Δxn+f()\Delta x_{n+1}=\Delta x_n + \int f(\cdot),然后一句话点明:你这个 ResNet 残差块就相当于把积分项/增量项用网络替代(这会更像“机理可匹配”的对比)。

问题

我现在第三部分分成两部分,一部分是我之前写的那部分,另一部分是你新给我的这部分,帮我给我之前写的部分起一个标题名

思考

Thought for 18s

回答

可以。你“之前写的那部分”主要是在讲:网络要学什么(一步演化/残差)、输入输出维度、残差结构与辅助分支、前向/后向网络、MSRP+rollout 等训练目标。所以标题建议突出“结构设计 + 训练方法”。

给你几个可选标题(按你论文风格从简到更学术):

  1. 残差神经网络结构设计与训练方法(最通用、最稳)
  2. 面向五机架厚度–张力预测的残差网络建模与训练(强调应用对象)
  3. 控制依赖的一步残差预测器构建与多步互逆一致性训练(把 MSRP 亮出来)
  4. 前向–后向残差网络与多步递推稳定训练策略(突出 forward/backward + 稳定性)
  5. 基于残差学习的离散区间演化建模:结构、目标与训练(更“论文味”)

如果你后面新增那部分标题叫“机理一致性解释与可解释性”,那我最推荐搭配用:
“残差神经网络结构设计与训练方法” + “机理一致性解释与可解释性”

问题

1-s2.0-S0959152420303371-main.pdf

机理模型是这篇文章中的机理模型,重新写一下我刚才让你写的可解释性的问题

思考

Explaining consistency and interpretability in model mechanisms

In this section, we need to highlight how Hu’s state vector differs from ours, showing the relation between their approach using δTin,i, δSi, δVi−1, and our focus on thickness deviation and adjacent tensions. I'll generate a subsection in Chinese and LaTeX that references the user's notation and includes citations from the uploaded paper.

Describing mechanism model dynamics and discretization for MPC

The mechanistic model summarizes how thickness increment (Model 3) shows δhout’s dependence on roll gaps, entry thickness, and tensions. Tension dynamics and actuator models indicate dependencies on neighboring tensions and speeds. State-space equations (18) integrate coupling, and for MPC, we discretize this space, deriving a process for discrete evolution. The residual network matches this by outputting changes over intervals.

Explaining the network, mapping, and model integration

The network learns the integral effect implied by the mechanistic model, representing the combined impact of rolling force, strip tension, and actuator response. The model maps inputs like local and neighbor states, input trajectory parameters, and rolling gap dynamics. The auxiliary branch models time-scale effects, and backward evolution approximates inverse transition for robustness.

Providing citations and integrating Hu & Sun 2021

We need citations for key equations such as the thickness (eq. 3), tension (eq. 15), and actuator equations (eq. 16-17). We can reference files for the relevant equations. Also, we'll integrate Hu & Sun (2021) by adding a non-heavy explanation for discretizing ẋ=Ax+Bu+Wd, keeping consistent with their notation and adjustments for incremental states.

Thought for 2m 23s

回答

下面给你把“可解释性/机理一致性”这一节按 Hu & Sun 这篇五机架冷轧机理模型来重写(你可以直接作为第三部分的第二小节/后一小节粘贴)。核心思路是:** 你的残差网络学的就是机理模型离散化后的一步“增量/演化算子”;而且你的输入(邻居状态 + Γ,δ\Gamma,\delta)在机理模型里分别对应耦合项(相邻张力等)执行机构/控制量(辊缝、速度)**。

机理模型依据:该文给出了出口厚度增量与辊缝/张力等的关系式(3) (1-s2.0-S0959152420303371-main.pdf)、张力动力学(15) (1-s2.0-S0959152420303371-main.pdf)、HGC/ASR 一阶执行机构模型(16)(17) (1-s2.0-S0959152420303371-main.pdf),以及子系统状态空间模型(18)与状态/输入/扰动的组成 (1-s2.0-S0959152420303371-main.pdf)。


latex
%======================== \subsection{与机理模型的一致性解释与可解释性(基于五机架冷轧机理模型)} \label{subsec:mech_interp_mill} %======================== \paragraph{机理模型回顾:厚度--张力耦合与执行机构动力学。} 针对五机架冷轧过程,文献中基于轧制基本理论建立了厚度与张力的增量模型与子系统状态空间模型。 其中,机架出口厚度增量可写为 \begin{equation} \delta h_{\mathrm{out},i} = e_{1i}\delta S_i + e_{2i}\delta h_{\mathrm{in},i} + e_{3i}\delta T_{\mathrm{in},i} + e_{4i}\delta T_{\mathrm{out},i}, \label{eq:mech_thickness_inc} \end{equation} 表明出口厚度在局部上受辊缝变化与入口厚度、入口/出口张力等共同影响(厚度--张力强耦合)。 张力动力学由胡克定律、质量流守恒与滑移关系推导后,可整理为相邻机架耦合的微分方程形式, 其核心特点是:$\delta T$ 的变化不仅与本机架量有关,还显式包含相邻机架张力、速度与辊缝等耦合项。 例如文献中给出 \begin{equation} \frac{d(\delta T_{\mathrm{in},i})}{dt} = \xi_1 \delta T_{\mathrm{in},i-1} + \xi_2 \delta T_{\mathrm{in},i} + \xi_3 \delta T_{\mathrm{in},i+1} + \cdots, \label{eq:mech_tension_dyn} \end{equation} 体现了典型的“相邻耦合链”结构。 此外,辊缝与速度并非理想瞬时执行,文献将 HGC(辊缝)与 ASR(速度)建模为一阶环节: \begin{equation} \dot S_i = \frac{U_{S_i}}{\tau_s} - \frac{S_i}{\tau_s},\qquad \dot V_i = \frac{U_{V_i}}{\tau_V} - \frac{V_i}{\tau_V}, \label{eq:mech_actuator_1st} \end{equation} 即控制指令(参考)$U_{S_i},U_{V_i}$ 经过时间常数 $\tau_s,\tau_V$ 的动态后才体现为实际辊缝/速度。 基于上述关系,文献把子系统 $i$ 写成连续时间状态空间模型 \begin{equation} \dot x_i(t)=A_i x_i(t)+B_i u_i(t)+W_{i1} d_i(t),\qquad y_i(t)=C_i x_i(t)+D_i u_i(t)+W_{i2} d_i(t), \label{eq:mech_ss} \end{equation} 并明确给出状态、输入与扰动向量的组成, 其中扰动项 $d_i(t)$ 中包含相邻机架张力、相邻辊缝、相邻速度及入口厚度等耦合量, 从结构上说明了“邻居变量进入本机架动力学”的机理来源。 \paragraph{残差网络的机理一致性:学习离散化后的“一步增量/演化算子”。} 将机理模型 \eqref{eq:mech_ss} 在采样时刻 $t_n$ 离散化(或等价地写成积分形式),可得到一步映射 \begin{equation} x_i(t_{n+1}) = x_i(t_n) + \int_{0}^{\delta_n} \Big(A_i x_i(t_n+\tau)+B_i u_i(t_n+\tau)+W_{i1} d_i(t_n+\tau)\Big)\,d\tau. 
\label{eq:mech_integral} \end{equation} 该式揭示:从 $t_n$$t_{n+1}$ 的变化量本质上是“机理动力学在一个区间内的累积效应”。 因此,把学习模型写成 \begin{equation} \Delta x_i(t_{n+1}) \approx \Delta x_i(t_n) +\mathcal N_i\!\big(\Delta x_i(t_n),\Delta x_{Z_i}(t_n),\Gamma_{i,n},\delta_n;\Theta_i\big) \end{equation} 在结构上与 \eqref{eq:mech_integral} 同形: 快捷支路对应“当前状态传递”,而网络输出对应“一步增量”(即积分项/离散化增量)的数据驱动近似。 这使得残差网络具有明确的物理含义:它并非直接黑盒拟合状态,而是在逼近 \emph{厚度--张力耦合 + 执行机构动态 + 邻居耦合扰动} 共同作用下的区间累积增量。 \paragraph{为什么输入必须包含邻居状态:对应机理模型中的耦合扰动项。} 在机理模型中,张力动力学显式包含相邻张力项(例如 \eqref{eq:mech_tension_dyn} 中的 $\delta T_{\mathrm{in},i\pm 1}$), 且状态空间模型的扰动向量 $d_i(t)$ 亦包含相邻机架的张力、辊缝与速度等量。 因此,你在网络输入中引入 $\Delta x_{Z_i}(t_n)$ 的做法, 等价于在数据驱动离散映射中“显式暴露”机理耦合通道, 使网络能够学习到来自相邻机架的张力传播与运输耦合效应,而不是把它们隐含地当作噪声。 \paragraph{为什么输入要包含 $(\Gamma_{i,n},\delta_n)$:对应执行机构与区间内输入轨迹的机理依赖。} 机理模型指出辊缝/速度通过一阶环节响应控制指令 \eqref{eq:mech_actuator_1st}因此同样的采样起点在不同的区间长度 $\delta_n$ 下会产生不同的“有效输入累积效应”。 你的多项式参数 $\Gamma_{i,n}$ 给出了区间内增量轨迹的低维表示, \eqref{eq:mech_integral} 中“区间内输入 $u_i(t_n+\tau)$ 的积分累积”相匹配; 而显式输入 $\delta_n$ 则让网络能够学习执行机构时间常数与采样周期共同决定的尺度效应。 \paragraph{辅助分支与前后向一致性的物理解释。} (1)$\psi_i(\cdot)+\rho_i(\cdot)$ 的分解可解释为: $\psi_i$ 捕捉与 $\delta_n$、执行机构动态强相关的“粗尺度增量”, $\rho_i$ 再学习剩余的耦合非线性修正(如厚度--张力非线性、参数漂移等)。 (2)引入后向网络并施加多步互逆一致性(MSRP)可理解为对离散演化算子施加“近似可逆/一致性”结构约束, 以抑制递推预测的漂移累积,提升长时域滚动预测的稳定性。 \begin{remark} 综上,你的网络结构与机理模型在三个层面可对齐: (i) 残差形式对应机理离散化后的区间增量; (ii) 邻居状态输入对应机理耦合通道(张力传播与扰动向量中的相邻量); (iii) $(\Gamma,\delta)$ 对应执行机构动态与区间内输入累积效应。 因此该模型具备“结构可解释性”,而非纯黑盒回归。 \end{remark}

你接下来要做的两点“对齐”

  1. 你文中用的是 Δxi=[Δhi,ΔTi1,ΔTi]\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top。上面机理模型用了 δhout,i\delta h_{\text{out},i}δTin,i\delta T_{\text{in},i} 这类符号。你可以在正文加一句符号对应(不必改全篇):
    δhout,iΔhi\delta h_{\text{out},i}\leftrightarrow \Delta h_iδTin,i\delta T_{\text{in},i} 与你定义的相邻张力偏差 ΔTi1\Delta T_{i-1}ΔTi\Delta T_i 对应即可。引用机理来源时就用文献符号。

  2. 上面这段没有强行把文献的 xi=[δTin,i,δSi,δVi1]x_i=[\delta T_{\text{in},i},\delta S_i,\delta V_{i-1}]^\top 搬到你文里,只是用它说明:耦合来自哪里、执行机构动态是什么、离散增量为什么合理。这样最省改动,也最容易让老师认可“能从机理上对上”。

如果你把你论文里“机理模型那一节”准备怎么写(是引用 Hu&Sun 这篇,还是你自己写的五机架模型),我还可以把上面这段里的 \eqrefeq:mechthicknessinc\eqrefeq:mechss\eqref{eq:mech_thickness_inc}\sim\eqref{eq:mech_ss} 改成直接引用你论文内部的机理公式编号,做到全文编号完全一致。

问题

%========================
\section{Construction of Dataset}
%========================

Consider a five-stand tandem cold rolling mill indexed by $i\in\{1,2,3,4,5\}$.
Let $t_n$ denote the $n$-th sampling instant and $\delta_n=t_{n+1}-t_n$ the corresponding sampling interval length.
Define the local within-interval time variable $\tau=t-t_n\in[0,\delta_n]$.

Let $h_i(t)$ denote the exit thickness of stand $i$ ($i=1,\dots,5$),
and let $T_i(t)$ denote the inter-stand strip tension between stand $i$ and $i+1$ ($i=1,\dots,4$).
The system is strongly coupled because the inter-stand tensions propagate along the mill line and are affected by neighboring stands' actions.

Let $h_i^{\mathrm{ref}}(t)$ and $T_i^{\mathrm{ref}}(t)$ be the desired references (setpoints) given by process requirements. We define deviation variables
\begin{equation}
\Delta h_i(t)\triangleq h_i(t)-h_i^{\mathrm{ref}}(t),\qquad
\Delta T_i(t)\triangleq T_i(t)-T_i^{\mathrm{ref}}(t).
\label{eq:dev_def}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to states always means deviation from reference.

For each stand $i$, we choose the local deviation state as
\begin{equation}
\Delta x_i(t)\triangleq
\begin{bmatrix}
\Delta h_i(t)\\
\Delta T_{i-1}(t)\\
\Delta T_i(t)
\end{bmatrix}\in\mathbb{R}^{d},\qquad d=3,
\label{eq:xi_def_clean}
\end{equation}
with the boundary convention $\Delta T_0(t)\equiv 0$ and $\Delta T_5(t)\equiv 0$ to keep a unified dimension $d=3$ for all stands.

For a five-stand tandem mill, the dominant coupling is between adjacent stands, hence we define
\begin{equation}
Z_1=\{2\},\quad
Z_i=\{i-1,\,i+1\}\ (i=2,3,4),\quad
Z_5=\{4\}.
\label{eq:Zi_clean}
\end{equation}
Define the neighbor-state stack
\begin{equation}
\Delta x_{Z_i}(t_n)=\mathrm{col}\big\{\Delta x_k(t_n)\ \big|\ k\in Z_i\big\}.
\label{eq:xZi_clean}
\end{equation}

Each stand $i$ is manipulated by roll gap $s_i(t)$ and stand speed $v_i(t)$:
\begin{equation}
u_i(t)=
\begin{bmatrix}
s_i(t)\\
v_i(t)
\end{bmatrix}\in\mathbb{R}^{n_u},\qquad n_u=2.
\label{eq:ui_clean}
\end{equation}
To ensure smooth actuation and match industrial practice, we optimize \emph{discrete input increments}:
\begin{equation}
\Delta u_i(t_n)\triangleq u_i(t_n)-u_i(t_{n-1})
=
\begin{bmatrix}
\Delta s_i(t_n)\\
\Delta v_i(t_n)
\end{bmatrix}.
\label{eq:du_discrete_clean}
\end{equation}
Throughout this paper, the symbol ``$\Delta$'' attached to inputs $\Delta u_i(t_n)$ means sample-to-sample increment.
Thus, $\Delta x$ and $\Delta u$ are conceptually different, and this is fixed by definition.

Let $d_i(t)$ denote exogenous disturbances.
We denote the interval-level equivalent disturbance by $\Delta d_i(t_n)$.
$I_d$ denotes the $d\times d$ identity matrix; $0_{a\times b}$ denotes the $a\times b$ zero matrix.

The stand-wise deviation-state evolution over $[t_n,t_{n+1}]$ can be expressed by a discrete-time mapping
\begin{equation}
\Delta x_i(t_{n+1})
=
\Phi_i\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Delta u_i([t_n,t_{n+1}]),\,\Delta d_i([t_n,t_{n+1}])\Big),
\label{eq:true_mapping_clean}
\end{equation}
where $\Phi_i(\cdot)$ is generally nonlinear and coupled due to rolling deformation and tension propagation.
A commonly used conceptual equivalent discrete linear form is
\begin{equation}
\Delta x_i(t_{n+1})
=
M_d\,\Delta x_i(t_n)
+
N_d\,\Delta u_i(t_n)
+
F_d\,\Delta d_i(t_n),
\label{eq:linear_form_concept}
\end{equation}
where $M_d,N_d,F_d$ represent equivalent discrete-time matrices around operating conditions.
In a practical five-stand cold rolling mill, accurately deriving and identifying these matrices and disturbance models from first principles is difficult,
due to strong coupling, unmodeled nonlinearities, and time-varying operating regimes.
Therefore, this paper aims to learn a high-fidelity approximation of the interval evolution from data and then embed it into distributed MPC.

\begin{remark}
In fact, due to the existence of complex coupling relationships, it is difficult to directly and accurately establish \eqref{eq:linear_form_concept}
based on first principles. Therefore, in this paper, we learn an approximate mapping of \eqref{eq:true_mapping_clean} from data.
\end{remark}

Although decisions are updated at discrete instants $t_n$, the hydraulic gap and drive systems evolve continuously inside each interval,
and abrupt within-interval changes may excite tension oscillations and deteriorate thickness stability.
Thus, parameterizing the within-interval increment trajectory by a low-order polynomial:
(i) yields a compact finite-dimensional decision representation;
(ii) enforces smooth profiles inside the interval;
(iii) enables enforcing increment constraints for all $\tau\in[0,\delta_n]$.
This is appropriate when $\delta_n$ is not excessively large relative to actuator bandwidth and the within-interval evolution is well approximated by a low-order basis.

On the interval $[t_n,t_{n+1}]$, parameterize the control increment trajectory as
\begin{equation}
\Delta u_{i,n}(\tau;\Gamma_{i,n})
=
\Gamma_{i,n0}+\Gamma_{i,n1}\tau+\Gamma_{i,n2}\tau^2,
\qquad \tau\in[0,\delta_n],
\label{eq:du_poly_vec_clean}
\end{equation}
where $\Gamma_{i,n0},\Gamma_{i,n1},\Gamma_{i,n2}\in\mathbb{R}^{n_u}$ are coefficient vectors ($n_u=2$).
Component-wise, \eqref{eq:du_poly_vec_clean} corresponds to
\begin{equation}
\begin{aligned}
\Delta s_{i,n}(\tau) &= \gamma^{(s)}_{i,n0}+\gamma^{(s)}_{i,n1}\tau+\gamma^{(s)}_{i,n2}\tau^2,\\
\Delta v_{i,n}(\tau) &= \gamma^{(v)}_{i,n0}+\gamma^{(v)}_{i,n1}\tau+\gamma^{(v)}_{i,n2}\tau^2.
\end{aligned}
\label{eq:du_components_clean}
\end{equation}
Define the stacked parameter vector
\begin{equation}
\Gamma_{i,n}\triangleq
\big[
(\Gamma_{i,n0})^\top,\,
(\Gamma_{i,n1})^\top,\,
(\Gamma_{i,n2})^\top
\big]^\top
\in\mathbb{R}^{p},
\qquad
p=3n_u=6.
\label{eq:Gamma_clean}
\end{equation}
Here, $\Gamma_{i,n0}$ is the baseline increment at $\tau=0$, while $\Gamma_{i,n1}$ and $\Gamma_{i,n2}$ describe the linear and quadratic variation rates.

Define the interval-averaged equivalent increments as
\begin{equation}
\begin{aligned}
\Delta u_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta u_{i,n}(\tau)\,d\tau,\\
\Delta d_i(t_n) &\triangleq \frac{1}{\delta_n}\int_0^{\delta_n}\Delta d_i(\tau)\,d\tau.
\end{aligned}
\label{eq:avg_def_clean}
\end{equation}
With \eqref{eq:du_poly_vec_clean}, the input average has a closed form:
\begin{equation}
\Delta u_i(t_n)=
\Gamma_{i,n0}
+\Gamma_{i,n1}\frac{\delta_n}{2}
+\Gamma_{i,n2}\frac{\delta_n^2}{3}.
\label{eq:avg_closed_clean}
\end{equation}

Let $\mathcal{I}_x$ denote the sampling domain of deviation states $\Delta x_i(t_n)$ and neighbor stacks $\Delta x_{Z_i}(t_n)$,
and let $\mathcal{I}_\Gamma$ denote the sampling domain of polynomial parameters $\Gamma_{i,n}$.
These domains specify the operating envelope used to generate supervised training data.

Given the above parameterization, one training sample is generated on each interval $[t_n,t_{n+1}]$.
In addition to the local deviation state, the neighbor deviation states are included to represent inter-stand coupling.
The process is summarized in Table~\ref{tab:interval_sample_generation_en}.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.15}
\caption{Procedure for generating one interval-level sample on $[t_n,t_{n+1}]$ (five-stand coupled mill).}
\label{tab:interval_sample_generation_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.09\linewidth} X}
\toprule
\textbf{Step} & \textbf{Operation} \\
\midrule
1 & \textbf{State sampling:} sample $\Delta x_i(t_n)$ and $\Delta x_{Z_i}(t_n)$ from $\mathcal{I}_x$. \\
2 & \textbf{Parameter sampling:} draw $\Gamma_{i,n}\sim\mathcal{I}_\Gamma$ (coefficients for both $\Delta s_{i,n}(\tau)$ and $\Delta v_{i,n}(\tau)$). \\
3 & \textbf{Control construction:} compute $\Delta u_{i,n}(\tau)$ via \eqref{eq:du_poly_vec_clean}. \\
4 & \textbf{State propagation:} integrate the \emph{five-stand coupled} mill model on $[t_n,t_{n+1}]$ (e.g., RK4) using the within-interval control trajectory, and record $\Delta x_i(t_{n+1})$. \\
\bottomrule
\end{tabularx}
\end{table}

Accordingly, an interval sample for subsystem $i$ can be represented as
\begin{equation}
\mathcal{D}_{i,n}=\big\{\Delta x_i(t_n),\ \Delta x_{Z_i}(t_n),\ \Delta u_{i,n}(\tau),\ \Delta x_i(t_{n+1})\big\}.
\label{eq:interval_sample_clean}
\end{equation}
Note that $\Delta u_{i,n}(\tau)$ is fully determined by $(\Gamma_{i,n},\delta_n)$ via \eqref{eq:du_poly_vec_clean},
therefore it is sufficient to store $(\Gamma_{i,n},\delta_n)$ as the learning input.

For each subsystem $i$, by repeating the above procedure across multiple intervals and randomized draws,
the local one-step training dataset is formed as
\begin{equation}
\begin{split}
S_i=\Big\{&
\big(\Delta x_i^{(j)}(t_n),\,\Delta x_{Z_i}^{(j)}(t_n),\,\Delta x_i^{(j)}(t_{n+1});\,
\Gamma_{i,n}^{(j)},\,\delta_n^{(j)}\big)
\ \Big|\ j=1,\ldots,J
\Big\}.
\end{split}
\label{eq:S_i_clean}
\end{equation}
Here $J$ is the number of one-step samples for subsystem $i$.
The overall dataset for the five-stand mill is denoted by $\{S_i\}_{i=1}^{5}$.
The point-cloud visualization of the training dataset is shown in Figure~\ref{2}.
The point-cloud visualization of the training dataset is shown in Figure~\ref{2}.

\begin{figure*}[htbp]
\centering
\includegraphics[scale=0.5]{picture/Fig2.pdf}
\caption{Point cloud map of the training dataset.}\label{2}
\end{figure*}

The one-step set $S_i$ is sufficient for one-step regression, but it is not sufficient for training with multi-step rollout loss
and reciprocal-consistency regularization, because these objectives require ground-truth deviation-state trajectories over a horizon of $K$ consecutive intervals.
Therefore, without changing the single-interval sampling mechanism above, we additionally organize the offline-simulated samples
into $K$-step trajectory segments.

Specifically, for each starting time $t_n$ we generate a segment of length $K$ by consecutively sampling
$\{\Gamma_{i,n+s},\delta_{n+s}\}_{s=0}^{K-1}$ (and the corresponding inputs/disturbances),
and integrating the five-stand coupled mill model over $[t_{n+s},t_{n+s+1}]$ for $s=0,\ldots,K-1$.
Hence, we obtain the deviation-state sequence $\{\Delta x_i(t_{n+s})\}_{s=0}^{K}$ as well as the neighbor stacks
$\{\Delta x_{Z_i}(t_{n+s})\}_{s=0}^{K}$.

Define a $K$-step segment sample for subsystem $i$ as
\begin{equation}
\begin{aligned}
\mathcal{W}_{i,n}=
\Big\{&
\big(\Delta x_i(t_{n+s}),\,\Delta x_{Z_i}(t_{n+s}),\,\Gamma_{i,n+s},\,\delta_{n+s}\big)_{s=0}^{K-1};\\
&\big(\Delta x_i(t_{n+s+1})\big)_{s=0}^{K-1}
\Big\}.
\end{aligned}
\label{eq:segment_clean}
\end{equation}
By repeating the above segment generation, we form the multi-step training set
\begin{equation}
S_i^{(K)}=\Big\{\mathcal{W}_{i,n}^{(j)}\ \Big|\ j=1,\ldots,J_K\Big\},
\label{eq:S_i_K_clean}
\end{equation}
where $J_K$ is the number of $K$-step segment samples.
Note that $S_i$ can be viewed as the marginal one-step projection of $S_i^{(K)}$ (keeping only $s=0$),
thus the original dataset design is preserved, and only an additional \emph{segment organization} is introduced for multi-step training.

%========================
\section{Construction of Residual Neural Network}
%========================
\subsection{Residual Neural Network Structure Construction and Training Method}
Given the dataset, the neural network model is trained to learn a stand-wise, control-dependent one-step evolution law of deviation states:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\,\Theta_i\Big),
\label{eq:learned_dyn_clean}
\end{equation}
where $\mathcal{N}_i(\cdot)$ outputs the one-step deviation-state change and $\Theta_i$ are trainable parameters.

\begin{remark}
If $\mathcal{N}_i$ does not take control information as input (here $\Gamma_{i,n}$ and $\delta_n$),
the predictor becomes an autoregressive model that only reproduces trajectories under the training input patterns
and cannot answer the counterfactual question: ``what will happen if we choose a different roll gap and speed trajectory?''
Since MPC optimizes over candidate decisions, a control-dependent predictor \eqref{eq:learned_dyn_clean} is necessary
to evaluate the predicted thickness and tension behavior under different candidate actuator trajectories.
\end{remark}

Let $d=3$ (state dimension), $|Z_i|$ be the number of neighbors of stand $i$ in \eqref{eq:Zi_clean}, and $p=6$ in \eqref{eq:Gamma_clean}.
Define the input vector
\begin{equation}
X_{i,\text{in}} \triangleq
\big[
\Delta x_i(t_n)^\top,\,
\Delta x_{Z_i}(t_n)^\top,\,
\Gamma_{i,n}^\top,\,
\delta_n
\big]^\top
\in \mathbb{R}^{d(1+|Z_i|)+p+1}.
\label{eq:X_in_clean}
\end{equation}
The network mapping is
\begin{equation}
\mathcal{N}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d}.
\end{equation}

To improve training stability and long-horizon rollout robustness, we use a residual form.
Let $\hat{I}_i\in\mathbb{R}^{d\times(d(1+|Z_i|)+p+1)}$ be a selection matrix extracting the local state block:
\begin{equation}
\hat{I}_i = [I_d,\ 0_{d\times(d|Z_i|+p+1)}].
\label{eq:Ihat_clean}
\end{equation}
Then the one-step predictor is written as
\begin{equation}
X_{i,\text{out}} = \hat{I}_i X_{i,\text{in}} + \mathcal{N}_i(X_{i,\text{in}}; \Theta_i),
\label{eq:res_predict_clean}
\end{equation}
where $X_{i,\text{out}}$ represents the predicted $\Delta x_i(t_{n+1})$.
This structure implements a baseline-plus-correction interpretation:
the shortcut propagates the current deviation state $\Delta x_i(t_n)$, while the network learns the correction capturing
unmodeled nonlinearities and inter-stand coupling (via $\Delta x_{Z_i}$) under varying operating conditions.

To improve robustness when $\delta_n$ varies, we introduce an auxiliary branch inside $\mathcal{N}_i$:
\begin{equation}
\mathcal{N}_i(X_{i,\text{in}};\Theta_i)\triangleq
\psi_i(X_{i,\text{in}};\Theta_{\psi_i}) + \rho_i(X_{i,\text{in}};\theta_i),
\label{eq:aux_clean}
\end{equation}
where $\psi_i(\cdot)$ is a lightweight feedforward branch that captures low-frequency/scale effects strongly related to $\delta_n$,
and $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections.
When $\psi_i(\cdot)\equiv 0$, the model reduces to a standard residual network.

For the $j$-th sample in \eqref{eq:S_i_clean}, define
\begin{equation}
X_{i,\text{in}}^{(j)} =
\big[
\Delta x_i^{(j)}(t_n),\ \Delta x_{Z_i}^{(j)}(t_n),\
\Gamma_{i,n}^{(j)},\ \delta_n^{(j)}
\big]^{\top},
\end{equation}
and the supervised residual target
\begin{equation}
\Delta r_i^{(j)}=\Delta x_i^{(j)}(t_{n+1})-\Delta x_i^{(j)}(t_n).
\label{eq:target_clean}
\end{equation}

To suppress accumulation drift induced by long-horizon recursion and to improve long-term predictive stability,
we train the forward predictor jointly with an auxiliary backward residual model
and impose a multi-step reciprocal-consistency regularization over a $K$-step segment from $S_i^{(K)}$.

Construct a backward residual network
\begin{equation}
\mathcal{B}_i:\mathbb{R}^{d(1+|Z_i|)+p+1}\rightarrow\mathbb{R}^{d},
\end{equation}
parameterized by $\bar{\Theta}_i$. For the backward step associated with interval $[t_n,t_{n+1}]$, define
\begin{equation}
\begin{aligned}
X_{i,\mathrm{in}}^{b}
&=
\big[
\Delta x_i(t_{n+1}),\ \Delta x_{Z_i}(t_{n+1}),\
\Gamma_{i,n},\ \delta_n
\big]^{\top},\\
X_{i,\mathrm{out}}^{b}
&=
\hat{I}_i X_{i,\mathrm{in}}^{b} + \mathcal{B}_i(X_{i,\mathrm{in}}^{b};\bar{\Theta}_i),
\end{aligned}
\label{eq:back_clean}
\end{equation}
where $X_{i,\mathrm{out}}^{b}$ represents the backward estimate of $\Delta x_i(t_n)$.
The supervised backward residual target is
\begin{equation}
\Delta r_i^{b}=\Delta x_i(t_n)-\Delta x_i(t_{n+1}).
\end{equation}

Given a segment sample $\mathcal{W}_{i,n}\in S_i^{(K)}$, initialize
\begin{equation}
\Delta \hat{x}_i(t_n)=\Delta x_i(t_n),
\end{equation}
and recursively apply the forward predictor for $K$ steps:
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1})
&=
\Delta \hat{x}_i(t_{n+s})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s}),\,\Delta \hat{x}_{Z_i}(t_{n+s}),\,
\Gamma_{i,n+s},\,\delta_{n+s};\,\Theta_i
\Big),\\
&\qquad s=0,\ldots,K-1.
\end{aligned}
\label{eq:fwd_roll_clean}
\end{equation}

Set the terminal condition
\begin{equation}
\Delta \bar{x}_i(t_{n+K})=\Delta \hat{x}_i(t_{n+K}),
\end{equation}
and roll back using $\mathcal{B}_i$:
\begin{equation}
\begin{aligned}
\Delta \bar{x}_i(t_{n+s})
&=
\hat{I}_i X_{i,\mathrm{in}}^{b}(t_{n+s})
+
\mathcal{B}_i\!\Big(X_{i,\mathrm{in}}^{b}(t_{n+s});\,\bar{\Theta}_i\Big),
\quad s=K-1,\ldots,0,
\end{aligned}
\label{eq:bwd_roll_clean}
\end{equation}
where
\begin{equation}
X_{i,\mathrm{in}}^{b}(t_{n+s})=
\big[
\Delta \bar{x}_i(t_{n+s+1}),\ \Delta \hat{x}_{Z_i}(t_{n+s+1}),\
\Gamma_{i,n+s},\ \delta_{n+s}
\big]^{\top}.
\end{equation}

Define the multi-step reciprocal prediction error
\begin{equation}
E_i(t_n)
=
\sum_{s=0}^{K}
\left\|
\Delta \hat{x}_i(t_{n+s})-\Delta \bar{x}_i(t_{n+s})
\right\|^2.
\end{equation}

We jointly minimize:
\begin{equation}
\begin{aligned}
L_{\mathrm{1step}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s+1})-\Delta x_i^{(j)}(t_{n+s})\big)
-\mathcal{N}_i\!\left(
X_{i,\mathrm{in}}^{(j)}(t_{n+s});\Theta_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{bwd}}(\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\frac{1}{K}\sum_{s=0}^{K-1}
\Big\|
\big(\Delta x_i^{(j)}(t_{n+s})-\Delta x_i^{(j)}(t_{n+s+1})\big)
-\mathcal{B}_i\!\left(
X_{i,\mathrm{in}}^{b,(j)}(t_{n+s});\bar{\Theta}_i
\right)
\Big\|^2,\\[2mm]
L_{\mathrm{msrp}}(\Theta_i,\bar{\Theta}_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K} E_i^{(j)}(t_n),\\[2mm]
L_{\mathrm{roll}}(\Theta_i)
&= \frac{1}{J_K}\sum_{j=1}^{J_K}\sum_{s=1}^{K}
\Big\|
\Delta x_i^{(j)}(t_{n+s})-\Delta \hat{x}_i^{(j)}(t_{n+s})
\Big\|^2.
\end{aligned}
\label{eq:loss_clean}
\end{equation}
Here, $L_{\mathrm{1step}}$ enforces one-step accuracy; $L_{\mathrm{roll}}$ explicitly suppresses long-horizon drift under recursion;
$L_{\mathrm{msrp}}$ regularizes the learned dynamics by enforcing reciprocal consistency between forward and backward rollouts;
and $L_{\mathrm{bwd}}$ trains the backward model for the consistency regularization.
In implementation, these terms are combined as
\begin{equation}
L_{\mathrm{total}}=\lambda_1 L_{\mathrm{1step}}+\lambda_2 L_{\mathrm{roll}}+\lambda_3 L_{\mathrm{msrp}}+\lambda_4 L_{\mathrm{bwd}},
\end{equation}
where $\lambda_1,\lambda_2,\lambda_3,\lambda_4>0$ are tuned on a validation set.

After training, the forward predictor is
\begin{equation}
\Delta \hat{x}_i(t_{n+1})
=
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(
\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,
\Gamma_{i,n},\,\delta_n;\,\Theta_i^*
\Big),
\label{eq:pred_clean}
\end{equation}
and multi-step prediction is obtained by recursive rollout of \eqref{eq:pred_clean}.
This learned predictor is the internal model used by the MPC optimizer in the next section.

Finally, network parameters are optimized using Adam:
\begin{equation}
\Theta_{i,t+1} = \Theta_{i,t} - \alpha \frac{\hat{m}_{i,t}}{\sqrt{\hat{v}_{i,t}} + \varepsilon},
\end{equation}
where $\alpha$ is the learning rate (we use $\alpha$ to avoid conflict with other symbols),
$\hat{m}_{i,t}$ and $\hat{v}_{i,t}$ are bias-corrected moment estimates, and $\varepsilon>0$ is a small constant for numerical stability.
Figure~\ref{fig:rnn_logic} illustrates the overall structure.

\begin{figure}[htbp]
\centering
\includegraphics[scale=0.85]{picture/x6.pdf}
\caption{Logic diagram of the residual neural network.}
\label{fig:rnn_logic}
\end{figure}

\subsection{Explainability of the residual network}

Although the five-stand cold rolling mill involves complex rolling deformation and tension-transport coupling, its stand-wise deviation dynamics can be abstractly described by a coupled nonlinear ODE:
\begin{equation}
\frac{d}{dt}\Delta x_i(t)
=
f_i\!\Big(\Delta x_i(t),\,\Delta x_{Z_i}(t),\,u_i(t),\,d_i(t)\Big),
\qquad i=1,\ldots,5,
\label{eq:mech_ode}
\end{equation}
where $f_i(\cdot)$ summarizes the mechanistic effects.
For a given sampling interval length $\delta_n$ and a within-interval input trajectory $u_i(t_n+\tau)$,
the state transition over one interval can be written through an evolution operator:
\begin{equation}
\Delta x_i(t_{n+1})=\Phi_{i,\delta_n}\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,u_i([t_n,t_{n+1}]),\,d_i([t_n,t_{n+1}])\Big).
\label{eq:evolution_operator_mill}
\end{equation}
By the fundamental theorem of calculus, \eqref{eq:mech_ode} implies the increment form
\begin{equation}
\Delta x_i(t_{n+1})
=
\Delta x_i(t_n)
+
\underbrace{\int_{0}^{\delta_n}
f_i\!\Big(\Delta x_i(t_n+\tau),\,\Delta x_{Z_i}(t_n+\tau),\,u_i(t_n+\tau),\,d_i(t_n+\tau)\Big)\,d\tau}_{\triangleq\ \varphi_{i,n}},
\label{eq:increment_integral}
\end{equation}
where $\varphi_{i,n}$ is the one-interval state increment generated by the mechanistic dynamics.

Our learned model \eqref{eq:learned_dyn_clean} adopts the same increment form as \eqref{eq:increment_integral}:
\begin{equation}
\Delta x_i(t_{n+1})
\approx
\Delta x_i(t_n)
+
\mathcal{N}_i\!\Big(\Delta x_i(t_n),\,\Delta x_{Z_i}(t_n),\,\Gamma_{i,n},\,\delta_n;\Theta_i\Big).
\end{equation}
Here, $\mathcal{N}_i(\cdot)$ plays the role of a data-driven approximation of the integral increment $\varphi_{i,n}$,
i.e., it approximates the accumulated effect of the mechanistic dynamics over $[t_n,t_{n+1}]$.
This is consistent with the well-known interpretation that a residual network behaves like a one-step time integrator:
the identity path propagates the current state, while the residual branch represents the increment over the time lag.

Inside each interval, we do not optimize point-wise $u_i(t)$ but parameterize the increment trajectory by
$\Delta u_{i,n}(\tau;\Gamma_{i,n})$.
Hence, the mechanistic increment $\varphi_{i,n}$ in \eqref{eq:increment_integral} depends on the \emph{whole} within-interval trajectory.
Feeding $(\Gamma_{i,n},\delta_n)$ into $\mathcal{N}_i$ is therefore a compact way to represent how different candidate
gap/speed trajectories change the integral effect and thus the next thickness--tension state.

When $\delta_n$ varies and is not very small, directly learning $\varphi_{i,n}$ may be harder.
Motivated by the generalized residual idea,
we decompose the increment predictor into two parts in \eqref{eq:aux_clean}:
\begin{equation}
\mathcal{N}_i(\cdot)=\psi_i(\cdot)+\rho_i(\cdot).
\end{equation}
Conceptually, $\psi_i(\cdot)$ captures low-frequency and scale effects strongly related to $\delta_n$,
while $\rho_i(\cdot)$ captures the remaining nonlinear coupling corrections.
This provides a mechanism-consistent interpretation: a baseline increment plus a residual correction
that compensates unmodeled nonlinearities and inter-stand coupling.

%========================
\section{Nash Equilibrium-Based RNE-DMPC}
%========================

The five-stand tandem cold rolling system is strongly coupled through inter-stand tension propagation.
As a result, changes in control actions (roll gap and stand speed) at one stand can affect both upstream and downstream stands,
making centralized online optimization over all stands' decision variables computationally demanding.

To mitigate this issue, we decompose the global predictive-control problem into $N=5$ local subproblems associated with individual stands.
Each local controller optimizes its own decision variables while accounting for coupling via limited information exchange with neighboring controllers.
Motivated by game-theoretic coordination \citep{rawlings2008coordinating}, we formulate distributed coordination as a Nash-equilibrium-seeking iteration.
Based on the trained residual neural network surrogate model, we construct a Nash-equilibrium-based distributed MPC method (RNE-DMPC)
for coordinated thickness--tension regulation and tracking. The overall control structure is shown in Figure~\ref{4}.

\begin{figure*}[htbp]
\centering
\includegraphics[width=\linewidth]{picture/x2.pdf}
\caption{Schematic diagram of the control architecture for a tandem cold rolling mill.}\label{4}
\end{figure*}

At sampling time $t_n$, stand $i$ chooses the polynomial-parameter sequence
$\mathbf{\Gamma}_i(t_n)\in\mathbb{R}^{pN_c}$, where $p=6$.
Let $\mathbf{\Gamma}(t_n)\triangleq \mathrm{col}\{\mathbf{\Gamma}_1(t_n),\ldots,\mathbf{\Gamma}_5(t_n)\}$
denote the joint strategy profile, and let $\mathbf{\Gamma}_{-i}(t_n)$ denote the collection of all strategies except stand $i$.

Given the current measured/estimated deviation state $\Delta x_i(t_n)$ and the strategies
$(\mathbf{\Gamma}_i(t_n),\mathbf{\Gamma}_{Z_i}(t_n))$,
the multi-step prediction used by stand $i$ is written explicitly as
\begin{equation}
\begin{aligned}
\Delta \hat{x}_i(t_{n+s+1};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
&=
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
+
\mathcal{N}_i\!\Big(
\Delta \hat{x}_i(t_{n+s};\cdot),\,
\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i}),\\
&\qquad
\Gamma_{i,n+s},\,
\delta_{n+s};\Theta_i^*
\Big),
\end{aligned}
\label{eq:rollout_mpc_game}
\end{equation}
for $s=0,\ldots,N_p-1$, with initialization $\Delta \hat{x}_i(t_n;\cdot)=\Delta x_i(t_n)$.
Here the neighbor stack $\Delta \hat{x}_{Z_i}(t_{n+s};\mathbf{\Gamma}_{Z_i})$ is generated from neighbors' strategies via the same learned predictors.

Over $[t_{n+s},t_{n+s+1}]$ with length $\delta_{n+s}$,
\begin{equation}
\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\tau
+\Gamma_{i,n+s,2}\tau^2,\qquad \tau \in [0,\delta_{n+s}],
\end{equation}
and the interval-averaged increment is
\begin{equation}
\Delta u_i(t_{n+s})
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3}.
\label{eq:du_avg_clean}
\end{equation}

\begin{remark}
Because inter-stand tension $T_i$ is jointly affected by the adjacent stands $i$ and $i+1$,
the predicted evolution of $\Delta x_i$ depends on neighbors' future actions,
hence the MPC problems are not independent but form a coupled dynamic game.
\end{remark}

At time $t_n$, the local strategy of stand $i$ is
\begin{equation}
\mathbf{\Gamma}_i(t_n)
\triangleq
\mathrm{col}\{\Gamma_{i,n},\Gamma_{i,n+1},\ldots,\Gamma_{i,n+N_c-1}\}
\in \mathbb{R}^{pN_c}.
\end{equation}

In deviation coordinates, the regulation/tracking objective is $\Delta x_i(t)\rightarrow 0$, i.e.
\begin{equation}
\Delta x_{i,\mathrm{ref}}(t_{n+s})\equiv 0\in\mathbb{R}^{d},\qquad d=3.
\end{equation}

Recall $\Delta x_i=[\Delta h_i,\Delta T_{i-1},\Delta T_i]^\top$.
Define the row selectors
\begin{equation}
C^- \triangleq [0\ \ 1\ \ 0]\in\mathbb{R}^{1\times 3},\qquad
C^+ \triangleq [0\ \ 0\ \ 1]\in\mathbb{R}^{1\times 3},
\end{equation}
so that $C^- \Delta x_i=\Delta T_{i-1}$ (upstream interface) and $C^+\Delta x_i=\Delta T_i$ (downstream interface).

For the interface between stands $i$ and $i+1$,
stand $i$'s prediction provides $C^+\Delta \hat{x}_i$, while stand $i+1$'s prediction provides $C^- \Delta \hat{x}_{i+1}$.
Their mismatch measures coupling inconsistency:
\begin{equation}
e_{i}^{\mathrm{sh}}(t_{n+s};\mathbf{\Gamma})
\triangleq
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma}),\qquad i=1,\ldots,4.
\label{eq:shared_tension_mismatch}
\end{equation}

We define the stage cost of stand $i$ as a function of all players' strategies:
\begin{equation}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})
\triangleq
\sum_{s=1}^{N_p}
\left\|
\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma}_i,\mathbf{\Gamma}_{Z_i})
\right\|_{Q_i}^{2}
+
\sum_{s=0}^{N_c-1}
\left\|\Gamma_{i,n+s}\right\|_{R_i}^{2}
+
J_i^{\mathrm{cpl}}(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}),
\label{eq:Ji_game}
\end{equation}
where $Q_i\succeq 0$ weights thickness and tension deviations, and $R_i\succeq 0$ penalizes actuation magnitudes.

The coupling term $J_i^{\mathrm{cpl}}$ explicitly reflects the game/coordination requirement on shared tensions.
A simple and effective choice is to penalize the interface mismatches adjacent to stand $i$:
\begin{equation}
\begin{aligned}
J_i^{\mathrm{cpl}}
&=
\mu_i\sum_{s=1}^{N_p}
\Big(
\mathbb{I}_{\{i\ge 2\}}
\big|
C^-\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^+\Delta \hat{x}_{i-1}(t_{n+s};\mathbf{\Gamma})
\big|^2 \\
&\qquad\quad
+
\mathbb{I}_{\{i\le 4\}}
\big|
C^+\Delta \hat{x}_i(t_{n+s};\mathbf{\Gamma})
-
C^-\Delta \hat{x}_{i+1}(t_{n+s};\mathbf{\Gamma})
\big|^2
\Big),
\end{aligned}
\label{eq:coupling_cost}
\end{equation}
with $\mu_i>0$ and indicator $\mathbb{I}_{\{\cdot\}}$.
This term makes the coupling conflict explicit: unilateral actions that locally reduce thickness error may worsen shared-tension
compatibility and thus increase $J_i$, and also affect neighbors' objectives.

We enforce the absolute-input bounds and within-interval increment bounds.

Absolute input bounds (roll gap and speed):
\begin{equation}
u_{i,\min}\le u_i(t_{n+s})\le u_{i,\max},
\qquad s=0,\ldots,N_p-1,
\label{eq:u_abs_game}
\end{equation}
where $u_i(t)=[s_i(t),\,v_i(t)]^\top$.

Within-interval increment-trajectory bounds:
\begin{equation}
\Delta u_{i,\min}\le \Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\le \Delta u_{i,\max},
\qquad \forall\tau\in[0,\delta_{n+s}],\ s=0,\ldots,N_p-1,
\label{eq:du_traj_game}
\end{equation}
where $\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})$ is given by the polynomial parameterization.

Consistency with discrete execution:
Define the interval-averaged increment
\begin{equation}
\Delta u_i(t_{n+s})
\triangleq
\frac{1}{\delta_{n+s}}\int_{0}^{\delta_{n+s}}\Delta u_{i,n+s}(\tau;\Gamma_{i,n+s})\,d\tau
=
\Gamma_{i,n+s,0}
+\Gamma_{i,n+s,1}\frac{\delta_{n+s}}{2}
+\Gamma_{i,n+s,2}\frac{\delta_{n+s}^2}{3},
\label{eq:du_avg_game}
\end{equation}
and propagate the absolute input along the horizon by
\begin{equation}
u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n),\qquad
u_i(t_{n+s})=u_i(t_{n+s-1})+\Delta u_i(t_{n+s}),\ s=1,\ldots,N_p-1,
\label{eq:u_prop_game}
\end{equation}
where $u_i(t_{n-1})$ is the applied (measured) input from the previous sampling instant.

Compact feasible set:
\begin{equation}
\Omega_i \triangleq
\Big{\mathbf{\Gamma}_i\ \Big|\
\eqref{eq:rollout_mpc_game}\ \text{holds and}
\eqref{eq:u_abs_game},\eqref{eq:du_traj_game},\eqref{eq:u_prop_game}\ \text{are satisfied}
\Big}.
\label{eq:Omega_i_game}
\end{equation}

Given neighbors' current strategies,
stand $i$ solves the differentiable NLP:
\begin{equation}
\mathbf{\Gamma}_i^{\mathrm{BR}}
=
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}\
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}).
\label{eq:local_BR}
\end{equation}
Because the learned surrogate is differentiable, \eqref{eq:local_BR} can be solved by standard gradient-based NLP solvers.

At each sampling time $t_n$, the distributed MPC coordination induces a finite-horizon dynamic game:
players are stands $i=1,\ldots,5$; strategy sets are $\Omega_i$; and payoff (cost) functions are $J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i})$
defined in \eqref{eq:Ji_game}--\eqref{eq:coupling_cost}.

A joint strategy profile $\mathbf{\Gamma}^*=\mathrm{col}\{\mathbf{\Gamma}_1^*,\ldots,\mathbf{\Gamma}_5^*\}$
is a Nash equilibrium if
\begin{equation}
\forall i\in\{1,\ldots,5\},\qquad
\mathbf{\Gamma}_i^*\in
\arg\min_{\mathbf{\Gamma}_i\in\Omega_i}
J_i(\mathbf{\Gamma}_i;\mathbf{\Gamma}_{-i}^*).
\label{eq:NE_def}
\end{equation}
This definition explicitly characterizes the strategic coupling:
each player's optimal decision depends on neighbors' decisions through the shared-tension dynamics and the coupling term.

To compute an NE online with limited communication, we employ a relaxed best-response iteration.
Let $l$ denote the Nash-iteration index.
Given $\mathbf{\Gamma}^{(l-1)}$, each stand computes a best response $\mathbf{\Gamma}_i^{\mathrm{BR},(l)}$
by solving \eqref{eq:local_BR}, and then updates with relaxation:
\begin{equation}
\mathbf{\Gamma}_i^{(l)}
=
(1-\omega)\mathbf{\Gamma}_i^{(l-1)}
+
\omega\,\mathbf{\Gamma}_i^{\mathrm{BR},(l)},
\qquad \omega\in(0,1].
\label{eq:relaxed_BR}
\end{equation}
The relaxation factor $\omega$ mitigates oscillations caused by strong coupling and improves practical convergence.

The Nash equilibrium is computed through distributed best-response iterations, summarized in Table~\ref{tab:nash_iter_en}.

The convergence metric in Step F is defined as
\begin{equation}
\varsigma^{(l)}
\triangleq
\max_i
\frac{\left\|
\mathbf{\Gamma}_i^{(l)}-\mathbf{\Gamma}_i^{(l-1)}
\right\|_2}{
\left\|
\mathbf{\Gamma}_i^{(l-1)}
\right\|_2+\epsilon},
\end{equation}
with $\epsilon>0$ small.

\begin{table}[t]
\centering
\small
\renewcommand{\arraystretch}{1.12}
\setlength{\tabcolsep}{3.5pt}
\caption{Relaxed distributed Nash best-response iteration for RNE-DMPC (five-stand).}
\label{tab:nash_iter_en}
\begin{tabularx}{\linewidth}{>{\centering\arraybackslash}p{0.11\linewidth} X}
\toprule
\textbf{Step} & \textbf{Description} \\
\midrule
A &
Initialize $l=1$ and warm-start $\mathbf{\Gamma}_i^{(0)}$ (e.g., from the previous sampling time). \\

B &
Communicate $\mathbf{\Gamma}_i^{(l-1)}$ (or the induced predicted trajectories) among neighbors; form $\mathbf{\Gamma}_{Z_i}^{(l-1)}$. \\

C &
Given $\mathbf{\Gamma}_{-i}^{(l-1)}$, solve the best-response NLP \eqref{eq:local_BR} to obtain $\mathbf{\Gamma}_i^{\mathrm{BR},(l)}$. \\

D &
Update the relaxed strategy using \eqref{eq:relaxed_BR} and compute the induced predictions
$\Delta \hat{x}_i^{(l)}(\cdot;\mathbf{\Gamma}^{(l)})$ via \eqref{eq:rollout_mpc_game}. \\

E &
Broadcast $\mathbf{\Gamma}_i^{(l)}$ and the predicted interface quantities needed in \eqref{eq:coupling_cost}
(e.g., $C^\pm\Delta \hat{x}_i^{(l)}$) to neighbors. \\

F &
Compute $\varsigma^{(l)}$; if $\varsigma^{(l)}\le \varsigma_{\mathrm{tol}}$, stop and set $\mathbf{\Gamma}_i^*=\mathbf{\Gamma}_i^{(l)}$; otherwise $l\leftarrow l+1$ and repeat. \\
\bottomrule
\end{tabularx}
\end{table}

After convergence, only the first-interval parameters $\Gamma_{i,n}^*$ are applied.
The within-interval increment is $\Delta u_{i,n}(\tau;\Gamma_{i,n}^*)$ and the discrete applied increment uses the interval average:
\begin{equation}
\Delta u_i(t_n)
=
\Gamma_{i,n,0}^*
+
\Gamma_{i,n,1}^*\frac{\delta_n}{2}
+
\Gamma_{i,n,2}^*\frac{\delta_n^2}{3},
\end{equation}
followed by $u_i(t_n)=u_i(t_{n-1})+\Delta u_i(t_n)$.

The overall control flow chart is shown in Fig.~\ref{liu}.

\begin{figure}
\centering
\includegraphics[width=\linewidth]{picture/x5.pdf}
\caption{The overall system control flow chart.}\label{liu}
\end{figure}

\begin{remark}
Equations \eqref{eq:Ji_game}--\eqref{eq:coupling_cost} and the NE definition \eqref{eq:NE_def} explicitly show that each player's objective
depends on others' strategies through the coupled prediction dynamics and the shared-tension interface mismatch penalty,
thus making the multi-stand strategic interaction and coordination mechanism transparent.
\end{remark}
% NOTE(review): the following lines are leftover chat/instruction text accidentally pasted
% into the manuscript source (not manuscript content); commented out so the document compiles cleanly.
% 这是我最新的第二章,第三章,第四章。在多项式参数化的部分, 这个地方一般的情况用n阶进行方案设计,仿真可以选为二阶的,所有的时变信号参数化都这样改。给出改后的完整的latex代码,上下文要符合逻辑,在符合逻辑的前提下其他地方都不要改
%
% 分享这个问答