# -*- coding: utf-8 -*- import pandas as pd import...

Criado em: 17 de junho de 2026

Respondido usando GPT-5.5 Thinking por Chat01

Pergunta

# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split, Subset

from sklearn.preprocessing import MinMaxScaler,MaxAbsScaler,StandardScaler

from util.env import get_device, set_device
from util.preprocess import build_loc_net, construct_data
from util.net_struct import get_feature_map, get_fc_graph_struc
from util.iostream import printsep

from datasets.TimeDataset import TimeDataset

import torch

import torch_sparse

# Print the installed library versions at import time to help diagnose
# torch / torch_sparse binary mismatches.
print(torch.__version__)

print(torch_sparse.__version__)

#from models.GDN import GDN
from GDN import GDN

from train import train
from test import test
from evaluate import get_err_scores, get_best_performance_data, get_val_performance_data, get_full_err_scores

import sys
from datetime import datetime
import time

import os
import argparse
from pathlib import Path

import matplotlib.pyplot as plt
from dataset import STVSegLoader
from dataset import mslSegLoader
import json
import random
#os.environ['CUDA_LAUNCH_BLOCKING'] = '2'

class Main():
    """Experiment driver: load the CSV data, build the GDN model, train and score it."""

    def __init__(self, train_config, env_config, debug=False):
        """Load data, build datasets / loaders and instantiate the model.

        Args:
            train_config: dict; this method reads 'slide_win', 'slide_stride',
                'seed', 'batch', 'val_ratio', 'dim', 'out_layer_num',
                'out_layer_inter_dim' and 'topk'. The whole dict is later
                handed to ``train`` by ``run``.
            env_config: dict; 'dataset' and 'device' are read here,
                'load_model_path', 'save_path' and 'report' by other methods.
            debug: accepted for interface compatibility; not used.
        """
        self.train_config = train_config
        self.env_config = env_config
        self.datestr = None

        dataset = self.env_config['dataset']
        # Named *_df so the imported train()/test() functions are not shadowed.
        train_df = pd.read_csv(f'./data/{dataset}/train.csv', sep=',', index_col=0)
        test_df = pd.read_csv(f'./data/{dataset}/test.csv', sep=',', index_col=0)

        feature_map = get_feature_map(dataset)
        fc_struc = get_fc_graph_struc(dataset)

        set_device(env_config['device'])
        self.device = get_device()

        fc_edge_index = build_loc_net(fc_struc, list(train_df.columns), feature_map=feature_map)
        fc_edge_index = torch.tensor(fc_edge_index, dtype=torch.long)

        self.feature_map = feature_map

        train_dataset_indata = construct_data(train_df, feature_map, labels=0)
        # NOTE(review): the test CSV must provide an 'attack' column here.
        test_dataset_indata = construct_data(test_df, feature_map, labels=test_df.attack.tolist())

        cfg = {
            'slide_win': train_config['slide_win'],
            'slide_stride': train_config['slide_stride'],
        }

        train_dataset = TimeDataset(train_dataset_indata, fc_edge_index, mode='train', config=cfg)
        test_dataset = TimeDataset(test_dataset_indata, fc_edge_index, mode='test', config=cfg)

        # NOTE(review): the sliding-window loaders built here are not used
        # (the mslSegLoader loaders below are what train/test consume). The
        # call is kept because get_loaders() draws from the `random` module,
        # so dropping it would shift the seeded random stream.
        self.get_loaders(train_dataset, train_config['seed'], train_config['batch'],
                         val_ratio=train_config['val_ratio'])

        self.train_dataset = train_dataset
        self.test_dataset = test_dataset

        # Validation and test both iterate the "test" split of mslSegLoader.
        self.train_dataloader = DataLoader(mslSegLoader(win_size=27, flag="train"),
                                           batch_size=128, shuffle=True)
        self.val_dataloader = DataLoader(mslSegLoader(win_size=27, flag="test"),
                                         batch_size=1, shuffle=False)
        self.test_dataloader = DataLoader(mslSegLoader(win_size=27, flag="test"),
                                          batch_size=1, shuffle=False)

        edge_index_sets = [fc_edge_index]

        self.model = GDN(edge_index_sets, len(feature_map),
                         dim=train_config['dim'],
                         input_dim=train_config['slide_win'],
                         out_layer_num=train_config['out_layer_num'],
                         out_layer_inter_dim=train_config['out_layer_inter_dim'],
                         topk=train_config['topk']
                         ).to(self.device)

    def run(self):
        """Train (unless a checkpoint path was supplied), then evaluate and print scores."""
        if len(self.env_config['load_model_path']) > 0:
            model_save_path = self.env_config['load_model_path']
        else:
            model_save_path = self.get_save_path()[0]

            train_start = time.perf_counter()
            # Use the instance's config rather than a module-level global so the
            # class also works when it is imported instead of run as a script.
            self.train_log = train(self.model, model_save_path,
                                   config=self.train_config,
                                   train_dataloader=self.train_dataloader,
                                   val_dataloader=self.val_dataloader,
                                   feature_map=self.feature_map,
                                   test_dataloader=self.test_dataloader,
                                   test_dataset=self.test_dataset,
                                   train_dataset=self.train_dataset,
                                   dataset_name=self.env_config['dataset']
                                   )
            train_elapsed = time.perf_counter() - train_start
            print('CausalTrace+程序运行时间:%s毫秒' % (int(train_elapsed * 1000)))

        # test
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        self.model.load_state_dict(torch.load(model_save_path))
        best_model = self.model.to(self.device)

        test_start = time.perf_counter()
        _, self.test_result = test(best_model, self.test_dataloader)
        test_elapsed = time.perf_counter() - test_start
        print('CausalTrace+程序测试时间:%s毫秒' % (int(test_elapsed * 1000)))

        _, self.val_result = test(best_model, self.val_dataloader)

        self.get_score(self.test_result, self.val_result)

    def get_loaders(self, train_dataset, seed, batch, val_ratio=0.1):
        """Split ``train_dataset`` into train / validation loaders.

        A contiguous block of ``int(len * val_ratio)`` samples, starting at a
        random offset, becomes the validation subset; everything else is the
        training subset.

        Args:
            train_dataset: any sized, indexable dataset.
            seed: accepted for interface compatibility; not used here.
            batch: batch size for both loaders.
            val_ratio: fraction of samples held out for validation.

        Returns:
            ``(train_dataloader, val_dataloader)``; only the former shuffles.
        """
        dataset_len = int(len(train_dataset))
        train_use_len = int(dataset_len * (1 - val_ratio))
        val_use_len = int(dataset_len * val_ratio)

        val_start_index = random.randrange(train_use_len)
        val_end_index = val_start_index + val_use_len
        indices = torch.arange(dataset_len)

        train_sub_indices = torch.cat([indices[:val_start_index], indices[val_end_index:]])
        train_subset = Subset(train_dataset, train_sub_indices)

        val_sub_indices = indices[val_start_index:val_end_index]
        val_subset = Subset(train_dataset, val_sub_indices)

        train_dataloader = DataLoader(train_subset, batch_size=batch, shuffle=True)
        val_dataloader = DataLoader(val_subset, batch_size=batch, shuffle=False)

        return train_dataloader, val_dataloader

    def get_score(self, test_result, val_result):
        """Compute and print F1 / precision / recall / AUCs / threshold.

        Args:
            test_result: ``[predictions, ground_truth, labels]`` as returned by
                ``test``; labels are read from ``test_result[2][:, 0]``.
            val_result: same structure, for the validation pass.

        Raises:
            ValueError: if ``env_config['report']`` is neither 'best' nor 'val'.
        """
        np_test_result = np.array(test_result)
        test_labels = np_test_result[2, :, 0].tolist()

        test_scores, normal_scores = get_full_err_scores(test_result, val_result)

        top1_best_info = get_best_performance_data(test_scores, test_labels, topk=1)
        top1_val_info = get_val_performance_data(test_scores, normal_scores, test_labels, topk=1)

        print('=========================** Result **============================\n')

        report = self.env_config['report']
        if report == 'best':
            info = top1_best_info
        elif report == 'val':
            info = top1_val_info
        else:
            # Previously this fell through to indexing None (opaque TypeError).
            raise ValueError(f"unknown report mode {report!r}; expected 'best' or 'val'")

        print(f'F1 score: {info[0]}')
        print(f'precision: {info[1]}')
        print(f'recall: {info[2]}\n')
        print(f'ROC-AUC: {info[3]}')
        print(f'PR-AUC: {info[4]}\n')
        print(f'Threshold:{info[5]}\n')

    def get_save_path(self, feature_name=''):
        """Return ``[checkpoint_path, results_csv_path]``, creating their directories.

        The timestamp is generated once per instance and reused so both paths
        share the same stamp. ``feature_name`` is accepted but not used.
        """
        dir_path = self.env_config['save_path']

        if self.datestr is None:
            now = datetime.now()
            self.datestr = now.strftime('%m|%d-%H:%M:%S')
        datestr = self.datestr

        paths = [
            f'./pretrained/{dir_path}/best_{datestr}.pt',
            f'./results/{dir_path}/{datestr}.csv',
        ]

        for path in paths:
            dirname = os.path.dirname(path)
            Path(dirname).mkdir(parents=True, exist_ok=True)

        return paths

# Script entry point: parse CLI options, seed every RNG, then run the experiment.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument('-batch', help='batch size', type=int, default=512)
    parser.add_argument('-epoch', help='train epoch', type=int, default=100)
    parser.add_argument('-slide_win', help='slide_win', type=int, default=27)
    parser.add_argument('-dim', help='dimension', type=int, default=64)
    parser.add_argument('-slide_stride', help='slide_stride', type=int, default=5)
    parser.add_argument('-save_path_pattern', help='save path pattern', type=str, default='')
    parser.add_argument('-dataset', help='wadi / swat', type=str, default='msl')
    # NOTE(review): the default is a torch.device while a value given on the
    # command line arrives as str — confirm set_device() accepts both.
    parser.add_argument('-device', help='cuda / cpu', type=str, default=torch.device('cuda:0'))
    parser.add_argument('-random_seed', help='random seed', type=int, default=2023)
    parser.add_argument('-comment', help='experiment comment', type=str, default='')
    parser.add_argument('-out_layer_num', help='outlayer num', type=int, default=1)
    parser.add_argument('-out_layer_inter_dim', help='out_layer_inter_dim', type=int, default=256)
    parser.add_argument('-decay', help='decay', type=float, default=0)
    parser.add_argument('-val_ratio', help='val ratio', type=float, default=0.1)
    parser.add_argument('-topk', help='topk num', type=int, default=27)
    parser.add_argument('-report', help='best / val', type=str, default='best')
    parser.add_argument('-load_model_path', help='trained model path', type=str, default='')
    print("jianbing")
    args = parser.parse_args()

    # Seed Python, NumPy and torch (CPU + all GPUs) and force deterministic cuDNN.
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    os.environ['PYTHONHASHSEED'] = str(args.random_seed)

    train_config = {
        'batch': args.batch,
        'epoch': args.epoch,
        'slide_win': args.slide_win,
        'dim': args.dim,
        'slide_stride': args.slide_stride,
        'comment': args.comment,
        'seed': args.random_seed,
        'out_layer_num': args.out_layer_num,
        'out_layer_inter_dim': args.out_layer_inter_dim,
        'decay': args.decay,
        'val_ratio': args.val_ratio,
        'topk': args.topk,
    }

    env_config = {
        'save_path': args.save_path_pattern,
        'dataset': args.dataset,
        'report': args.report,
        'device': args.device,
        'load_model_path': args.load_model_path
    }

    main = Main(train_config, env_config, debug=False)
    main.run()

import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import time
from util.time import *
from util.env import *
from sklearn.metrics import mean_squared_error
from stocBiO import *
from test import *
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sknetwork.ranking import PageRank
from evaluate import get_best_performance_data, get_val_performance_data, get_full_err_scores
from sklearn.metrics import precision_score, recall_score, roc_auc_score, f1_score
from torch.utils.data import DataLoader, random_split, Subset
from scipy.stats import iqr
import logging
import pandas as pd
import scipy.optimize as sopt

logging.basicConfig(level=logging.INFO, format='%(asctime)s%(message)s')

class BaseLearner(object):
    """Base class for causal-structure learners.

    Subclasses implement ``learn``; the learned graph can be exposed through
    the ``causal_matrix`` property.
    """

    def __init__(self):
        self._causal_matrix = None

    def learn(self, data, *args, **kwargs):
        """Fit the learner on ``data``; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def causal_matrix(self):
        """The stored causal matrix, or ``None`` if none has been set."""
        return self._causal_matrix

    @causal_matrix.setter
    def causal_matrix(self, value):
        # The setter previously evaluated the attribute without assigning,
        # so assignments were silently discarded.
        self._causal_matrix = value

class Tensor(object):
    """Thin wrapper exposing the input as a NumPy array in ``self.data``."""

    def __init__(self, indata):
        self._set_tensor(indata)

    def _set_tensor(self, indata):
        """Validate ``indata`` and store it as ``self.data``.

        Args:
            indata: a 2-D or 3-D ``numpy.ndarray`` (stored as is) or a
                ``pandas.DataFrame`` (its ``.values`` are stored).

        Raises:
            TypeError: for an ndarray of any other rank, or any other type.
        """
        if isinstance(indata, np.ndarray):
            if indata.ndim not in (2, 3):
                raise TypeError("Input numpy.ndarray ndim error!")
            self.data = indata
        elif isinstance(indata, pd.DataFrame):
            self.data = indata.values
        else:
            raise TypeError("Input data is not numpy.ndarray or pd.DataFrame")

class Notears(BaseLearner):
    """Linear NOTEARS-style structure learner with a variance-hinge penalty.

    After ``learn`` the instance exposes:
        predict_matrix: (d, d) int array, 1 where an edge weight survived
            thresholding.
        sunshi: objective value at the final solution.
    """

    def __init__(self, lambda1=0.1,
                 eta=0.001,
                 loss_type='l2',
                 max_iter=10,
                 h_tol=1e-8,
                 rho_max=1e+16,
                 w_threshold=0.05):
        """Store the optimisation hyper-parameters.

        Args:
            lambda1: weight of the hinge term in the objective; also added to
                the gradient.
            eta: extra constant added to the gradient.
            loss_type: only 'l2' is supported.
            max_iter: maximum number of outer (dual-ascent) iterations.
            h_tol: stop once the acyclicity value ``h`` is at most this.
            rho_max: stop once the penalty coefficient reaches this.
            w_threshold: absolute weights below this are zeroed.
        """
        super().__init__()
        self.lambda1 = lambda1
        self.eta = eta
        self.loss_type = loss_type
        self.max_iter = max_iter
        self.h_tol = h_tol
        self.rho_max = rho_max
        self.w_threshold = w_threshold

    def learn(self, data, yilaitu):
        """Estimate the graph from ``data`` and store the results on ``self``.

        Args:
            data: ``numpy.ndarray`` or ``Tensor`` of shape (n_samples, n_nodes).
            yilaitu: prior dependency graph as a NumPy array
                (presumably (n_nodes, n_nodes) — TODO confirm).

        Raises:
            TypeError: if ``data`` is neither an ndarray nor a ``Tensor``.
        """
        if isinstance(data, np.ndarray):
            X = data
        elif isinstance(data, Tensor):
            X = data.data
        else:
            raise TypeError(
                'The type of data must be Tensor or numpy.ndarray, but got {}'.format(type(data)))

        predict_matrix, sunshi = self.notears_linear(
            X, yilaitu,
            lambda1=self.lambda1,
            eta=self.eta,
            loss_type=self.loss_type,
            max_iter=self.max_iter,
            h_tol=self.h_tol,
            rho_max=self.rho_max,
            w_threshold=self.w_threshold)
        self.predict_matrix = predict_matrix
        self.sunshi = sunshi

    def notears_linear(self, X, yilaitu, lambda1, eta, loss_type, max_iter, h_tol, rho_max,
                       w_threshold):
        """Solve the constrained problem with an augmented Lagrangian + L-BFGS-B.

        Returns:
            ``(adjacency, objective)``: the (d, d) 0/1 int adjacency matrix
            after thresholding, and the objective evaluated at the solution.

        Raises:
            ValueError: if ``loss_type`` is not 'l2'.
        """
        if loss_type != 'l2':
            # Any other value used to surface later as an UnboundLocalError.
            raise ValueError("unknown loss type: {!r}".format(loss_type))

        def _loss(W):
            """Least-squares loss and its gradient w.r.t. ``W``."""
            R = X - X @ W
            loss = 0.5 / X.shape[0] * (R ** 2).sum()
            G_loss = -1.0 / X.shape[0] * X.T @ R
            return loss, G_loss

        def _h(W):
            """Polynomial acyclicity value and its gradient."""
            M = np.eye(d) + W * W / d
            E = np.linalg.matrix_power(M, d - 1)
            h = (E.T * M).sum() - d
            G_h = E.T * W * 2
            return h, G_h

        def _adj(w):
            """Map the doubled variable [w_pos, w_neg] to the (d, d) matrix."""
            return (w[:d * d] - w[d * d:]).reshape([d, d])

        def sqrt_var(x):
            """Column-wise sample standard deviation of a torch tensor."""
            mean_x = x.mean(0)
            return torch.norm(x - mean_x, p=2, dim=0) / ((x.shape[0] - 1) ** 0.5)

        def hinge(values, threshold=1.0, reduction='sum'):
            """Squared hinge: penalise entries of ``values`` below ``threshold``."""
            diff = F.relu(threshold - values) ** 2
            if reduction == 'sum':
                loss = diff.sum()
            elif reduction == 'mean':
                loss = diff.mean()
            return loss

        def cosine_similarity_loss(A, B):
            """Row-wise ``1 - cosine_similarity`` between two arrays."""
            A = torch.from_numpy(A)
            B = torch.from_numpy(B)
            return 1 - F.cosine_similarity(A, B)

        def biased(A, B):
            """Squared Frobenius distance between two arrays."""
            diff = torch.from_numpy(A) - torch.from_numpy(B)
            return torch.norm(diff, p=2) ** 2

        def _func(w):
            """Objective and gradient in the doubled-variable parameterisation."""
            W = _adj(w)
            loss, G_loss = _loss(W)
            h, G_h = _h(W)
            W_sparse = torch.from_numpy(W)
            hinge_loss = hinge(sqrt_var(W_sparse), threshold=0.5, reduction='sum')
            # NOTE(review): both prior-graph terms are evaluated but not added
            # to the objective — confirm whether they were meant to be.
            cosine = cosine_similarity_loss(W, yilaitu).mean()
            biased_regular = biased(W, yilaitu)
            obj = loss + 0.5 * rho * h * h + alpha * h + lambda1 * hinge_loss
            G_smooth = G_loss + (rho * h + alpha) * G_h
            # NOTE(review): the gradient carries constant lambda1 + eta terms
            # rather than the derivative of the hinge term above.
            g_obj = np.concatenate(
                (G_smooth + lambda1 + eta, -G_smooth + lambda1 + eta), axis=None)
            return obj, g_obj

        d = X.shape[1]
        w_est, rho, alpha, h = np.zeros(2 * d * d), 1.0, 0.0, np.inf
        # Diagonal entries are pinned to 0 (no self-loops); others are >= 0.
        bnds = [(0, 0) if i == j else (0, None)
                for _ in range(2) for i in range(d) for j in range(d)]
        X = X - np.mean(X, axis=0, keepdims=True)

        for _ in range(max_iter):
            w_new, h_new = None, None
            while rho < rho_max:
                sol = sopt.minimize(_func, w_est, method='L-BFGS-B', jac=True, bounds=bnds)
                w_new = sol.x
                h_new, _ = _h(_adj(w_new))
                if h_new > 0.25 * h:
                    # Not enough progress on acyclicity: tighten the penalty.
                    rho *= 10
                else:
                    break
            w_est, h = w_new, h_new
            alpha += rho * h
            if h <= h_tol or rho >= rho_max:
                break

        W_est = _adj(w_est)
        W_est[np.abs(W_est) < w_threshold] = 0
        return (W_est != 0).astype(int), _func(w_est)[0]

def loss_func(y_pred, y_true):
    """Mean-squared error between ``y_pred`` and ``y_true`` after aligning shapes.

    Args:
        y_pred: prediction tensor.
        y_true: target tensor. A 3-D target whose last dimension is 1 is
            squeezed to 2-D; a 2-D target whose shape still differs from
            ``y_pred`` is truncated along dim 1 to ``y_pred.shape[1]``.

    Returns:
        Scalar tensor holding the mean squared error.
    """
    # Drop a trailing singleton dimension: (a, b, 1) -> (a, b).
    if y_true.dim() == 3 and y_true.shape[-1] == 1:
        y_true = y_true.squeeze(-1)
    # Guard: if the shapes still disagree, keep only the leading columns.
    if y_pred.shape != y_true.shape and y_true.dim() == 2:
        y_true = y_true[:, :y_pred.shape[1]]
    return F.mse_loss(y_pred, y_true, reduction='mean')

def _pagerank_scores(causal_matrix, node_num):
    """Return per-node PageRank scores of the transposed causal graph.

    Falls back to uniform scores (``1 / node_num`` each) when the graph has no
    edges or when PageRank raises ``ValueError``.
    """
    adjacency = np.abs(causal_matrix.T).astype(np.float32)
    if adjacency.sum() < 1e-6:
        print("[Warning] Causal matrix too sparse, using uniform scores")
        return np.ones(node_num) / node_num
    pagerank = PageRank()
    try:
        pagerank.fit(adjacency)
    except ValueError as e:
        print(f"[Warning] PageRank fit failed: {e}, using uniform scores")
        return np.ones(node_num) / node_num
    return pagerank.scores_


def train(model=None, save_path='', config=None, train_dataloader=None, val_dataloader=None,
          feature_map=None, test_dataloader=None, test_dataset=None,
          dataset_name='swat', train_dataset=None):
    """Train ``model`` and save the best weights to ``save_path``.

    For every batch a ``Notears`` graph is fitted on the flattened window, its
    PageRank scores re-weight the model output per node, and the MSE against
    the last time step of the window is back-propagated.

    Args:
        model: called as ``model(x, None)``; must return a sequence whose
            item 0 is the prediction and item 1 a tensor reused as the prior
            graph for the next Notears fit.
        save_path: destination of ``torch.save(model.state_dict(), ...)``.
        config: dict providing 'decay' and 'epoch'.
        train_dataloader: yields ``(x, labels)``; assumes x is
            (batch, window, nodes) — TODO confirm against the loader.
        val_dataloader: if given, drives checkpointing and early stopping via
            ``test``; otherwise the accumulated training loss is used.
        feature_map, test_dataloader, test_dataset, dataset_name,
        train_dataset: accepted for interface compatibility; not used.

    Returns:
        List with the loss of every optimisation step.
    """
    config = {} if config is None else config

    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=config['decay'])
    device = get_device()

    train_loss_list = []
    min_loss = 1e+8
    epoch = config['epoch']
    early_stop_win = 15
    stop_improve_count = 0
    inner_steps = 1

    # Prior graph handed to Notears; created on the first batch as all zeros
    # (no prior) so its size follows the data's node count.
    yilaitu = None

    model.train()
    for i_epoch in range(epoch):
        acu_loss = 0
        model.train()

        for x, _labels in train_dataloader:
            x = x.float().to(device)

            # The model is fed node-first data: swap the last two axes.
            x_model = x.permute(0, 2, 1).contiguous()
            batch_, node_, window_ = x_model.shape

            # Notears sees every time step of every window as one sample row.
            x_input = x.reshape(batch_ * window_, node_).cpu().numpy()

            # NOTE(review): the regression target is the last step of the very
            # window the model receives as input — confirm this is intended.
            y = x[:, -1, :]

            if yilaitu is None:
                yilaitu = np.zeros((node_, node_))

            for iterator in range(inner_steps):
                nt = Notears()
                nt.learn(x_input, yilaitu)
                scores = torch.from_numpy(_pagerank_scores(nt.predict_matrix, node_)).to(device)

                optimizer.zero_grad()
                model_outputs = model(x_model, None)
                out = model_outputs[0].float().to(device)
                result = out * scores.unsqueeze(0).to(out.dtype)
                loss = loss_func(result, y)

                # Reuse the model's second output as the next prior graph;
                # detach so the NumPy conversion cannot fail on a tensor that
                # requires grad.
                yilaitu = model_outputs[1].detach().cpu().numpy()

                loss.backward()
                optimizer.step()
                print("iterator : {:.4f}, optimization objective: {}\n".format(
                    iterator, loss.item()))

                train_loss_list.append(loss.item())
                acu_loss += loss.item()

        # NOTE(review): the extra "/ 3" looks like a leftover from a 3-step
        # inner loop; kept so the logged value is unchanged.
        print('epoch ({} / {}) (Loss:{:.8f})'.format(
            i_epoch, epoch, acu_loss / len(train_dataloader) / 3), flush=True)

        if val_dataloader is not None:
            val_loss, _val_result = test(model, val_dataloader)
            if val_loss < min_loss:
                torch.save(model.state_dict(), save_path)
                min_loss = val_loss
                stop_improve_count = 0
            else:
                stop_improve_count += 1
            if stop_improve_count >= early_stop_win:
                break
        elif acu_loss < min_loss:
            torch.save(model.state_dict(), save_path)
            min_loss = acu_loss

    return train_loss_list


import torch.nn as nn

import time
from util.time import *
from util.env import *
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import pandas as pd
import scipy.optimize as sopt

logging.basicConfig(level=logging.INFO, format='%(asctime)s%(message)s')

class BaseLearner(object):
    """Base class for causal-structure learners.

    Subclasses implement ``learn``; the learned graph can be exposed through
    the ``causal_matrix`` property.
    """

    def __init__(self):
        self._causal_matrix = None

    def learn(self, data, *args, **kwargs):
        """Fit the learner on ``data``; must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def causal_matrix(self):
        """The stored causal matrix, or ``None`` if none has been set."""
        return self._causal_matrix

    @causal_matrix.setter
    def causal_matrix(self, value):
        # The setter previously evaluated the attribute without assigning,
        # so assignments were silently discarded.
        self._causal_matrix = value

class Tensor(object):
    """Thin wrapper exposing the input as a NumPy array in ``self.data``."""

    def __init__(self, indata):
        self._set_tensor(indata)

    def _set_tensor(self, indata):
        """Validate ``indata`` and store it as ``self.data``.

        Args:
            indata: a 2-D or 3-D ``numpy.ndarray`` (stored as is) or a
                ``pandas.DataFrame`` (its ``.values`` are stored).

        Raises:
            TypeError: for an ndarray of any other rank, or any other type.
        """
        if isinstance(indata, np.ndarray):
            if indata.ndim not in (2, 3):
                raise TypeError("Input numpy.ndarray ndim error!")
            self.data = indata
        elif isinstance(indata, pd.DataFrame):
            self.data = indata.values
        else:
            raise TypeError("Input data is not numpy.ndarray or pd.DataFrame")

class Notears(BaseLearner):
    """Linear NOTEARS-style structure learner with a variance-hinge penalty.

    After ``learn`` the instance exposes:
        predict_matrix: (d, d) int array, 1 where an edge weight survived
            thresholding.
        sunshi: objective value at the final solution.
    """

    def __init__(self, lambda1=0.1,
                 loss_type='l2',
                 max_iter=5,
                 h_tol=1e-8,
                 rho_max=1e+16,
                 w_threshold=0.05):
        """Store the optimisation hyper-parameters.

        Args:
            lambda1: weight of the hinge term in the objective; also added to
                the gradient.
            loss_type: only 'l2' is supported.
            max_iter: maximum number of outer (dual-ascent) iterations.
            h_tol: stop once the acyclicity value ``h`` is at most this.
            rho_max: stop once the penalty coefficient reaches this.
            w_threshold: absolute weights below this are zeroed.
        """
        super().__init__()
        self.lambda1 = lambda1
        self.loss_type = loss_type
        self.max_iter = max_iter
        self.h_tol = h_tol
        self.rho_max = rho_max
        self.w_threshold = w_threshold

    def learn(self, data):
        """Estimate the graph from ``data`` and store the results on ``self``.

        Args:
            data: ``numpy.ndarray`` or ``Tensor`` of shape (n_samples, n_nodes).

        Raises:
            TypeError: if ``data`` is neither an ndarray nor a ``Tensor``.
        """
        if isinstance(data, np.ndarray):
            X = data
        elif isinstance(data, Tensor):
            X = data.data
        else:
            raise TypeError(
                'The type of data must be Tensor or numpy.ndarray, but got {}'.format(type(data)))

        predict_matrix, sunshi = self.notears_linear(
            X,
            lambda1=self.lambda1,
            loss_type=self.loss_type,
            max_iter=self.max_iter,
            h_tol=self.h_tol,
            rho_max=self.rho_max,
            w_threshold=self.w_threshold)
        self.predict_matrix = predict_matrix
        self.sunshi = sunshi

    def notears_linear(self, X, lambda1, loss_type, max_iter, h_tol, rho_max, w_threshold):
        """Solve the constrained problem with an augmented Lagrangian + L-BFGS-B.

        Returns:
            ``(adjacency, objective)``: the (d, d) 0/1 int adjacency matrix
            after thresholding, and the objective evaluated at the solution.

        Raises:
            ValueError: if ``loss_type`` is not 'l2'.
        """
        if loss_type != 'l2':
            # Any other value used to surface later as an UnboundLocalError.
            raise ValueError("unknown loss type: {!r}".format(loss_type))

        def _loss(W):
            """Least-squares loss and its gradient w.r.t. ``W``."""
            R = X - X @ W
            loss = 0.5 / X.shape[0] * (R ** 2).sum()
            G_loss = -1.0 / X.shape[0] * X.T @ R
            return loss, G_loss

        def _h(W):
            """Polynomial acyclicity value and its gradient."""
            M = np.eye(d) + W * W / d
            E = np.linalg.matrix_power(M, d - 1)
            h = (E.T * M).sum() - d
            G_h = E.T * W * 2
            return h, G_h

        def _adj(w):
            """Map the doubled variable [w_pos, w_neg] to the (d, d) matrix."""
            return (w[:d * d] - w[d * d:]).reshape([d, d])

        def sqrt_var(x):
            """Column-wise sample standard deviation of a torch tensor."""
            mean_x = x.mean(0)
            return torch.norm(x - mean_x, p=2, dim=0) / ((x.shape[0] - 1) ** 0.5)

        def hinge(values, threshold=1.0, reduction='sum'):
            """Squared hinge: penalise entries of ``values`` below ``threshold``."""
            diff = F.relu(threshold - values) ** 2
            if reduction == 'sum':
                loss = diff.sum()
            elif reduction == 'mean':
                loss = diff.mean()
            return loss

        def _func(w):
            """Objective and gradient in the doubled-variable parameterisation."""
            W = _adj(w)
            loss, G_loss = _loss(W)
            h, G_h = _h(W)
            W_sparse = torch.from_numpy(W)
            hinge_loss = hinge(sqrt_var(W_sparse), threshold=0.5, reduction='sum')
            obj = loss + 0.5 * rho * h * h + alpha * h + lambda1 * hinge_loss
            G_smooth = G_loss + (rho * h + alpha) * G_h
            # NOTE(review): the gradient carries a constant lambda1 term rather
            # than the derivative of the hinge term above.
            g_obj = np.concatenate((G_smooth + lambda1, -G_smooth + lambda1), axis=None)
            return obj, g_obj

        d = X.shape[1]
        w_est, rho, alpha, h = np.zeros(2 * d * d), 1.0, 0.0, np.inf
        # Diagonal entries are pinned to 0 (no self-loops); others are >= 0.
        bnds = [(0, 0) if i == j else (0, None)
                for _ in range(2) for i in range(d) for j in range(d)]
        X = X - np.mean(X, axis=0, keepdims=True)

        for _ in range(max_iter):
            w_new, h_new = None, None
            while rho < rho_max:
                sol = sopt.minimize(_func, w_est, method='L-BFGS-B', jac=True, bounds=bnds)
                w_new = sol.x
                h_new, _ = _h(_adj(w_new))
                if h_new > 0.25 * h:
                    # Not enough progress on acyclicity: tighten the penalty.
                    rho *= 10
                else:
                    break
            w_est, h = w_new, h_new
            alpha += rho * h
            if h <= h_tol or rho >= rho_max:
                break

        W_est = _adj(w_est)
        W_est[np.abs(W_est) < w_threshold] = 0
        return (W_est != 0).astype(int), _func(w_est)[0]

def test(model, dataloader):
    """Evaluate ``model`` on ``dataloader`` without gradient tracking.

    Args:
        model: called as ``model(x, None)``; item 0 of its result is taken as
            the prediction.
        dataloader: yields ``(x, labels)``; assumes x is
            (batch, window, nodes) and labels is (batch, window, 1) — TODO
            confirm against the loader.

    Returns:
        ``(avg_loss, [predictions, ground_truth, labels])`` where the three
        lists are nested Python lists with one row per sample; each sample's
        label is repeated across all predicted columns.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    loss_func = nn.MSELoss(reduction='mean')
    device = get_device()

    test_loss_list = []
    now = time.time()

    # Collected per batch and concatenated once at the end (linear instead of
    # re-concatenating the growing tensor on every batch).
    predicted_chunks = []
    ground_chunks = []
    label_chunks = []

    test_len = len(dataloader)

    model.eval()

    i = 0
    for x, labels in dataloader:
        x = x.float().to(device)
        labels = labels.float().to(device)

        with torch.no_grad():
            # The model is fed node-first data: swap the last two axes.
            x_model = x.permute(0, 2, 1).contiguous()

            # Regression target: the last time step of the window.
            y = x[:, -1, :]

            # No causal-graph fit here: evaluation never consumed its result,
            # and fitting one per batch dominated the run time.
            predicted = model(x_model, None)[0].float().to(device)
            loss = loss_func(predicted, y)

            # Label of the last time step, repeated for every predicted column.
            last_labels = labels[:, -1, 0].unsqueeze(1).repeat(1, predicted.shape[1])

            predicted_chunks.append(predicted)
            ground_chunks.append(y)
            label_chunks.append(last_labels)

        test_loss_list.append(loss.item())

        i += 1
        if i % 10000 == 1 and i > 1:
            print(timeSincePlus(now, i / test_len))

    if not test_loss_list:
        raise ValueError("test(): dataloader yielded no batches")

    test_predicted_list = torch.cat(predicted_chunks, dim=0).tolist()
    test_ground_list = torch.cat(ground_chunks, dim=0).tolist()
    test_labels_list = torch.cat(label_chunks, dim=0).tolist()

    avg_loss = sum(test_loss_list) / len(test_loss_list)

    return avg_loss, [test_predicted_list, test_ground_list, test_labels_list]


import numpy as np

np.set_printoptions(threshold=np.inf)
import torch
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import time
from util.time import *
from util.env import *
'''from torch_geometric.nn import GCNConv, GATConv, EdgeConv'''
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '2'
import math
import torch.nn.functional as F
from graph_layer import GraphLayer
from tcn import TemporalConvNet
from encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack
from attn import FullAttention, ProbAttention, AttentionLayer
from torch.nn import Sequential as Seq, Linear, ReLU, Parameter
from torch_geometric.nn import MessagePassing, GCNConv
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.nn.inits import glorot, zeros
from tconv import TemporalBlock
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class AdaGCNConv(MessagePassing):
    """GCN-style convolution whose edges are gated by learnable Gumbel-softmax logits.

    One 2-way logit pair is kept per ordered node pair (``num_nodes ** 2``
    rows); on every forward pass a hard Gumbel-softmax sample decides which
    messages pass.
    """

    def __init__(self, num_nodes, in_channels, out_channels, improved=False,
                 add_self_loops=False, normalize=True, bias=True, init_method='all'):
        """
        Args:
            num_nodes: number of graph nodes.
            in_channels: input feature size per node.
            out_channels: output feature size per node.
            improved, add_self_loops: forwarded to ``gcn_norm``.
            normalize: apply ``gcn_norm`` to the edges in ``forward``.
            bias: add a learnable bias to the output.
            init_method: 'all', 'random' or 'equal' — how the edge logits start.

        Raises:
            NotImplementedError: for an unknown ``init_method``.
        """
        # Sum aggregation; the node axis is dim 0 of the inputs.
        super(AdaGCNConv, self).__init__(aggr='add', node_dim=0)
        self.num_nodes = num_nodes
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.add_self_loops = add_self_loops
        self.normalize = normalize
        self.init_method = init_method

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        # The bool flag must not be stored under ``self.bias`` first:
        # register_parameter('bias', None) raises KeyError when a plain
        # attribute with that name already exists, which broke bias=False.
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self._init_graph_logits_()
        self.reset_parameters()

    def _init_graph_logits_(self):
        """Create the ``logits`` parameter of shape (num_nodes ** 2, 2)."""
        if self.init_method == 'all':
            # Column 0 (keep) starts at 0.8, column 1 (drop) at 0.
            logits = .8 * torch.ones(self.num_nodes ** 2, 2)
            logits[:, 1] = 0
        elif self.init_method == 'random':
            logits = 1e-3 * torch.randn(self.num_nodes ** 2, 2)
        elif self.init_method == 'equal':
            logits = .5 * torch.ones(self.num_nodes ** 2, 2)
        else:
            raise NotImplementedError('Initial Method %s is not implemented' % self.init_method)

        self.register_parameter('logits', Parameter(logits, requires_grad=True))

    def reset_parameters(self):
        """Glorot-initialise the weight and zero the bias."""
        glorot(self.weight)
        zeros(self.bias)

    def forward(self, x, edge_index, edge_weight=None):
        """Transform node features and propagate them over the gated edges.

        Args:
            x: node features with the node axis first.
            edge_index: (2, E) edge list.
            edge_weight: optional per-edge weights.
        """
        if self.normalize:
            edge_index, edge_weight = gcn_norm(  # yapf: disable
                edge_index, edge_weight, x.size(self.node_dim),
                self.improved, self.add_self_loops, dtype=x.dtype)

        # Hard one-hot sample per node pair; column 0 acts as the keep gate.
        z = torch.nn.functional.gumbel_softmax(self.logits, hard=True)

        x = torch.matmul(x, self.weight)

        # propagate_type: (x: Tensor, edge_weight: OptTensor)
        out = self.propagate(edge_index, x=x, edge_weight=edge_weight, size=None, z=z)

        if self.bias is not None:
            out += self.bias

        return out

    def message(self, x_j, edge_weight, z):
        """Scale each edge's message by its weight (if any) and its keep gate.

        NOTE(review): the gate has ``num_nodes ** 2`` rows, so this appears to
        require exactly one edge per ordered node pair — confirm.
        """
        broadcast_shape = [-1] + [1] * (x_j.dim() - 1)
        gate = z[:, 0].contiguous().view(broadcast_shape)
        if edge_weight is None:
            return x_j * gate
        return edge_weight.view(broadcast_shape) * x_j * gate

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)

class GraphTemporalEmbedding(torch.nn.Module):
    """Stack of dilated temporal blocks, each followed by an adaptive graph convolution."""

    def __init__(self, num_nodes, seq_len, num_levels, kernel_size=3, dropout=0.02,
                 device=torch.device('cuda:0')):
        """
        Args:
            num_nodes: number of graph nodes (channel count of the temporal blocks).
            seq_len: sequence length (feature size of the graph convolutions).
            num_levels: number of temporal levels; level ``i`` uses dilation ``2 ** i``.
            kernel_size: temporal kernel size.
            dropout: dropout rate of the temporal blocks.
            device: device on which the fully connected edge index is created.
        """
        super(GraphTemporalEmbedding, self).__init__()
        self.num_nodes = num_nodes
        self.seq_len = seq_len
        self.num_levels = num_levels
        self.device = device
        assert (kernel_size - 1) // 2, "kernel_size must make (kernel_size - 1) // 2 non-zero"

        self.tc_modules = torch.nn.ModuleList([])
        self.gc_module = AdaGCNConv(num_nodes, seq_len, seq_len)
        self.gc_modules = torch.nn.ModuleList([])
        for i in range(num_levels):
            dilation_size = 2 ** i
            self.tc_modules.extend(
                [TemporalBlock(num_nodes, num_nodes, kernel_size=kernel_size, stride=1,
                               dilation=dilation_size,
                               padding=(kernel_size - 1) * dilation_size // 2,
                               dropout=dropout)])
            # NOTE(review): one graph conv is created per level, but forward()
            # only uses gc_modules[0]; later levels share self.gc_module.
            # The flattened source does not show whether this line sat inside
            # the loop — confirm against saved checkpoints.
            self.gc_modules.extend([AdaGCNConv(num_nodes, seq_len, seq_len)])

        # Fully connected edge list: for each target i, every node j is a source.
        node_ids = torch.arange(num_nodes, dtype=torch.long)
        source_nodes = node_ids.repeat(num_nodes)
        target_nodes = node_ids.repeat_interleave(num_nodes)
        self.edge_index = torch.stack([source_nodes, target_nodes]).to(self.device)

    def forward(self, x):
        """Run the temporal / graph stack.

        Args:
            x: assumed (bsz, num_nodes, seq_len) — TODO confirm against caller.

        Returns:
            The result with its last two axes swapped.
        """
        x = self.tc_modules[0](x)
        # The graph conv wants the node axis first, hence the transposes.
        x = self.gc_modules[0](x.transpose(0, 1), self.edge_index).transpose(0, 1)

        for i in range(1, self.num_levels):
            x = self.tc_modules[i](x)
            x = self.gc_module(x.transpose(0, 1), self.edge_index).transpose(0, 1)

        return x.transpose(1, 2)

def get_batch_edge_index(org_edge_index, batch_num, node_num):
    """Tile an edge list once per batch item, offsetting node ids per item.

    Copy ``b`` of the edge list has ``b * node_num`` added to every node id, so
    the result indexes into a batch flattened to ``batch_num * node_num`` nodes.

    Args:
        org_edge_index: Tensor of shape (rows, edge_num); rows is 2 for an
            ordinary edge list.
        batch_num: Number of copies to produce.
        node_num: Number of nodes per batch item (the id offset between copies).

    Returns:
        A new ``torch.long`` tensor of shape (rows, batch_num * edge_num).
        ``org_edge_index`` is not modified.
    """
    edge_index = org_edge_index.detach()
    rows, edge_num = edge_index.shape
    # One offset per batch item, broadcast over rows and edges:
    # (rows, 1, edge_num) + (1, batch_num, 1) -> (rows, batch_num, edge_num).
    offsets = torch.arange(batch_num, device=edge_index.device).view(1, -1, 1) * node_num
    batch_edge_index = edge_index.unsqueeze(1) + offsets
    # Explicit target shape keeps this valid when batch_num or edge_num is 0.
    return batch_edge_index.reshape(rows, batch_num * edge_num).long()

class GRUModel(nn.Module):
    """Single-layer GRU followed by a linear projection of every time step.

    Args:
        input_num: Feature size of each input step.
        hidden_num: GRU hidden size.
        output_num: Feature size produced by the linear head.
    """

    def __init__(self, input_num, hidden_num, output_num):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_num
        # batch_first=True is set here, so inputs should be reshaped as
        # inputs.view(inputs.shape[0], -1, inputs.shape[1]).
        # For the time-series forecasting problem this amounts to setting the
        # number of time steps (seq_len) to 1.
        self.GRU_layer = nn.GRU(input_size=input_num, hidden_size=hidden_num, batch_first=True)
        self.output_linear = nn.Linear(hidden_num, output_num)
        self.hidden = None

    def forward(self, x):
        """Return ``(projected_outputs, last_hidden_state)`` for input ``x``.

        The last hidden state is also stored on ``self.hidden``.
        """
        # h_n has shape (num_layers * num_directions, batch, hidden_size).
        # The hidden state self.hidden does not need to be passed in explicitly:
        # the GRU starts from its default initial state on every call.
        x, self.hidden = self.GRU_layer(x)
        x = self.output_linear(x)
        return x, self.hidden

class TemporalBlocknana(nn.Module):
    """Average of four parallel Conv1d branches with kernel sizes 3, 5, 7 and 9.

    Every branch is Conv1d -> BatchNorm1d -> ReLU -> Dropout(0.2), padded by
    ``kernel_size // 2`` so the sequence length is preserved.

    Args:
        in_channel: Number of input channels of every branch.
        out_channel: Number of output channels of every branch.
    """

    def __init__(self, in_channel, out_channel):
        super(TemporalBlocknana, self).__init__()
        # Attribute names are kept as temporal1..temporal4 so existing
        # state_dict keys stay valid.
        self.temporal1 = self._make_branch(in_channel, out_channel, 3)
        self.temporal2 = self._make_branch(in_channel, out_channel, 5)
        self.temporal3 = self._make_branch(in_channel, out_channel, 7)
        self.temporal4 = self._make_branch(in_channel, out_channel, 9)

    @staticmethod
    def _make_branch(in_channel, out_channel, kernel_size):
        """Build one length-preserving conv branch for an odd ``kernel_size``."""
        return nn.Sequential(
            nn.Conv1d(in_channel, out_channel, kernel_size=kernel_size,
                      padding=kernel_size // 2),
            nn.BatchNorm1d(out_channel),
            nn.ReLU(),
            nn.Dropout(0.2))

    def forward(self, x):
        """Apply all four branches to ``x`` and return their element-wise mean."""
        x1 = self.temporal1(x)
        x2 = self.temporal2(x)
        x3 = self.temporal3(x)
        x4 = self.temporal4(x)
        return (x1 + x2 + x3 + x4) / 4

class OutLayer(nn.Module):
    """MLP head that maps the last dimension of its input to a single value.

    The head has ``layer_num`` linear layers. Every layer except the last is
    followed by BatchNorm1d and ReLU; the last layer outputs one feature.

    Args:
        in_num: Input feature size of the first linear layer.
        node_num: Unused; kept for interface compatibility.
        layer_num: Number of linear layers.
        inter_num: Width of the hidden layers.
    """

    def __init__(self, in_num, node_num, layer_num, inter_num = 512):
        super(OutLayer, self).__init__()

        modules = []
        for i in range(layer_num):
            # Only the first layer sees in_num; later layers see inter_num.
            layer_in_num = in_num if i == 0 else inter_num
            if i == layer_num - 1:
                # Last layer, output shape: 1.
                modules.append(nn.Linear(layer_in_num, 1))
            else:
                modules.append(nn.Linear(layer_in_num, inter_num))
                modules.append(nn.BatchNorm1d(inter_num))
                modules.append(nn.ReLU())

        self.mlp = nn.ModuleList(modules)

    def forward(self, x):
        """Apply the MLP to a 3-D input, normalising over the last dimension."""
        out = x
        for mod in self.mlp:
            if isinstance(mod, nn.BatchNorm1d):
                # BatchNorm1d normalises dim 1, so move the feature dim there
                # and back again afterwards.
                out = mod(out.permute(0, 2, 1)).permute(0, 2, 1)
            else:
                out = mod(out)
        return out

class GNNLayer(nn.Module):
    """``GraphLayer`` followed by BatchNorm1d and ReLU.

    Args:
        in_channel: Input feature size passed to ``GraphLayer``.
        out_channel: Output feature size of ``GraphLayer`` and of the batch norm.
        inter_dim: Forwarded to ``GraphLayer`` as ``inter_dim``.
        heads: Number of attention heads forwarded to ``GraphLayer``.
        node_num: Unused; kept for interface compatibility.
    """

    def __init__(self, in_channel, out_channel, inter_dim=0, heads=1, node_num=100):
        super(GNNLayer, self).__init__()

        self.gnn = GraphLayer(in_channel, out_channel, inter_dim=inter_dim, heads=heads, concat=False)
        self.bn = nn.BatchNorm1d(out_channel)
        self.relu = nn.ReLU()
        # Not used by forward; kept so the module's attributes are unchanged.
        self.leaky_relu = nn.LeakyReLU()

    def forward(self, x, edge_index, embedding=None, node_num=0):
        """Return ``(activated_output, attention_weights, edge_index_used)``.

        The attention weights and the edge index returned by ``GraphLayer`` are
        also stored on ``self.att_weight_1`` and ``self.edge_index_1``.
        ``node_num`` is accepted but not used.
        """
        out, (new_edge_index, att_weight) = self.gnn(
            x, edge_index, embedding, return_attention_weights=True)
        self.att_weight_1 = att_weight
        self.edge_index_1 = new_edge_index

        out = self.bn(out)

        return self.relu(out), self.att_weight_1, self.edge_index_1

class GDN(nn.Module):
    """Graph-based predictor that learns a top-k neighbour graph from node embeddings.

    A cosine-similarity matrix is computed between the learned node embeddings,
    the ``topk`` most similar nodes of each node form its incoming edges, and
    one ``GNNLayer`` per entry of ``edge_index_sets`` is run over that learned
    graph. The concatenated GNN outputs are gated by the node embeddings and
    passed through ``OutLayer``.

    Args:
        edge_index_sets: Non-empty sequence of edge-index tensors; its length
            fixes the number of GNN layers.
        node_num: Number of nodes (rows of the embedding table).
        dim: Embedding size and GNN output size.
        out_layer_inter_dim: Hidden width of the output MLP.
        input_dim: Per-node input feature size given to each GNN layer.
        out_layer_num: Number of linear layers in the output MLP.
        topk: Number of neighbours kept per node in the learned graph.
        factor, d_model, n_heads, e_layers, d_ff, dropout, distil, activation,
        output_attention: Accepted for interface compatibility; not used by
            the code in this class.
    """

    def __init__(self, edge_index_sets, node_num, dim=64, out_layer_inter_dim=256,
                 input_dim=10, out_layer_num=1, topk=20,
                 factor=5, d_model=10, n_heads=8, e_layers=3, d_ff=256,
                 dropout=0.001, distil=True, activation='gelu', output_attention = False):
        super(GDN, self).__init__()

        self.edge_index_sets = edge_index_sets

        # NOTE(review): both locals below are unused. They are kept so that
        # construction still calls get_device() and still fails immediately on
        # an empty edge_index_sets, exactly as before.
        device = get_device()
        edge_index = edge_index_sets[0]

        embed_dim = dim
        self.embedding = nn.Embedding(node_num, embed_dim)
        self.bn_outlayer_in = nn.BatchNorm1d(embed_dim)

        edge_set_num = len(edge_index_sets)
        self.gnn_layers = nn.ModuleList([
            GNNLayer(input_dim, dim, inter_dim=dim+embed_dim, heads=1) for i in range(edge_set_num)
        ])

        # NOTE(review): hard-coded sizes, and GraphTemporalEmbedding defaults to
        # device cuda:0. forward() does not call self.gte -- confirm it is still
        # needed.
        self.gte = GraphTemporalEmbedding(10, 15, 3)

        self.node_embedding = None
        self.topk = topk
        self.learned_graph = None

        self.out_layer = OutLayer(dim*edge_set_num, node_num, out_layer_num, inter_num = out_layer_inter_dim)

        self.cache_edge_index_sets = [None] * edge_set_num
        self.cache_embed_index = None

        self.dp = nn.Dropout(0.2)

        self.init_params()

    def init_params(self):
        """Initialise the node-embedding table with Kaiming-uniform weights."""
        nn.init.kaiming_uniform_(self.embedding.weight, a=math.sqrt(5))

    def forward(self, data, org_edge_index):
        """Predict one value per node and return the embedding similarity matrix.

        Args:
            data: 3-D tensor unpacked as (batch_num, node_num, all_feature).
            org_edge_index: Accepted for interface compatibility; not used.

        Returns:
            Tuple ``(out, cos_ji_mat)`` where ``out`` is reshaped to
            (-1, node_num) and ``cos_ji_mat`` is the (node_num, node_num)
            cosine-similarity matrix of the detached node embeddings.
        """
        x = data.clone().detach()
        edge_index_sets = self.edge_index_sets

        device = data.device

        batch_num, node_num, all_feature = x.shape
        x = x.reshape(-1, all_feature).contiguous()

        gcn_outs = []
        for i, edge_index in enumerate(edge_index_sets):
            edge_num = edge_index.shape[1]
            cache_edge_index = self.cache_edge_index_sets[i]

            # Keep the batched copy of the given graph cached; rebuild it when
            # the batch size changes. The learned graph below is what is fed
            # to the GNN layer.
            if cache_edge_index is None or cache_edge_index.shape[1] != edge_num*batch_num:
                self.cache_edge_index_sets[i] = get_batch_edge_index(edge_index, batch_num, node_num).to(device)

            all_embeddings = self.embedding(torch.arange(node_num).to(device))

            # Similarity is computed on a detached copy so graph selection does
            # not backpropagate into the embeddings.
            weights_arr = all_embeddings.detach().clone()
            all_embeddings = all_embeddings.repeat(batch_num, 1)

            weights = weights_arr.view(node_num, -1)

            cos_ji_mat = torch.matmul(weights, weights.T)
            normed_mat = torch.matmul(weights.norm(dim=-1).view(-1,1), weights.norm(dim=-1).view(1,-1))
            cos_ji_mat = cos_ji_mat / normed_mat

            # torch.topk raises when k exceeds the dimension size, so never ask
            # for more neighbours than there are nodes.
            topk_num = min(self.topk, node_num)

            topk_indices_ji = torch.topk(cos_ji_mat, topk_num, dim=-1)[1]

            self.learned_graph = topk_indices_ji

            # Edge j -> i for every node i and each of its top-k neighbours j.
            gated_i = torch.arange(0, node_num).view(-1, 1).repeat(1, topk_num).flatten().to(device).unsqueeze(0)
            gated_j = topk_indices_ji.flatten().unsqueeze(0)
            gated_edge_index = torch.cat((gated_j, gated_i), dim=0)

            batch_gated_edge_index = get_batch_edge_index(gated_edge_index, batch_num, node_num).to(device)
            gcn_out, attentionweight, bianbian = self.gnn_layers[i](
                x, batch_gated_edge_index,
                node_num=node_num*batch_num, embedding=all_embeddings)

            gcn_outs.append(gcn_out)

        x = torch.cat(gcn_outs, dim=1)
        x = x.view(batch_num, node_num, -1)

        indexes = torch.arange(0, node_num).to(device)
        # Gate the GNN output element-wise with the node embeddings.
        out = torch.mul(x, self.embedding(indexes))

        # BatchNorm1d normalises dim 1, so move the feature dim there and back.
        out = out.permute(0,2,1)
        out = F.relu(self.bn_outlayer_in(out))
        out = out.permute(0,2,1)

        out = self.dp(out)
        out = self.out_layer(out)
        out = out.view(-1, node_num)

        return out, cos_ji_mat

# The pasted program output began directly after the code with: test: (2049, 27)

train: (1565, 27)
test: (2049, 27)
train: (1565, 27)
test: (2049, 27)
train: (1565, 27)
/home/ubuntu/anaconda3/envs/chafen/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
WeightNorm.apply(module, name, dim)
iterator : 0.0000, optimization objective: 0.8663418889045715

iterator : 0.0000, optimization objective: 0.8094824552536011

iterator : 0.0000, optimization objective: 0.8071750998497009

iterator : 0.0000, optimization objective: 0.8322016596794128

iterator : 0.0000, optimization objective: 0.940419614315033

iterator : 0.0000, optimization objective: 0.8364759683609009

iterator : 0.0000, optimization objective: 0.8436645269393921

iterator : 0.0000, optimization objective: 0.933905303478241

iterator : 0.0000, optimization objective: 0.8951370716094971

iterator : 0.0000, optimization objective: 0.9112871289253235

iterator : 0.0000, optimization objective: 0.8398826122283936

iterator : 0.0000, optimization objective: 0.9027136564254761

iterator : 0.0000, optimization objective: 0.6421583294868469

epoch (0 / 100) (Loss:0.28361142)
iterator : 0.0000, optimization objective: 0.7916588187217712

iterator : 0.0000, optimization objective: 0.894771158695221

iterator : 0.0000, optimization objective: 0.8909013867378235

iterator : 0.0000, optimization objective: 0.8222718238830566

iterator : 0.0000, optimization objective: 0.8284629583358765

iterator : 0.0000, optimization objective: 0.9187648892402649

iterator : 0.0000, optimization objective: 0.8319133520126343

iterator : 0.0000, optimization objective: 0.8192020058631897

iterator : 0.0000, optimization objective: 0.7168318033218384

iterator : 0.0000, optimization objective: 0.8224521279335022

iterator : 0.0000, optimization objective: 0.8410629630088806

iterator : 0.0000, optimization objective: 0.7462283968925476

iterator : 0.0000, optimization objective: 0.638600766658783

epoch (1 / 100) (Loss:0.27084929)

为什么不是一次迭代求因果,而是多次呀?

Pensamento

Explaining multiple iterations in training and optimization

It seems like the user is asking about why there are multiple iterations, rather than one for causal computation. I'll explain that the model has two levels: training batches and internal optimization. Each batch in the train data calls nt.learn, which happens multiple times across epochs, including during validation/test runs. I’ll suggest computing causal once per epoch if desired.

Thought for 10s

Resposta

因为你现在的代码里 NOTEARS 求因果是在每个 batch 里执行一次,不是整个训练过程只执行一次。

关键位置在 train() 里:

```python
for i_epoch in range(epoch):
    ...
    for x, labels in train_dataloader:
        ...
        for iterator in range(1):
            nt = Notears()
            nt.learn(x_input, yilaitu)
```

虽然这里写的是:

python
for iterator in range(1):

它确实只循环 1 次,所以打印永远是:

text
iterator : 0.0000

但是外面还有:

python
for x, labels in train_dataloader:

所以每来一个 batch,就会重新执行一次:

python
nt = Notears() nt.learn(x_input, yilaitu)

你的训练集大小是:

text
train: (1565, 27)

batch size 是 128,所以每个 epoch 大约有:

text
ceil(1565 / 128) = 13 个 batch

因此你看到每个 epoch 里大概打印 13 次:

text
iterator : 0.0000, optimization objective: ...

这不是 iterator 多次迭代,而是 13 个 batch 分别求了一次因果图。


另外,Notears() 本身内部也不是一步完成的。它里面还有优化循环:

```python
for i in range(max_iter):
    while rho < rho_max:
        sol = sopt.minimize(_func, w_est, method='L-BFGS-B', jac=True, bounds=bnds)
```

所以从计算角度看,每次 nt.learn() 里面又会调用 scipy.optimize.minimize() 做多轮优化。

因此你现在实际上是:

```text
每个 epoch
    每个 batch
        求一次 NOTEARS 因果图
        用这个因果图算 PageRank
        用 PageRank 加权 GDN 输出
        更新 GDN 参数
```

如果你想 只求一次因果图,就不要把 nt.learn() 放在 batch 循环里面。可以在训练前先算一次因果图,然后训练时复用。

比如改成这种思路:

```python
# 在 epoch 循环前,只求一次因果图
first_x, first_labels = next(iter(train_dataloader))
first_x = first_x.float().to(device)

batch_, window_, node_ = first_x.shape
x_input = first_x.reshape(batch_ * window_, node_).cpu().numpy()

yilaitu = np.zeros((node_, node_))

nt = Notears()
nt.learn(x_input, yilaitu)

fixed_causal_matrix = torch.from_numpy(nt.predict_matrix).float().to(device)

adjacency = np.abs(nt.predict_matrix.T)
pagerank = PageRank()

if adjacency.sum() < 1e-6:
    fixed_scores = np.ones(node_) / node_
else:
    pagerank.fit(adjacency)
    fixed_scores = pagerank.scores_

fixed_scores = torch.from_numpy(fixed_scores).float().to(device)
```

然后在 batch 循环里删掉:

```python
nt = Notears()
nt.learn(x_input, yilaitu)
...
scores = torch.from_numpy(scores).to(device)
```

直接用:

python
scores = fixed_scores

训练时:

```python
model_outputs = model(x_model, None)
out = model_outputs[0].float().to(device)

result = out * scores.unsqueeze(0).to(out.dtype)
loss = loss_func(result, y)
```

这样就是训练前求一次因果图,后面所有 batch 共用这个因果图/PageRank 权重。


不过要注意一点:你当前代码里有这句:

python
yilaitu = model_outputs[1].cpu().numpy()

也就是说你现在的设计像是一个交替更新过程:

```text
NOTEARS 因果图 ← 使用上一次 GDN 学到的图作为先验
GDN 图结构 ← 根据当前 batch 更新
```

所以它不是单纯“求一次因果”,而是在做一种 batch-level 的动态因果/图结构交替学习。

如果你的论文或方法目标是“动态因果图”,那每个 batch 求一次是符合这个思路的;
如果你的目标是“全局固定因果图”,那就应该把 NOTEARS 移到训练循环外面,只计算一次。

Compartilhar esta Q&A