# -*- coding: utf-8 -*- import pandas as pd import...
# Created on: 17 June 2026
# Answered with GPT-5.5 Thinking by Chat01
# Created on: 17 June 2026
# Answered with GPT-5.5 Thinking by Chat01
import pandas as pd
import numpy as np
import torch
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, random_split, Subset
from sklearn.preprocessing import MinMaxScaler,MaxAbsScaler,StandardScaler
from util.env import get_device, set_device
from util.preprocess import build_loc_net, construct_data
from util.net_struct import get_feature_map, get_fc_graph_struc
from util.iostream import printsep
from datasets.TimeDataset import TimeDataset
#from models.GDN import GDN
from GDN import GDN
from train import train
from test import test
from evaluate import get_err_scores, get_best_performance_data, get_val_performance_data, get_full_err_scores
import sys
from datetime import datetime
import time
import os
import argparse
from pathlib import Path
import matplotlib.pyplot as plt
from dataset import STVSegLoader
from dataset import mslSegLoader
import json
import random
#os.environ['CUDA_LAUNCH_BLOCKING'] = '2'
class Main():
    """Build the data pipeline and GDN model for one experiment and run it.

    ``train_config`` and ``env_config`` are plain dicts; the keys read here
    are the ones assembled by the script entry point of this file.
    """

    def __init__(self, train_config, env_config, debug=False):
        """Load the CSV data, build the graph, the datasets and the model.

        Args:
            train_config: dict with 'slide_win', 'slide_stride', 'seed',
                'batch', 'val_ratio', 'dim', 'out_layer_num',
                'out_layer_inter_dim' and 'topk'.
            env_config: dict with 'dataset', 'device', 'save_path',
                'report' and 'load_model_path'.
            debug: accepted for interface compatibility; not used.
        """
        self.train_config = train_config
        self.env_config = env_config
        self.datestr = None

        dataset = self.env_config['dataset']
        train = pd.read_csv(f'./data/{dataset}/train.csv', sep=',', index_col=0)
        test = pd.read_csv(f'./data/{dataset}/test.csv', sep=',', index_col=0)

        feature_map = get_feature_map(dataset)
        fc_struc = get_fc_graph_struc(dataset)

        set_device(env_config['device'])
        self.device = get_device()

        fc_edge_index = build_loc_net(fc_struc, list(train.columns), feature_map=feature_map)
        fc_edge_index = torch.tensor(fc_edge_index, dtype=torch.long)

        self.feature_map = feature_map

        train_dataset_indata = construct_data(train, feature_map, labels=0)
        # NOTE(review): the label column is read as `attack`; confirm that
        # test.csv really names it so (older code here checked for `label`).
        test_dataset_indata = construct_data(test, feature_map, labels=test.attack.tolist())

        cfg = {
            'slide_win': train_config['slide_win'],
            'slide_stride': train_config['slide_stride'],
        }

        train_dataset = TimeDataset(train_dataset_indata, fc_edge_index, mode='train', config=cfg)
        test_dataset = TimeDataset(test_dataset_indata, fc_edge_index, mode='test', config=cfg)

        # The loaders returned here are not used (the mslSegLoader loaders
        # below replace them). The call is kept so the global `random` state
        # advances exactly as it did before.
        self.get_loaders(train_dataset, train_config['seed'], train_config['batch'],
                         val_ratio=train_config['val_ratio'])

        self.train_dataset = train_dataset
        self.test_dataset = test_dataset

        # NOTE(review): window size and batch sizes are hard-coded here and
        # the validation loader reads the "test" split; confirm this is
        # intended.
        self.train_dataloader = DataLoader(mslSegLoader(win_size=27, flag="train"),
                                           batch_size=128, shuffle=True)
        self.val_dataloader = DataLoader(mslSegLoader(win_size=27, flag="test"),
                                         batch_size=1, shuffle=False)
        self.test_dataloader = DataLoader(mslSegLoader(win_size=27, flag="test"),
                                          batch_size=1, shuffle=False)

        edge_index_sets = [fc_edge_index]

        self.model = GDN(edge_index_sets, len(feature_map),
                         dim=train_config['dim'],
                         input_dim=train_config['slide_win'],
                         out_layer_num=train_config['out_layer_num'],
                         out_layer_inter_dim=train_config['out_layer_inter_dim'],
                         topk=train_config['topk']
                         ).to(self.device)

    def run(self):
        """Train (unless a checkpoint path is configured), then evaluate.

        When ``env_config['load_model_path']`` is non-empty the checkpoint is
        loaded directly and training is skipped, so an existing checkpoint is
        never overwritten.
        """
        if len(self.env_config['load_model_path']) > 0:
            model_save_path = self.env_config['load_model_path']
        else:
            model_save_path = self.get_save_path()[0]

            tmana1 = time.perf_counter()
            # Use the config held by this instance rather than a module-level
            # global, so the class also works when imported from elsewhere.
            self.train_log = train(self.model, model_save_path,
                                   config=self.train_config,
                                   train_dataloader=self.train_dataloader,
                                   val_dataloader=self.val_dataloader,
                                   feature_map=self.feature_map,
                                   test_dataloader=self.test_dataloader,
                                   test_dataset=self.test_dataset,
                                   train_dataset=self.train_dataset,
                                   dataset_name=self.env_config['dataset']
                                   )
            tmana2 = time.perf_counter()
            yongshi = tmana2 - tmana1
            print('CausalTrace+程序运行时间:%s毫秒' % (int(yongshi * 1000)))

        # test
        self.model.load_state_dict(torch.load(model_save_path))
        best_model = self.model.to(self.device)

        T00 = time.perf_counter()
        _, self.test_result = test(best_model, self.test_dataloader)
        T11 = time.perf_counter()
        yongshi2 = T11 - T00
        print('CausalTrace+程序测试时间:%s毫秒' % (int(yongshi2 * 1000)))

        _, self.val_result = test(best_model, self.val_dataloader)

        self.get_score(self.test_result, self.val_result)

    def get_loaders(self, train_dataset, seed, batch, val_ratio=0.1):
        """Split ``train_dataset`` into train and validation loaders.

        A contiguous block of ``int(len * val_ratio)`` samples, starting at a
        random index drawn from the global ``random`` module, becomes the
        validation subset; everything else is the training subset.

        Args:
            train_dataset: indexable dataset with ``len()``.
            seed: accepted for interface compatibility; the split relies on
                the global ``random`` state instead.
            batch: batch size for both loaders.
            val_ratio: fraction of samples reserved for validation.

        Returns:
            ``(train_dataloader, val_dataloader)``.
        """
        dataset_len = int(len(train_dataset))
        train_use_len = int(dataset_len * (1 - val_ratio))
        val_use_len = int(dataset_len * val_ratio)
        val_start_index = random.randrange(train_use_len)
        val_end_index = val_start_index + val_use_len
        indices = torch.arange(dataset_len)

        train_sub_indices = torch.cat([indices[:val_start_index], indices[val_end_index:]])
        train_subset = Subset(train_dataset, train_sub_indices)

        val_sub_indices = indices[val_start_index:val_end_index]
        val_subset = Subset(train_dataset, val_sub_indices)

        train_dataloader = DataLoader(train_subset, batch_size=batch, shuffle=True)
        val_dataloader = DataLoader(val_subset, batch_size=batch, shuffle=False)

        return train_dataloader, val_dataloader

    def get_score(self, test_result, val_result):
        """Compute and print the evaluation metrics.

        Args:
            test_result: ``[predicted, ground_truth, labels]`` as returned by
                ``test`` for the test loader.
            val_result: the same structure for the validation loader.

        Raises:
            ValueError: if ``env_config['report']`` is neither 'best' nor
                'val'.
        """
        report = self.env_config['report']
        if report not in ('best', 'val'):
            # Fail early with a clear message instead of subscripting None
            # after all the scoring work has been done.
            raise ValueError(
                "env_config['report'] must be 'best' or 'val', got {!r}".format(report))

        np_test_result = np.array(test_result)
        test_labels = np_test_result[2, :, 0].tolist()

        test_scores, normal_scores = get_full_err_scores(test_result, val_result)

        top1_best_info = get_best_performance_data(test_scores, test_labels, topk=1)
        top1_val_info = get_val_performance_data(test_scores, normal_scores, test_labels, topk=1)

        print('=========================** Result **============================\n')

        info = top1_best_info if report == 'best' else top1_val_info

        print(f'F1 score: {info[0]}')
        print(f'precision: {info[1]}')
        print(f'recall: {info[2]}\n')
        print(f'ROC-AUC: {info[3]}')
        print(f'PR-AUC: {info[4]}\n')
        print(f'Threshold:{info[5]}\n')

    def get_save_path(self, feature_name=''):
        """Return ``[model_path, result_csv_path]`` and create their folders.

        The timestamp is generated once per instance and reused afterwards.
        ``feature_name`` is accepted for interface compatibility; not used.
        """
        dir_path = self.env_config['save_path']

        if self.datestr is None:
            now = datetime.now()
            self.datestr = now.strftime('%m|%d-%H:%M:%S')
        datestr = self.datestr

        paths = [
            f'./pretrained/{dir_path}/best_{datestr}.pt',
            f'./results/{dir_path}/{datestr}.csv',
        ]

        for path in paths:
            dirname = os.path.dirname(path)
            Path(dirname).mkdir(parents=True, exist_ok=True)

        return paths
# Script entry point: parse the command line, seed every RNG, then build and
# run the experiment.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument('-batch', help='batch size', type=int, default=512)
    parser.add_argument('-epoch', help='train epoch', type=int, default=100)
    parser.add_argument('-slide_win', help='slide_win', type=int, default=27)
    parser.add_argument('-dim', help='dimension', type=int, default=64)
    parser.add_argument('-slide_stride', help='slide_stride', type=int, default=5)
    parser.add_argument('-save_path_pattern', help='save path pattern', type=str, default='')
    parser.add_argument('-dataset', help='wadi / swat', type=str, default='msl')
    parser.add_argument('-device', help='cuda / cpu', type=str, default=torch.device('cuda:0'))
    parser.add_argument('-random_seed', help='random seed', type=int, default=2023)
    parser.add_argument('-comment', help='experiment comment', type=str, default='')
    parser.add_argument('-out_layer_num', help='outlayer num', type=int, default=1)
    parser.add_argument('-out_layer_inter_dim', help='out_layer_inter_dim', type=int, default=256)
    parser.add_argument('-decay', help='decay', type=float, default=0)
    parser.add_argument('-val_ratio', help='val ratio', type=float, default=0.1)
    parser.add_argument('-topk', help='topk num', type=int, default=27)
    parser.add_argument('-report', help='best / val', type=str, default='best')
    parser.add_argument('-load_model_path', help='trained model path', type=str, default='')
    print("jianbing")
    args = parser.parse_args()

    # Seed Python, NumPy and PyTorch (CPU and CUDA) and request deterministic
    # cuDNN behaviour.
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    os.environ['PYTHONHASHSEED'] = str(args.random_seed)

    train_config = {
        'batch': args.batch,
        'epoch': args.epoch,
        'slide_win': args.slide_win,
        'dim': args.dim,
        'slide_stride': args.slide_stride,
        'comment': args.comment,
        'seed': args.random_seed,
        'out_layer_num': args.out_layer_num,
        'out_layer_inter_dim': args.out_layer_inter_dim,
        'decay': args.decay,
        'val_ratio': args.val_ratio,
        'topk': args.topk,
    }

    env_config = {
        'save_path': args.save_path_pattern,
        'dataset': args.dataset,
        'report': args.report,
        'device': args.device,
        'load_model_path': args.load_model_path
    }

    main = Main(train_config, env_config, debug=False)
    main.run()
import numpy as np
import torch
import matplotlib.pyplot as plt
import torch.nn as nn
import time
from util.time import *
from util.env import *
from sklearn.metrics import mean_squared_error
from stocBiO import *
from test import *
import torch.nn.functional as F
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sknetwork.ranking import PageRank
from evaluate import get_best_performance_data, get_val_performance_data, get_full_err_scores
from sklearn.metrics import precision_score, recall_score, roc_auc_score, f1_score
from torch.utils.data import DataLoader, random_split, Subset
from scipy.stats import iqr
import logging
import pandas as pd
import scipy.optimize as sopt
logging.basicConfig(level=logging.INFO, format='%(asctime)s%(message)s')
class BaseLearner(object):
    """Base class for structure learners; holds the learned causal matrix."""

    def __init__(self):
        self._causal_matrix = None

    def learn(self, data, *args, **kwargs):
        """Fit the learner on ``data``; subclasses must override this."""
        raise NotImplementedError

    @property
    def causal_matrix(self):
        """The stored causal matrix, or ``None`` if nothing has been set."""
        return self._causal_matrix

    @causal_matrix.setter
    def causal_matrix(self, value):
        # Store the value; a bare attribute access here would silently
        # discard every assignment.
        self._causal_matrix = value
class Tensor(object):
    """Thin wrapper exposing array-like input as a NumPy array in ``.data``."""

    def __init__(self, indata):
        self._set_tensor(indata)

    def _set_tensor(self, indata):
        """Validate ``indata`` and store it as ``self.data``.

        Accepts a 2-D or 3-D ``numpy.ndarray`` (stored as-is) or a
        ``pandas.DataFrame`` (stored as its ``.values``).

        Raises:
            TypeError: for an ndarray of any other rank, or any other type.
        """
        if isinstance(indata, np.ndarray):
            if indata.ndim == 2 or indata.ndim == 3:
                self.data = indata
            else:
                raise TypeError("Input numpy.ndarray ndim error!")
        elif isinstance(indata, pd.DataFrame):
            self.data = indata.values
        else:
            raise TypeError("Input data is not numpy.ndarray or pd.DataFrame")
class Notears(BaseLearner):
    """Linear NOTEARS-style structure learner with a hinge penalty.

    The objective is the least-squares loss, an augmented-Lagrangian
    acyclicity term, and ``lambda1`` times a hinge penalty on the per-column
    standard deviation of the weight matrix.
    """

    def __init__(self, lambda1=0.1,
                 eta=0.001,
                 loss_type='l2',
                 max_iter=10,
                 h_tol=1e-8,
                 rho_max=1e+16,
                 w_threshold=0.05):
        super().__init__()
        self.lambda1 = lambda1
        self.eta = eta
        self.loss_type = loss_type
        self.max_iter = max_iter
        self.h_tol = h_tol
        self.rho_max = rho_max
        self.w_threshold = w_threshold

    def learn(self, data, yilaitu):
        """Estimate the graph and store it on the instance.

        Sets ``self.predict_matrix`` (binary ``(d, d)`` int adjacency) and
        ``self.sunshi`` (final objective value).

        Args:
            data: ``numpy.ndarray`` or ``Tensor`` of shape (samples, nodes).
            yilaitu: prior dependency graph, forwarded to ``notears_linear``.

        Raises:
            TypeError: if ``data`` is neither an ndarray nor a ``Tensor``.
        """
        if isinstance(data, np.ndarray):
            X = data
        elif isinstance(data, Tensor):
            X = data.data
        else:
            raise TypeError(
                'The type of data must be Tensor or numpy.ndarray, but got {}'.format(type(data)))

        predict_matrix, sunshi = self.notears_linear(
            X, yilaitu,
            lambda1=self.lambda1,
            eta=self.eta,
            loss_type=self.loss_type,
            max_iter=self.max_iter,
            h_tol=self.h_tol,
            rho_max=self.rho_max,
            w_threshold=self.w_threshold)

        self.predict_matrix = predict_matrix
        self.sunshi = sunshi

    def notears_linear(self, X, yilaitu, lambda1, eta, loss_type,
                       max_iter, h_tol, rho_max, w_threshold):
        """Run the augmented-Lagrangian optimisation.

        Args:
            X: data matrix of shape (samples, nodes).
            yilaitu: prior graph. It is kept in the signature for callers,
                but no term of the current objective depends on it, so it is
                not evaluated.
            lambda1, eta: constants added to the gradient of both halves of
                the split weight vector.
            loss_type: only 'l2' is implemented.
            max_iter: maximum number of dual-ascent steps.
            h_tol: stop once the acyclicity value ``h`` is at most this.
            rho_max: upper bound for the penalty coefficient.
            w_threshold: weights with absolute value below this are zeroed.

        Returns:
            ``(adjacency, objective)``: the binary int adjacency matrix and
            the objective value (a float) at the final weights.

        Raises:
            ValueError: if ``loss_type`` is not 'l2'.
        """
        if loss_type != 'l2':
            # Previously this surfaced as an UnboundLocalError inside _loss.
            raise ValueError("Unsupported loss_type {!r}; only 'l2' is implemented".format(loss_type))

        d = X.shape[1]
        # The l2 loss is evaluated on column-centred data.
        X = X - np.mean(X, axis=0, keepdims=True)

        def _loss(W):
            """Least-squares loss and its gradient with respect to W."""
            R = X - X @ W
            loss = 0.5 / X.shape[0] * (R ** 2).sum()
            G_loss = -1.0 / X.shape[0] * X.T @ R
            return loss, G_loss

        def _h(W):
            """Polynomial acyclicity measure and its gradient."""
            M = np.eye(d) + W * W / d
            E = np.linalg.matrix_power(M, d - 1)
            h = (E.T * M).sum() - d
            G_h = E.T * W * 2
            return h, G_h

        def _adj(w):
            """Fold the split vector (positive part, negative part) into W."""
            return (w[:d * d] - w[d * d:]).reshape([d, d])

        def _std_hinge(W, threshold=0.5):
            """Sum of squared hinge on the sample std of each column of W."""
            W_t = torch.from_numpy(W)
            col_std = torch.norm(W_t - W_t.mean(0), p=2, dim=0) / ((W_t.shape[0] - 1) ** 0.5)
            # .item() keeps the objective a plain float for SciPy.
            return (F.relu(threshold - col_std) ** 2).sum().item()

        def _func(w):
            """Objective and gradient for L-BFGS-B at the current rho/alpha."""
            W = _adj(w)
            loss, G_loss = _loss(W)
            h, G_h = _h(W)
            obj = loss + 0.5 * rho * h * h + alpha * h + lambda1 * _std_hinge(W)
            # NOTE(review): the gradient adds the constants lambda1 + eta and
            # has no term for the hinge penalty; confirm this is intended.
            G_smooth = G_loss + (rho * h + alpha) * G_h
            g_obj = np.concatenate(
                (G_smooth + lambda1 + eta, -G_smooth + lambda1 + eta), axis=None)
            return obj, g_obj

        w_est, rho, alpha, h = np.zeros(2 * d * d), 1.0, 0.0, np.inf
        # Diagonal entries are pinned to zero; all others are non-negative.
        bnds = [(0, 0) if i == j else (0, None)
                for _ in range(2) for i in range(d) for j in range(d)]

        for _step in range(max_iter):
            w_new, h_new = None, None
            while rho < rho_max:
                sol = sopt.minimize(_func, w_est, method='L-BFGS-B', jac=True, bounds=bnds)
                w_new = sol.x
                h_new, _ = _h(_adj(w_new))
                if h_new > 0.25 * h:
                    rho *= 10
                else:
                    break
            w_est, h = w_new, h_new
            alpha += rho * h
            if h <= h_tol or rho >= rho_max:
                break

        W_est = _adj(w_est)
        W_est[np.abs(W_est) < w_threshold] = 0
        return (W_est != 0).astype(int), _func(w_est)[0]
def loss_func(y_pred, y_true):
    """Return the mean-squared error between ``y_pred`` and ``y_true``.

    ``y_true`` is first aligned with ``y_pred``:
      * a trailing singleton dimension on a 3-D target is squeezed away;
      * if the shapes still differ and the target is 2-D, its second
        dimension is truncated to ``y_pred.shape[1]``.
    """
    if y_true.dim() == 3 and y_true.shape[-1] == 1:
        y_true = y_true.squeeze(-1)
    # NOTE(review): truncation can hide a genuine shape mismatch upstream.
    if y_pred.shape != y_true.shape and y_true.dim() == 2:
        y_true = y_true[:, :y_pred.shape[1]]
    loss = F.mse_loss(y_pred, y_true, reduction='mean')
    return loss
def train(model=None, save_path='', config={}, train_dataloader=None, val_dataloader=None,
          feature_map={}, test_dataloader=None, test_dataset=None,
          dataset_name='swat', train_dataset=None):
    """Train ``model`` and save the best state dict to ``save_path``.

    For each batch a NOTEARS graph is fitted on the flattened window, node
    scores are derived from it with PageRank, and the model output is
    weighted by those scores before the MSE loss against the last time step
    of the window is taken.

    Args:
        model: network called as ``model(x, None)``; element 0 of the result
            is the prediction and element 1 is used as the prior graph for
            the next NOTEARS fit.
        save_path: where the best ``state_dict`` is written.
        config: dict providing 'decay' and 'epoch'.
        train_dataloader: yields ``(x, labels)`` pairs.
        val_dataloader: if given, drives checkpointing and early stopping.
        feature_map, test_dataloader, test_dataset, dataset_name,
        train_dataset: accepted for interface compatibility; not used.

    Returns:
        List with the loss of every training batch.
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=config['decay'])

    train_loss_list = []
    device = get_device()

    min_loss = 1e+8
    epoch = config['epoch']
    early_stop_win = 15
    stop_improve_count = 0

    # Prior graph handed to Notears. It is created on the first batch so its
    # size follows the data instead of being fixed at 27 x 27.
    yilaitu = None

    for i_epoch in range(epoch):
        acu_loss = 0
        model.train()

        for x, _labels in train_dataloader:
            # assumes x is (batch, win_size, node_num), per the loader's
            # time-first layout — TODO confirm
            x = x.float().to(device)

            # The model is fed (batch, node_num, win_size).
            x_model = x.permute(0, 2, 1).contiguous()
            batch_, node_, window_ = x_model.shape

            # Notears works on (samples, nodes): flatten batch and time.
            x_input = x.reshape(batch_ * window_, node_).cpu().numpy()

            # Regression target: the values at the last time step.
            y = x[:, -1, :]

            if yilaitu is None:
                yilaitu = np.zeros((node_, node_))

            nt = Notears()
            nt.learn(x_input, yilaitu)

            adjacency = np.abs(nt.predict_matrix.astype(np.float32).T)
            if adjacency.sum() < 1e-6:
                # Empty graph: fall back to uniform node scores.
                print("[Warning] Causal matrix too sparse, using uniform scores")
                scores = np.ones(node_) / node_
            else:
                pagerank = PageRank()
                try:
                    pagerank.fit(adjacency)
                    scores = pagerank.scores_
                except ValueError as e:
                    print(f"[Warning] PageRank fit failed: {e}, using uniform scores")
                    scores = np.ones(node_) / node_
            scores = torch.from_numpy(scores).to(device)

            optimizer.zero_grad()

            model_outputs = model(x_model, None)
            out = model_outputs[0].float().to(device)
            result = out * scores.unsqueeze(0).to(out.dtype)

            loss = loss_func(result, y)

            # Detach first: .numpy() raises on a tensor that requires grad.
            yilaitu = model_outputs[1].detach().cpu().numpy()

            loss.backward()
            optimizer.step()

            print("iterator : {:.4f}, optimization objective: {}\n".format(
                0, loss.item()))

            train_loss_list.append(loss.item())
            acu_loss += loss.item()

        # NOTE(review): the extra "/ 3" only scales the logged value; confirm
        # whether it is still wanted.
        print('epoch ({} / {}) (Loss:{:.8f})'.format(
            i_epoch, epoch, acu_loss / len(train_dataloader) / 3), flush=True)

        if val_dataloader is not None:
            val_loss, val_result = test(model, val_dataloader)

            if val_loss < min_loss:
                torch.save(model.state_dict(), save_path)
                min_loss = val_loss
                stop_improve_count = 0
            else:
                stop_improve_count += 1

            if stop_improve_count >= early_stop_win:
                break
        else:
            if acu_loss < min_loss:
                torch.save(model.state_dict(), save_path)
                min_loss = acu_loss

    return train_loss_list


# This import was fused onto the function's last line in the pasted source;
# it is kept so the code that follows still finds `nn`.
import torch.nn as nn
import time
from util.time import *
from util.env import *
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import pandas as pd
import scipy.optimize as sopt
logging.basicConfig(level=logging.INFO, format='%(asctime)s%(message)s')
class BaseLearner(object):
    """Base class for structure learners; holds the learned causal matrix."""

    def __init__(self):
        self._causal_matrix = None

    def learn(self, data, *args, **kwargs):
        """Fit the learner on ``data``; subclasses must override this."""
        raise NotImplementedError

    @property
    def causal_matrix(self):
        """The stored causal matrix, or ``None`` if nothing has been set."""
        return self._causal_matrix

    @causal_matrix.setter
    def causal_matrix(self, value):
        # Store the value; a bare attribute access here would silently
        # discard every assignment.
        self._causal_matrix = value
class Tensor(object):
    """Thin wrapper exposing array-like input as a NumPy array in ``.data``."""

    def __init__(self, indata):
        self._set_tensor(indata)

    def _set_tensor(self, indata):
        """Validate ``indata`` and store it as ``self.data``.

        Accepts a 2-D or 3-D ``numpy.ndarray`` (stored as-is) or a
        ``pandas.DataFrame`` (stored as its ``.values``).

        Raises:
            TypeError: for an ndarray of any other rank, or any other type.
        """
        if isinstance(indata, np.ndarray):
            if indata.ndim == 2 or indata.ndim == 3:
                self.data = indata
            else:
                raise TypeError("Input numpy.ndarray ndim error!")
        elif isinstance(indata, pd.DataFrame):
            self.data = indata.values
        else:
            raise TypeError("Input data is not numpy.ndarray or pd.DataFrame")
class Notears(BaseLearner):
    """Linear NOTEARS-style structure learner with a hinge penalty.

    The objective is the least-squares loss, an augmented-Lagrangian
    acyclicity term, and ``lambda1`` times a hinge penalty on the per-column
    standard deviation of the weight matrix.
    """

    def __init__(self, lambda1=0.1,
                 loss_type='l2',
                 max_iter=5,
                 h_tol=1e-8,
                 rho_max=1e+16,
                 w_threshold=0.05):
        super().__init__()
        self.lambda1 = lambda1
        self.loss_type = loss_type
        self.max_iter = max_iter
        self.h_tol = h_tol
        self.rho_max = rho_max
        self.w_threshold = w_threshold

    def learn(self, data):
        """Estimate the graph and store it on the instance.

        Sets ``self.predict_matrix`` (binary ``(d, d)`` int adjacency) and
        ``self.sunshi`` (final objective value).

        Args:
            data: ``numpy.ndarray`` or ``Tensor`` of shape (samples, nodes).

        Raises:
            TypeError: if ``data`` is neither an ndarray nor a ``Tensor``.
        """
        if isinstance(data, np.ndarray):
            X = data
        elif isinstance(data, Tensor):
            X = data.data
        else:
            raise TypeError(
                'The type of data must be Tensor or numpy.ndarray, but got {}'.format(type(data)))

        predict_matrix, sunshi = self.notears_linear(
            X,
            lambda1=self.lambda1,
            loss_type=self.loss_type,
            max_iter=self.max_iter,
            h_tol=self.h_tol,
            rho_max=self.rho_max,
            w_threshold=self.w_threshold)

        self.predict_matrix = predict_matrix
        self.sunshi = sunshi

    def notears_linear(self, X, lambda1, loss_type, max_iter, h_tol, rho_max, w_threshold):
        """Run the augmented-Lagrangian optimisation.

        Args:
            X: data matrix of shape (samples, nodes).
            lambda1: weight of the hinge penalty; also added as a constant to
                the gradient of both halves of the split weight vector.
            loss_type: only 'l2' is implemented.
            max_iter: maximum number of dual-ascent steps.
            h_tol: stop once the acyclicity value ``h`` is at most this.
            rho_max: upper bound for the penalty coefficient.
            w_threshold: weights with absolute value below this are zeroed.

        Returns:
            ``(adjacency, objective)``: the binary int adjacency matrix and
            the objective value (a float) at the final weights.

        Raises:
            ValueError: if ``loss_type`` is not 'l2'.
        """
        if loss_type != 'l2':
            # Previously this surfaced as an UnboundLocalError inside _loss.
            raise ValueError("Unsupported loss_type {!r}; only 'l2' is implemented".format(loss_type))

        d = X.shape[1]
        # The l2 loss is evaluated on column-centred data.
        X = X - np.mean(X, axis=0, keepdims=True)

        def _loss(W):
            """Least-squares loss and its gradient with respect to W."""
            R = X - X @ W
            loss = 0.5 / X.shape[0] * (R ** 2).sum()
            G_loss = -1.0 / X.shape[0] * X.T @ R
            return loss, G_loss

        def _h(W):
            """Polynomial acyclicity measure and its gradient."""
            M = np.eye(d) + W * W / d
            E = np.linalg.matrix_power(M, d - 1)
            h = (E.T * M).sum() - d
            G_h = E.T * W * 2
            return h, G_h

        def _adj(w):
            """Fold the split vector (positive part, negative part) into W."""
            return (w[:d * d] - w[d * d:]).reshape([d, d])

        def _std_hinge(W, threshold=0.5):
            """Sum of squared hinge on the sample std of each column of W."""
            W_t = torch.from_numpy(W)
            col_std = torch.norm(W_t - W_t.mean(0), p=2, dim=0) / ((W_t.shape[0] - 1) ** 0.5)
            # .item() keeps the objective a plain float for SciPy.
            return (F.relu(threshold - col_std) ** 2).sum().item()

        def _func(w):
            """Objective and gradient for L-BFGS-B at the current rho/alpha."""
            W = _adj(w)
            loss, G_loss = _loss(W)
            h, G_h = _h(W)
            obj = loss + 0.5 * rho * h * h + alpha * h + lambda1 * _std_hinge(W)
            # NOTE(review): the gradient adds the constant lambda1 and has no
            # term for the hinge penalty; confirm this is intended.
            G_smooth = G_loss + (rho * h + alpha) * G_h
            g_obj = np.concatenate(
                (G_smooth + lambda1, -G_smooth + lambda1), axis=None)
            return obj, g_obj

        w_est, rho, alpha, h = np.zeros(2 * d * d), 1.0, 0.0, np.inf
        # Diagonal entries are pinned to zero; all others are non-negative.
        bnds = [(0, 0) if i == j else (0, None)
                for _ in range(2) for i in range(d) for j in range(d)]

        for _step in range(max_iter):
            w_new, h_new = None, None
            while rho < rho_max:
                sol = sopt.minimize(_func, w_est, method='L-BFGS-B', jac=True, bounds=bnds)
                w_new = sol.x
                h_new, _ = _h(_adj(w_new))
                if h_new > 0.25 * h:
                    rho *= 10
                else:
                    break
            w_est, h = w_new, h_new
            alpha += rho * h
            if h <= h_tol or rho >= rho_max:
                break

        W_est = _adj(w_est)
        W_est[np.abs(W_est) < w_threshold] = 0
        return (W_est != 0).astype(int), _func(w_est)[0]
def test(model, dataloader):
    """Evaluate ``model`` on ``dataloader``.

    Each batch is permuted to (batch, dim2, dim1) before being passed to the
    model as ``model(x, None)``; element 0 of the result is compared with the
    last time step of the input using MSE.

    A NOTEARS fit used to run on every batch here, but its result was never
    read, so it has been dropped; the returned values are unaffected.

    Args:
        model: network to evaluate (switched to eval mode).
        dataloader: yields ``(x, labels)``; ``labels`` is indexed as
            ``labels[:, -1, 0]``.

    Returns:
        ``(avg_loss, [predicted, ground_truth, labels])`` where the three
        lists are nested Python lists with one row per sample and the label
        of the last time step is repeated for every output column.

    Raises:
        ValueError: if ``dataloader`` yields no batches.
    """
    loss_func = nn.MSELoss(reduction='mean')
    device = get_device()

    test_loss_list = []
    now = time.time()

    # Collect per-batch tensors and concatenate once at the end; growing a
    # tensor with torch.cat inside the loop copies everything each time.
    predicted_batches = []
    ground_batches = []
    label_batches = []

    test_len = len(dataloader)

    model.eval()

    for i, (x, labels) in enumerate(dataloader, start=1):
        x = x.float().to(device)
        labels = labels.float().to(device)

        with torch.no_grad():
            # assumes x is (batch, win_size, node_num) — TODO confirm
            x_model = x.permute(0, 2, 1).contiguous()

            # Ground truth: the values at the last time step.
            y = x[:, -1, :]

            predicted = model(x_model, None)[0].float().to(device)
            loss = loss_func(predicted, y)

            # Label of the last time step, repeated for each output column.
            last_labels = labels[:, -1, 0]
            last_labels = last_labels.unsqueeze(1).repeat(1, predicted.shape[1])

        predicted_batches.append(predicted)
        ground_batches.append(y)
        label_batches.append(last_labels)

        test_loss_list.append(loss.item())

        if i % 10000 == 1 and i > 1:
            print(timeSincePlus(now, i / test_len))

    if not test_loss_list:
        raise ValueError("dataloader yielded no batches")

    test_predicted_list = torch.cat(predicted_batches, dim=0).tolist()
    test_ground_list = torch.cat(ground_batches, dim=0).tolist()
    test_labels_list = torch.cat(label_batches, dim=0).tolist()

    avg_loss = sum(test_loss_list) / len(test_loss_list)

    return avg_loss, [test_predicted_list, test_ground_list, test_labels_list]


# This import was fused onto the function's last line in the pasted source;
# it is kept so the code that follows still finds `np`.
import numpy as np
np.set_printoptions(threshold=np.inf)
import torch
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import time
from util.time import *
from util.env import *
'''from torch_geometric.nn import GCNConv, GATConv, EdgeConv'''
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '2'
import math
import torch.nn.functional as F
from graph_layer import GraphLayer
from tcn import TemporalConvNet
from encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack
from attn import FullAttention, ProbAttention, AttentionLayer
from torch.nn import Sequential as Seq, Linear, ReLU, Parameter
from torch_geometric.nn import MessagePassing, GCNConv
from torch_geometric.nn.conv.gcn_conv import gcn_norm
from torch_geometric.nn.inits import glorot, zeros
from tconv import TemporalBlock
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
class AdaGCNConv(MessagePassing):
    """GCN-style convolution with a learnable per-edge gate.

    A logit pair is kept for each of the ``num_nodes ** 2`` possible edges.
    On every forward pass a hard Gumbel-softmax sample of those logits gates
    the messages, so ``edge_index`` is expected to contain exactly
    ``num_nodes ** 2`` edges.
    """

    def __init__(self, num_nodes, in_channels, out_channels, improved=False,
                 add_self_loops=False, normalize=True, bias=True, init_method='all'):
        super(AdaGCNConv, self).__init__(aggr='add', node_dim=0)  # sum aggregation
        self.num_nodes = num_nodes
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.add_self_loops = add_self_loops
        self.normalize = normalize
        self.init_method = init_method

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        # `self.bias` must not be set to the bool flag beforehand:
        # register_parameter refuses a name that already exists as a plain
        # attribute, which made bias=False fail.
        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self._init_graph_logits_()

        self.reset_parameters()

    def _init_graph_logits_(self):
        """Create the ``(num_nodes ** 2, 2)`` edge-gate logits parameter.

        Raises:
            NotImplementedError: for an unknown ``init_method``.
        """
        if self.init_method == 'all':
            logits = .8 * torch.ones(self.num_nodes ** 2, 2)
            logits[:, 1] = 0
        elif self.init_method == 'random':
            logits = 1e-3 * torch.randn(self.num_nodes ** 2, 2)
        elif self.init_method == 'equal':
            logits = .5 * torch.ones(self.num_nodes ** 2, 2)
        else:
            raise NotImplementedError('Initial Method %s is not implemented' % self.init_method)

        self.register_parameter('logits', Parameter(logits, requires_grad=True))

    def reset_parameters(self):
        """Glorot-initialise the weight and zero the bias."""
        glorot(self.weight)
        zeros(self.bias)

    def forward(self, x, edge_index, edge_weight=None):
        """Apply the gated convolution.

        Args:
            x: node features; the node dimension is dim 0.
            edge_index: edge list of shape [2, E].
            edge_weight: optional per-edge weights.
        """
        if self.normalize:
            edge_index, edge_weight = gcn_norm(  # yapf: disable
                edge_index, edge_weight, x.size(self.node_dim),
                self.improved, self.add_self_loops, dtype=x.dtype)

        # One hard (one-hot) sample per edge; column 0 acts as the gate.
        z = torch.nn.functional.gumbel_softmax(self.logits, hard=True)

        x = torch.matmul(x, self.weight)

        # propagate_type: (x: Tensor, edge_weight: OptTensor)
        out = self.propagate(edge_index, x=x, edge_weight=edge_weight, size=None, z=z)
        if self.bias is not None:
            out += self.bias

        return out

    def message(self, x_j, edge_weight, z):
        """Scale each message by its gate and, if present, its edge weight."""
        gate = z[:, 0].contiguous().view([-1] + [1] * (x_j.dim() - 1))
        if edge_weight is None:
            return x_j * gate
        else:
            return edge_weight.view([-1] + [1] * (x_j.dim() - 1)) * x_j * gate

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)
class GraphTemporalEmbedding(torch.nn.Module):
    """Stack of dilated temporal blocks interleaved with graph convolutions.

    The graph is fully connected, including self-loops: ``edge_index`` lists
    every (source, target) pair of the ``num_nodes`` nodes.
    """

    def __init__(self, num_nodes, seq_len, num_levels, kernel_size=3, dropout=0.02,
                 device=torch.device('cuda:0')):
        """
        Args:
            num_nodes: number of graph nodes (channels of the temporal blocks).
            seq_len: feature size seen by the graph convolutions.
            num_levels: number of temporal levels; dilation doubles per level.
            kernel_size: temporal kernel size, must be at least 3.
            dropout: dropout rate of the temporal blocks.
            device: device on which ``edge_index`` is created.

        Raises:
            ValueError: if ``kernel_size`` is smaller than 3.
        """
        super(GraphTemporalEmbedding, self).__init__()
        self.num_nodes = num_nodes
        self.seq_len = seq_len
        self.num_levels = num_levels
        self.device = device

        # Same condition as the former `assert (kernel_size - 1) // 2`, but
        # it is not stripped when Python runs with -O.
        if not (kernel_size - 1) // 2:
            raise ValueError('kernel_size must be at least 3, got {}'.format(kernel_size))

        self.tc_modules = torch.nn.ModuleList([])
        self.gc_module = AdaGCNConv(num_nodes, seq_len, seq_len)
        self.gc_modules = torch.nn.ModuleList([])
        for i in range(num_levels):
            dilation_size = 2 ** i
            self.tc_modules.extend(
                [TemporalBlock(num_nodes, num_nodes, kernel_size=kernel_size, stride=1,
                               dilation=dilation_size,
                               padding=(kernel_size - 1) * dilation_size // 2,
                               dropout=dropout)])
            self.gc_modules.extend([AdaGCNConv(num_nodes, seq_len, seq_len)])

        # Fully connected edge list, ordered by target node and then source.
        source_nodes = [j for _ in range(num_nodes) for j in range(num_nodes)]
        target_nodes = [i for i in range(num_nodes) for _ in range(num_nodes)]
        self.edge_index = torch.tensor([source_nodes, target_nodes], dtype=torch.long,
                                       device=self.device)

    def forward(self, x):
        """Run all levels and return the result with dims 1 and 2 swapped.

        assumes x is (bsz, num_nodes, seq_len), since no permute is applied
        before the first temporal block — TODO confirm
        """
        x = self.tc_modules[0](x)
        x = self.gc_modules[0](x.transpose(0, 1), self.edge_index).transpose(0, 1)

        # NOTE(review): levels 1.. use the single shared `gc_module`, not
        # `gc_modules[i]`; left unchanged because switching would alter which
        # parameters are trained and loaded.
        for i in range(1, self.num_levels):
            x = self.tc_modules[i](x)
            x = self.gc_module(x.transpose(0, 1), self.edge_index).transpose(0, 1)

        return x.transpose(1, 2)
def get_batch_edge_index(org_edge_index, batch_num, node_num):
    """Tile an edge list once per batch item, offsetting node ids per copy.

    Args:
        org_edge_index: Edge tensor of shape ``(2, edge_num)``.
        batch_num: Number of copies to lay out side by side along dim 1.
        node_num: Offset added to the node ids of each successive copy
            (copy ``i`` is shifted by ``i * node_num``).

    Returns:
        A ``torch.long`` tensor of shape ``(2, edge_num * batch_num)``.  The
        input tensor is not modified.
    """
    # Work on a detached copy so the caller's tensor is left untouched.
    edge_index = org_edge_index.clone().detach()
    edge_num = org_edge_index.shape[1]
    batch_edge_index = edge_index.repeat(1, batch_num).contiguous()

    for i in range(batch_num):
        batch_edge_index[:, i * edge_num:(i + 1) * edge_num] += i * node_num

    return batch_edge_index.long()
class GRUModel(nn.Module):
    """Single-layer batch-first GRU followed by a linear projection.

    Args:
        input_num: Feature size of each input step.
        hidden_num: GRU hidden size.
        output_num: Output size of the final linear layer.
    """

    def __init__(self, input_num, hidden_num, output_num):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_num
        # batch_first=True is set here, so inputs should be shaped as
        # inputs.view(inputs.shape[0], -1, inputs.shape[1]).
        # For time-series prediction this amounts to using a single time
        # step (seq_len = 1).
        self.GRU_layer = nn.GRU(input_size=input_num, hidden_size=hidden_num,
                                batch_first=True)
        self.output_linear = nn.Linear(hidden_num, output_num)
        self.hidden = None

    def forward(self, x):
        """Return ``(projected_outputs, last_hidden_state)``.

        The last hidden state is also stored on ``self.hidden``.
        """
        # h_n has shape (num_layers * num_directions, batch, hidden_size).
        # The hidden state does not need to be passed in explicitly here.
        x, self.hidden = self.GRU_layer(x)
        x = self.output_linear(x)
        return x, self.hidden
class TemporalBlocknana(nn.Module):
    """Average of four parallel Conv1d branches with kernel sizes 3, 5, 7, 9.

    Every branch is Conv1d -> BatchNorm1d -> ReLU -> Dropout(0.2) and uses a
    padding of ``(kernel_size - 1) // 2``, so all branches keep the input
    length and can be averaged element-wise.

    Args:
        in_channel: Input channel count of every branch.
        out_channel: Output channel count of every branch.
    """

    def __init__(self, in_channel, out_channel):
        super(TemporalBlocknana, self).__init__()
        # Attribute names are kept so existing state_dict keys still match.
        self.temporal1 = self._make_branch(in_channel, out_channel, 3)
        self.temporal2 = self._make_branch(in_channel, out_channel, 5)
        self.temporal3 = self._make_branch(in_channel, out_channel, 7)
        self.temporal4 = self._make_branch(in_channel, out_channel, 9)

    @staticmethod
    def _make_branch(in_channel, out_channel, kernel_size):
        """Build one length-preserving Conv1d/BatchNorm/ReLU/Dropout branch."""
        return nn.Sequential(
            nn.Conv1d(in_channel, out_channel, kernel_size=kernel_size,
                      padding=(kernel_size - 1) // 2),
            nn.BatchNorm1d(out_channel),
            nn.ReLU(),
            nn.Dropout(0.2))

    def forward(self, x):
        """Apply the four branches to ``x`` and return their mean."""
        x1 = self.temporal1(x)
        x2 = self.temporal2(x)
        x3 = self.temporal3(x)
        x4 = self.temporal4(x)
        x_out = (x1 + x2 + x3 + x4) / 4
        return x_out
class OutLayer(nn.Module):
    """MLP head that maps the last dimension to a single value.

    With ``layer_num == 1`` the head is one ``Linear(in_num, 1)``.  Otherwise
    each non-final layer is Linear -> BatchNorm1d -> ReLU with width
    ``inter_num`` and the final layer is ``Linear(inter_num, 1)``.

    Args:
        in_num: Size of the last input dimension.
        node_num: Accepted for interface compatibility; not used here.
        layer_num: Number of linear layers.
        inter_num: Width of the hidden layers.
    """

    def __init__(self, in_num, node_num, layer_num, inter_num = 512):
        super(OutLayer, self).__init__()

        modules = []
        for i in range(layer_num):
            if i == layer_num - 1:
                # Last layer: output size 1.
                modules.append(nn.Linear(in_num if layer_num == 1 else inter_num, 1))
            else:
                layer_in_num = in_num if i == 0 else inter_num
                modules.append(nn.Linear(layer_in_num, inter_num))
                modules.append(nn.BatchNorm1d(inter_num))
                modules.append(nn.ReLU())

        self.mlp = nn.ModuleList(modules)

    def forward(self, x):
        """Apply the layers in order to a 3-D input and return the result."""
        out = x
        for mod in self.mlp:
            if isinstance(mod, nn.BatchNorm1d):
                # BatchNorm1d normalises dim 1, so move the feature axis
                # there and back again.
                out = out.permute(0, 2, 1)
                out = mod(out)
                out = out.permute(0, 2, 1)
            else:
                out = mod(out)
        return out
class GNNLayer(nn.Module):
    """``GraphLayer`` followed by batch normalisation and ReLU.

    Args:
        in_channel: Input feature size passed to ``GraphLayer``.
        out_channel: Output feature size of ``GraphLayer`` and of the
            batch-norm layer.
        inter_dim: Forwarded to ``GraphLayer``.
        heads: Number of attention heads forwarded to ``GraphLayer``.
        node_num: Accepted for interface compatibility; not used here.
    """

    def __init__(self, in_channel, out_channel, inter_dim=0, heads=1, node_num=100):
        super(GNNLayer, self).__init__()

        self.gnn = GraphLayer(in_channel, out_channel, inter_dim=inter_dim,
                              heads=heads, concat=False)
        self.bn = nn.BatchNorm1d(out_channel)
        self.relu = nn.ReLU()
        self.leaky_relu = nn.LeakyReLU()

    def forward(self, x, edge_index, embedding=None, node_num=0):
        """Return ``(activated_output, attention_weights, edge_index)``.

        The attention weights and the edge index reported by ``GraphLayer``
        are also stored on ``self.att_weight_1`` and ``self.edge_index_1``.
        ``node_num`` is accepted but not used.
        """
        out, (new_edge_index, att_weight) = self.gnn(
            x, edge_index, embedding, return_attention_weights=True)
        self.att_weight_1 = att_weight
        self.edge_index_1 = new_edge_index

        out = self.bn(out)

        return self.relu(out), self.att_weight_1, self.edge_index_1
class GDN(nn.Module):
    """Graph deviation network with a top-k graph learned from node embeddings.

    Each node gets a learned embedding.  On every forward pass the cosine
    similarity between the (detached) embeddings selects the ``topk`` most
    similar nodes per node; the resulting edges feed one ``GNNLayer`` per
    entry of ``edge_index_sets``.  The GNN output is multiplied by the node
    embeddings, normalised, and mapped to one value per node by ``OutLayer``.

    Args:
        edge_index_sets: Sequence of edge-index tensors; one ``GNNLayer`` is
            created per entry.
        node_num: Number of nodes (rows of the embedding table).
        dim: Embedding size and GNN output size.
        out_layer_inter_dim: Hidden width of the output MLP.
        input_dim: Per-node input feature size of the GNN layers.
        out_layer_num: Number of linear layers in the output MLP.
        topk: Number of neighbours kept per node in the learned graph.
        factor, d_model, n_heads, e_layers, d_ff, dropout, distil,
        activation, output_attention: Accepted for interface compatibility;
            the encoder that consumed them is disabled.
    """

    def __init__(self, edge_index_sets, node_num, dim=64, out_layer_inter_dim=256,
                 input_dim=10, out_layer_num=1, topk=20,
                 factor=5, d_model=10, n_heads=8, e_layers=3, d_ff=256,
                 dropout=0.001, distil=True, activation='gelu', output_attention=False):
        super(GDN, self).__init__()

        self.edge_index_sets = edge_index_sets

        embed_dim = dim
        self.embedding = nn.Embedding(node_num, embed_dim)
        self.bn_outlayer_in = nn.BatchNorm1d(embed_dim)

        # NOTE: earlier experiments (attention encoder, TCN, multi-kernel
        # temporal block, GRU, per-batch-size fusion convolutions) were
        # disabled in this version and are not instantiated.

        edge_set_num = len(edge_index_sets)
        self.gnn_layers = nn.ModuleList([
            GNNLayer(input_dim, dim, inter_dim=dim + embed_dim, heads=1)
            for i in range(edge_set_num)
        ])

        # NOTE(review): sizes are hard-coded and the module is built with
        # GraphTemporalEmbedding's default device; ``forward`` does not call
        # it.  Kept so existing checkpoints still load.
        self.gte = GraphTemporalEmbedding(10, 15, 3)

        self.node_embedding = None
        self.topk = topk
        self.learned_graph = None

        self.out_layer = OutLayer(dim * edge_set_num, node_num, out_layer_num,
                                  inter_num=out_layer_inter_dim)

        self.cache_edge_index_sets = [None] * edge_set_num
        self.cache_embed_index = None

        self.dp = nn.Dropout(0.2)

        self.init_params()

    def init_params(self):
        """Initialise the node embedding table with Kaiming-uniform values."""
        nn.init.kaiming_uniform_(self.embedding.weight, a=math.sqrt(5))

    def forward(self, data, org_edge_index):
        """Predict one value per node and return the embedding similarity.

        Args:
            data: 3-D tensor unpacked as ``(batch_num, node_num, all_feature)``.
            org_edge_index: Accepted for interface compatibility; not used.

        Returns:
            ``(out, cos_ji_mat)`` where ``out`` has been reshaped with
            ``view(-1, node_num)`` and ``cos_ji_mat`` is the cosine-similarity
            matrix of the detached node embeddings.
        """
        x = data.clone().detach()
        edge_index_sets = self.edge_index_sets

        device = data.device

        batch_num, node_num, all_feature = x.shape
        # Flatten batch and node axes: one row per (batch item, node).
        x = x.reshape(-1, all_feature).contiguous()

        gcn_outs = []
        for i, edge_index in enumerate(edge_index_sets):
            edge_num = edge_index.shape[1]
            cache_edge_index = self.cache_edge_index_sets[i]

            # Keep the batched copy of the configured edges up to date; the
            # learned top-k graph below is what the GNN layer actually uses.
            if cache_edge_index is None or cache_edge_index.shape[1] != edge_num * batch_num:
                self.cache_edge_index_sets[i] = get_batch_edge_index(
                    edge_index, batch_num, node_num).to(device)

            all_embeddings = self.embedding(torch.arange(node_num).to(device))

            # Similarity is computed on a detached copy so graph selection
            # does not back-propagate into the embeddings.
            weights_arr = all_embeddings.detach().clone()
            all_embeddings = all_embeddings.repeat(batch_num, 1)

            weights = weights_arr.view(node_num, -1)

            cos_ji_mat = torch.matmul(weights, weights.T)
            normed_mat = torch.matmul(weights.norm(dim=-1).view(-1, 1),
                                      weights.norm(dim=-1).view(1, -1))
            cos_ji_mat = cos_ji_mat / normed_mat

            topk_num = self.topk
            topk_indices_ji = torch.topk(cos_ji_mat, topk_num, dim=-1)[1]

            self.learned_graph = topk_indices_ji

            # Target node i repeated topk times, paired with its top-k sources j.
            gated_i = (torch.arange(0, node_num).view(-1, 1).repeat(1, topk_num)
                       .flatten().to(device).unsqueeze(0))
            gated_j = topk_indices_ji.flatten().unsqueeze(0)
            gated_edge_index = torch.cat((gated_j, gated_i), dim=0)

            batch_gated_edge_index = get_batch_edge_index(
                gated_edge_index, batch_num, node_num).to(device)
            gcn_out, _att_weight, _new_edge_index = self.gnn_layers[i](
                x, batch_gated_edge_index,
                node_num=node_num * batch_num, embedding=all_embeddings)

            gcn_outs.append(gcn_out)

        x = torch.cat(gcn_outs, dim=1)
        x = x.view(batch_num, node_num, -1)

        indexes = torch.arange(0, node_num).to(device)
        out = torch.mul(x, self.embedding(indexes))

        # BatchNorm1d normalises dim 1, so move the feature axis there and back.
        out = out.permute(0, 2, 1)
        out = F.relu(self.bn_outlayer_in(out))
        out = out.permute(0, 2, 1)

        out = self.dp(out)
        out = self.out_layer(out)
        out = out.view(-1, node_num)

        return out, cos_ji_mat
train: (1565, 27)
test: (2049, 27)
train: (1565, 27)
test: (2049, 27)
train: (1565, 27)
/home/ubuntu/anaconda3/envs/chafen/lib/python3.11/site-packages/torch/nn/utils/weight_norm.py:144: FutureWarning: torch.nn.utils.weight_norm is deprecated in favor of torch.nn.utils.parametrizations.weight_norm.
WeightNorm.apply(module, name, dim)
iterator : 0.0000, optimization objective: 0.8663418889045715
iterator : 0.0000, optimization objective: 0.8094824552536011
iterator : 0.0000, optimization objective: 0.8071750998497009
iterator : 0.0000, optimization objective: 0.8322016596794128
iterator : 0.0000, optimization objective: 0.940419614315033
iterator : 0.0000, optimization objective: 0.8364759683609009
iterator : 0.0000, optimization objective: 0.8436645269393921
iterator : 0.0000, optimization objective: 0.933905303478241
iterator : 0.0000, optimization objective: 0.8951370716094971
iterator : 0.0000, optimization objective: 0.9112871289253235
iterator : 0.0000, optimization objective: 0.8398826122283936
iterator : 0.0000, optimization objective: 0.9027136564254761
iterator : 0.0000, optimization objective: 0.6421583294868469
epoch (0 / 100) (Loss:0.28361142)
iterator : 0.0000, optimization objective: 0.7916588187217712
iterator : 0.0000, optimization objective: 0.894771158695221
iterator : 0.0000, optimization objective: 0.8909013867378235
iterator : 0.0000, optimization objective: 0.8222718238830566
iterator : 0.0000, optimization objective: 0.8284629583358765
iterator : 0.0000, optimization objective: 0.9187648892402649
iterator : 0.0000, optimization objective: 0.8319133520126343
iterator : 0.0000, optimization objective: 0.8192020058631897
iterator : 0.0000, optimization objective: 0.7168318033218384
iterator : 0.0000, optimization objective: 0.8224521279335022
iterator : 0.0000, optimization objective: 0.8410629630088806
iterator : 0.0000, optimization objective: 0.7462283968925476
iterator : 0.0000, optimization objective: 0.638600766658783
epoch (1 / 100) (Loss:0.27084929)

为什么不是一次迭代求因果而是多次呀?
因为你现在的代码里 NOTEARS 求因果是在每个 batch 里执行一次,不是整个训练过程只执行一次。
关键位置在 train() 里:
```python
for i_epoch in range(epoch):
    ...
    for x, labels in train_dataloader:
        ...
        for iterator in range(1):
            nt = Notears()
            nt.learn(x_input, yilaitu)
```
虽然这里写的是:
pythonfor iterator in range(1):
它确实只循环 1 次,所以打印永远是:
textiterator : 0.0000
但是外面还有:
pythonfor x, labels in train_dataloader:
所以每来一个 batch,就会重新执行一次:
```python
nt = Notears()
nt.learn(x_input, yilaitu)
```
你的训练集大小是:
texttrain: (1565, 27)
batch size 是 128,所以每个 epoch 大约有:
textceil(1565 / 128) = 13 个 batch
因此你看到每个 epoch 里大概打印 13 次:
textiterator : 0.0000, optimization objective: ...
这不是 iterator 多次迭代,而是 13 个 batch 分别求了一次因果图。
另外,Notears() 本身内部也不是一步完成的。它里面还有优化循环:
```python
for i in range(max_iter):
    while rho < rho_max:
        sol = sopt.minimize(_func, w_est, method='L-BFGS-B', jac=True, bounds=bnds)
```
所以从计算角度看,每次 nt.learn() 里面又会调用 scipy.optimize.minimize() 做多轮优化。
因此你现在实际上是:
```text
每个 epoch
    每个 batch
        求一次 NOTEARS 因果图
        用这个因果图算 PageRank
        用 PageRank 加权 GDN 输出
        更新 GDN 参数
```
如果你想 只求一次因果图,就不要把 nt.learn() 放在 batch 循环里面。可以在训练前先算一次因果图,然后训练时复用。
比如改成这种思路:
```python
# 在 epoch 循环前,只求一次因果图
first_x, first_labels = next(iter(train_dataloader))
first_x = first_x.float().to(device)
batch_, window_, node_ = first_x.shape
x_input = first_x.reshape(batch_ * window_, node_).cpu().numpy()
yilaitu = np.zeros((node_, node_))

nt = Notears()
nt.learn(x_input, yilaitu)
fixed_causal_matrix = torch.from_numpy(nt.predict_matrix).float().to(device)

adjacency = np.abs(nt.predict_matrix.T)
pagerank = PageRank()
if adjacency.sum() < 1e-6:
    fixed_scores = np.ones(node_) / node_
else:
    pagerank.fit(adjacency)
    fixed_scores = pagerank.scores_
fixed_scores = torch.from_numpy(fixed_scores).float().to(device)
```
然后在 batch 循环里删掉:
```python
nt = Notears()
nt.learn(x_input, yilaitu)
...
scores = torch.from_numpy(scores).to(device)
```
直接用:
pythonscores = fixed_scores
训练时:
```python
model_outputs = model(x_model, None)
out = model_outputs[0].float().to(device)
result = out * scores.unsqueeze(0).to(out.dtype)
loss = loss_func(result, y)
```
这样就是 训练前求一次因果图,后面所有 batch 共用这个因果图/PageRank 权重。
不过要注意一点:你当前代码里有这句:
pythonyilaitu = model_outputs[1].cpu().numpy()
也就是说你现在的设计像是一个交替更新过程:
```text
NOTEARS 因果图 ← 使用上一次 GDN 学到的图作为先验
GDN 图结构 ← 根据当前 batch 更新
```
所以它不是单纯“求一次因果”,而是在做一种 batch-level 的动态因果/图结构交替学习。
如果你的论文或方法目标是“动态因果图”,那每个 batch 求一次是符合这个思路的;
如果你的目标是“全局固定因果图”,那就应该把 NOTEARS 移到训练循环外面,只计算一次。