Spaces:

Sheldon123z
/

rural-voltage-demo

Runtime error

App Files Files Community

Sheldon123z committed on Feb 3

Commit

5d08a33

verified ·

1 Parent(s): c98a82a

Initial deployment of voltage anomaly detection demo

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +30 -6
app.py +122 -0
assets/custom.css +96 -0
config.py +101 -0
core/__init__.py +14 -0
core/__pycache__/__init__.cpython-310.pyc +0 -0
core/__pycache__/__init__.cpython-311.pyc +0 -0
core/__pycache__/data_processor.cpython-310.pyc +0 -0
core/__pycache__/data_processor.cpython-311.pyc +0 -0
core/__pycache__/inference.cpython-310.pyc +0 -0
core/__pycache__/inference.cpython-311.pyc +0 -0
core/__pycache__/model_loader.cpython-310.pyc +0 -0
core/__pycache__/model_loader.cpython-311.pyc +0 -0
core/data_processor.py +417 -0
core/inference.py +426 -0
core/model_loader.py +308 -0
docs/model_architectures/svg/01_TimesNet.svg +150 -0
docs/model_architectures/svg/02_VoltageTimesNet.svg +149 -0
docs/model_architectures/svg/03_VoltageTimesNet_v2.svg +197 -0
docs/model_architectures/svg/04_TPATimesNet.svg +175 -0
docs/model_architectures/svg/05_MTSTimesNet.svg +201 -0
docs/model_architectures/svg/06_DLinear.svg +149 -0
layers/AutoCorrelation.py +186 -0
layers/Autoformer_EncDec.py +219 -0
layers/Conv_Blocks.py +85 -0
layers/Embed.py +229 -0
layers/SelfAttention_Family.py +378 -0
layers/StandardNorm.py +85 -0
layers/ThreePhaseAttention.py +504 -0
layers/Transformer_EncDec.py +159 -0
layers/VoltageEmbed.py +465 -0
layers/__init__.py +27 -0
layers/__pycache__/AutoCorrelation.cpython-310.pyc +0 -0
layers/__pycache__/AutoCorrelation.cpython-311.pyc +0 -0
layers/__pycache__/Autoformer_EncDec.cpython-310.pyc +0 -0
layers/__pycache__/Autoformer_EncDec.cpython-311.pyc +0 -0
layers/__pycache__/Conv_Blocks.cpython-310.pyc +0 -0
layers/__pycache__/Conv_Blocks.cpython-311.pyc +0 -0
layers/__pycache__/Embed.cpython-310.pyc +0 -0
layers/__pycache__/Embed.cpython-311.pyc +0 -0
layers/__pycache__/SelfAttention_Family.cpython-310.pyc +0 -0
layers/__pycache__/SelfAttention_Family.cpython-311.pyc +0 -0
layers/__pycache__/StandardNorm.cpython-310.pyc +0 -0
layers/__pycache__/StandardNorm.cpython-311.pyc +0 -0
layers/__pycache__/Transformer_EncDec.cpython-310.pyc +0 -0
layers/__pycache__/Transformer_EncDec.cpython-311.pyc +0 -0
layers/__pycache__/__init__.cpython-310.pyc +0 -0
layers/__pycache__/__init__.cpython-311.pyc +0 -0
models/DLinear.py +127 -0
models/MTSTimesNet.py +418 -0

README.md CHANGED Viewed

@@ -1,12 +1,36 @@
 ---
-title: Rural Voltage Demo
-emoji: 🏃
-colorFrom: green
-colorTo: indigo
 sdk: gradio
-sdk_version: 6.5.1
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: 农村低压配电网电压异常检测
+emoji: ⚡
+colorFrom: blue
+colorTo: green
 sdk: gradio
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
+license: mit
 ---
+# 农村低压配电网电压异常检测系统
+基于 TimesNet 的时间序列异常检测方法研究与应用
+## 功能
+- **原理演示**: FFT 周期发现可视化
+- **创新对比**: VoltageTimesNet vs TimesNet
+- **模型竞技场**: 6 模型性能对比
+- **模型结构**: 网络架构图展示
+- **自定义检测**: 上传 CSV 进行异常检测
+## 模型
+| 模型 | F1 | Recall | 说明 |
+|:-----|:--:|:------:|:-----|
+| VoltageTimesNet_v2 | 0.6622 | 0.5858 | 最优模型 |
+| TimesNet | 0.6520 | 0.5705 | 基线 |
+## 链接
+- [GitHub](https://github.com/sheldon123z/Rural-Low-Voltage-Detection)
+- [数据集](https://huggingface.co/datasets/Sheldon123z/rural-voltage-datasets)
+- [模型权重](https://huggingface.co/Sheldon123z/rural-voltage-detection-models)

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+"""
+农村低压配电网电压异常检测 - Gradio 交互式演示
+Rural Low-Voltage Distribution Network Voltage Anomaly Detection Demo
+用于论文答辩演示，展示 TimesNet 周期建模原理、VoltageTimesNet 创新点、多模型性能对比
+HuggingFace Spaces 版本
+"""
+import gradio as gr
+import sys
+from pathlib import Path
+# 添加项目路径 (HuggingFace Spaces 兼容)
+DEMO_DIR = Path(__file__).parent
+sys.path.insert(0, str(DEMO_DIR))
+# 导入标签页
+from tabs.tab1_principle import create_principle_tab
+from tabs.tab2_innovation import create_innovation_tab
+from tabs.tab3_arena import create_arena_tab
+from tabs.tab4_detection import create_detection_tab
+from tabs.tab5_architecture import create_architecture_tab
+# 导入配置
+from config import GRADIO_THEME, THESIS_COLORS
+def create_header():
+    """创建页面头部"""
+    return gr.Markdown(
+        """
+        # ⚡ 农村低压配电网电压异常检测系统
+        **基于 TimesNet 的时间序列异常检测方法研究与应用**
+        本演示系统用于论文答辩，展示研究成果和模型性能。
+        ---
+        """
+    )
+def create_footer():
+    """创建页面底部"""
+    return gr.Markdown(
+        """
+        ---
+        ### 📚 系统说明
+        | 标签页 | 功能 | 说明 |
+        |--------|------|------|
+        | 原理演示 | FFT 周期发现 | 展示 TimesNet 核心算法原理 |
+        | 创新对比 | 模型改进 | VoltageTimesNet 与 TimesNet 的差异对比 |
+        | 模型竞技场 | 性能对比 | 6 个模型的多维度性能对比 |
+        | 模型结构 | 架构展示 | 6 个模型的网络结构图和详细说明 |
+        | 自定义检测 | 实时推理 | 上传 CSV 进行异常检测 |
+        **技术栈**: PyTorch + Gradio + Plotly
+        **模型**: VoltageTimesNet_v2 (最优) | VoltageTimesNet | TimesNet | TPATimesNet | MTSTimesNet | DLinear
+        ---
+        <center>
+        📧 联系作者 | 📖 [GitHub](https://github.com/sheldon123z/Rural-Low-Voltage-Detection) | 🤗 [HuggingFace](https://huggingface.co/Sheldon123z)
+        </center>
+        """
+    )
+def create_app():
+    """创建 Gradio 应用"""
+    # 加载自定义 CSS
+    css_path = DEMO_DIR / "assets" / "custom.css"
+    custom_css = ""
+    if css_path.exists():
+        with open(css_path, "r", encoding="utf-8") as f:
+            custom_css = f.read()
+    # 创建应用
+    with gr.Blocks(title="农村低压配电网电压异常检测系统") as app:
+        # 页面头部
+        create_header()
+        # 标签页
+        with gr.Tabs():
+            # Tab 1: 原理演示
+            create_principle_tab()
+            # Tab 2: 创新对比
+            create_innovation_tab()
+            # Tab 3: 模型竞技场
+            create_arena_tab()
+            # Tab 4: 模型结构
+            create_architecture_tab()
+            # Tab 5: 自定义检测
+            create_detection_tab()
+        # 页面底部
+        create_footer()
+    return app
+# Create the application instance at import time so the module exposes `app`.
+app = create_app()
+if __name__ == "__main__":
+    # Direct execution: start the server on all interfaces, port 7860,
+    # without a public share link and with errors surfaced in the UI.
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,
+        show_error=True,
+    )

assets/custom.css ADDED Viewed

	@@ -0,0 +1,96 @@

+/* Custom styles for the Gradio demo */
+/* Theme colours */
+:root {
+    --primary-color: #4878A8;
+    --secondary-color: #72A86D;
+    --accent-color: #C4785C;
+    --warning-color: #D4A84C;
+    --neutral-color: #808080;
+}
+/* Tab navigation */
+.tab-nav button {
+    font-size: 16px !important;
+    font-weight: 500 !important;
+}
+.tab-nav button.selected {
+    background-color: var(--primary-color) !important;
+    color: white !important;
+}
+/* Headings */
+h1, h2, h3 {
+    color: #2c3e50 !important;
+}
+/* Cards */
+.gr-box {
+    border-radius: 8px !important;
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1) !important;
+}
+/* Buttons */
+.gr-button-primary {
+    background-color: var(--primary-color) !important;
+}
+.gr-button-primary:hover {
+    background-color: #3a6a9a !important;
+}
+/* Sliders */
+.gr-slider input[type="range"] {
+    accent-color: var(--primary-color);
+}
+/* Tables */
+.gr-dataframe {
+    font-size: 14px !important;
+}
+.gr-dataframe th {
+    background-color: var(--primary-color) !important;
+    color: white !important;
+}
+/* Chart containers */
+.plotly-container {
+    border-radius: 8px;
+    overflow: hidden;
+}
+/* Descriptive text */
+.description-text {
+    color: #666;
+    font-size: 14px;
+    line-height: 1.6;
+}
+/* Metric cards */
+.metric-card {
+    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+    border-radius: 12px;
+    padding: 20px;
+    text-align: center;
+}
+.metric-value {
+    font-size: 32px;
+    font-weight: bold;
+    color: var(--primary-color);
+}
+.metric-label {
+    font-size: 14px;
+    color: #666;
+    margin-top: 8px;
+}
+/* Responsive layout: stack rows vertically on narrow screens */
+@media (max-width: 768px) {
+    .gr-row {
+        flex-direction: column !important;
+    }
+}

config.py ADDED Viewed

	@@ -0,0 +1,101 @@

+"""
+Gradio Demo 配置文件
+农村低压配电网电压异常检测项目 - HuggingFace Spaces 版本
+"""
+import os
+from pathlib import Path
+# Path configuration (HuggingFace Spaces compatible)
+DEMO_DIR = Path(__file__).parent
+CODE_DIR = DEMO_DIR  # in Spaces the demo directory is the repository root
+PROJECT_DIR = DEMO_DIR
+# Model paths (weights come from the HuggingFace Hub)
+MODEL_DIR = DEMO_DIR / "models"
+BEST_MODEL_PATH = None  # to be downloaded from the HuggingFace Hub
+MODEL_CONFIG_PATH = None
+# Dataset paths
+DATASET_DIR = DEMO_DIR / "dataset"
+RURAL_VOLTAGE_DIR = DATASET_DIR / "RuralVoltage" / "realistic_v2"
+PSM_DIR = DATASET_DIR / "PSM"
+# Precomputed data path
+PRECOMPUTED_DIR = DEMO_DIR / "precomputed"
+# SVG file path (model architecture diagrams)
+SVG_DIR = DEMO_DIR / "docs" / "model_architectures" / "svg"
+# Model configuration: per-model hyperparameters keyed by model name.
+MODEL_CONFIGS = {
+    "VoltageTimesNet_v2": {
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+    },
+    "TimesNet": {
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+    },
+    "DLinear": {
+        "enc_in": 16,
+        "seq_len": 100,
+        "pred_len": 100,
+        "individual": False,
+    },
+}
+# Visualization colour scheme (soft, publication-style palette)
+THESIS_COLORS = {
+    "primary": "#4878A8",
+    "secondary": "#72A86D",
+    "accent": "#C4785C",
+    "warning": "#D4A84C",
+    "neutral": "#808080",
+    "light_gray": "#B0B0B0",
+    "anomaly": "#E74C3C",
+    "normal": "#2ECC71",
+}
+# Per-model colours for comparison charts
+MODEL_COLORS = {
+    "VoltageTimesNet_v2": "#4878A8",
+    "VoltageTimesNet": "#72A86D",
+    "TimesNet": "#C4785C",
+    "TPATimesNet": "#D4A84C",
+    "MTSTimesNet": "#9B59B6",
+    "DLinear": "#808080",
+}
+# Gradio theme configuration
+GRADIO_THEME = "soft"
+# Inference configuration
+INFERENCE_CONFIG = {
+    "batch_size": 32,
+    "device": "cpu",
+    "default_threshold": 0.5,
+}
+# Demo data configuration
+DEMO_DATA_CONFIG = {
+    "sample_length": 1000,
+    "window_size": 100,
+    "step_size": 1,
+}
+# HuggingFace Hub configuration (repository ids)
+HF_MODEL_REPO = "Sheldon123z/rural-voltage-detection-models"
+HF_DATASET_REPO = "Sheldon123z/rural-voltage-datasets"

core/__init__.py ADDED Viewed

	@@ -0,0 +1,14 @@

+"""
+Gradio Demo 核心模块
+"""
+from .model_loader import load_model, get_available_models
+from .data_processor import DataProcessor
+from .inference import VoltageAnomalyDetector
+__all__ = [
+    "load_model",
+    "get_available_models",
+    "DataProcessor",
+    "VoltageAnomalyDetector",
+]

core/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (447 Bytes). View file

core/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (536 Bytes). View file

core/__pycache__/data_processor.cpython-310.pyc ADDED Viewed

Binary file (10.9 kB). View file

core/__pycache__/data_processor.cpython-311.pyc ADDED Viewed

Binary file (17.3 kB). View file

core/__pycache__/inference.cpython-310.pyc ADDED Viewed

Binary file (10.3 kB). View file

core/__pycache__/inference.cpython-311.pyc ADDED Viewed

Binary file (15.4 kB). View file

core/__pycache__/model_loader.cpython-310.pyc ADDED Viewed

Binary file (5.87 kB). View file

core/__pycache__/model_loader.cpython-311.pyc ADDED Viewed

Binary file (8.56 kB). View file

core/data_processor.py ADDED Viewed

	@@ -0,0 +1,417 @@

+"""
+Data Processor Module for Gradio Demo
+农村低压配电网电压异常检测项目
+Provides:
+- DataProcessor: Class for data preprocessing, normalization, and windowing
+"""
+import os
+from pathlib import Path
+from typing import Optional, Tuple, List, Union, Dict, Any
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+class DataProcessor:
+    """
+    Data preprocessing module for voltage anomaly detection.
+    Supports:
+    - CSV data loading
+    - StandardScaler normalization
+    - Sliding window segmentation
+    - Feature extraction
+    Example:
+        >>> processor = DataProcessor(seq_len=100)
+        >>> processor.fit(train_data)
+        >>> windows = processor.transform(test_data)
+    """
+    # Default feature columns for RuralVoltage dataset
+    DEFAULT_FEATURE_COLS = [
+        "Va", "Vb", "Vc",           # Three-phase voltages
+        "Ia", "Ib", "Ic",           # Three-phase currents
+        "P", "Q", "S", "PF",        # Power metrics
+        "THD_Va", "THD_Vb", "THD_Vc",  # Harmonic distortion
+        "Freq",                     # Frequency
+        "V_unbalance", "I_unbalance"  # Unbalance ratios
+    ]
+    def __init__(
+        self,
+        seq_len: int = 100,
+        step: int = 1,
+        feature_cols: Optional[List[str]] = None,
+        normalize: bool = True
+    ):
+        """
+        Initialize DataProcessor.
+        Args:
+            seq_len: Length of sliding window (default: 100)
+            step: Step size for sliding window (default: 1)
+            feature_cols: List of feature column names to use
+            normalize: Whether to apply StandardScaler normalization
+        """
+        self.seq_len = seq_len
+        self.step = step
+        self.feature_cols = feature_cols
+        self.normalize = normalize
+        self.scaler = StandardScaler() if normalize else None
+        self._is_fitted = False
+        self._n_features = None
+    def fit(self, data: Union[np.ndarray, pd.DataFrame]) -> "DataProcessor":
+        """
+        Fit the scaler on training data.
+        Args:
+            data: Training data as numpy array [N, C] or DataFrame
+        Returns:
+            self for chaining
+        """
+        data_array = self._to_numpy(data)
+        if self.normalize:
+            self.scaler.fit(data_array)
+        self._n_features = data_array.shape[1]
+        self._is_fitted = True
+        return self
+    def transform(
+        self,
+        data: Union[np.ndarray, pd.DataFrame],
+        return_windows: bool = True
+    ) -> np.ndarray:
+        """
+        Transform data using fitted scaler and optionally create windows.
+        Args:
+            data: Data to transform [N, C] or DataFrame
+            return_windows: If True, return sliding windows [num_windows, seq_len, C]
+                          If False, return normalized data [N, C]
+        Returns:
+            Transformed data
+        """
+        data_array = self._to_numpy(data)
+        # Normalize
+        if self.normalize and self._is_fitted:
+            data_array = self.scaler.transform(data_array)
+        # Create windows
+        if return_windows:
+            return self.create_windows(data_array)
+        return data_array
+    def fit_transform(
+        self,
+        data: Union[np.ndarray, pd.DataFrame],
+        return_windows: bool = True
+    ) -> np.ndarray:
+        """
+        Fit scaler and transform data.
+        Args:
+            data: Training data
+            return_windows: Whether to return sliding windows
+        Returns:
+            Transformed data
+        """
+        self.fit(data)
+        return self.transform(data, return_windows=return_windows)
+    def create_windows(
+        self,
+        data: np.ndarray,
+        step: Optional[int] = None
+    ) -> np.ndarray:
+        """
+        Create sliding windows from sequential data.
+        Args:
+            data: Input data [N, C]
+            step: Optional override for step size
+        Returns:
+            Windows array [num_windows, seq_len, C]
+        """
+        if step is None:
+            step = self.step
+        n_samples, n_features = data.shape
+        # Calculate number of windows
+        n_windows = (n_samples - self.seq_len) // step + 1
+        if n_windows <= 0:
+            raise ValueError(
+                f"Data length {n_samples} is too short for "
+                f"seq_len={self.seq_len}, step={step}"
+            )
+        # Create windows using stride tricks for efficiency
+        windows = np.zeros((n_windows, self.seq_len, n_features), dtype=np.float32)
+        for i in range(n_windows):
+            start_idx = i * step
+            windows[i] = data[start_idx:start_idx + self.seq_len]
+        return windows
+    def inverse_transform(self, data: np.ndarray) -> np.ndarray:
+        """
+        Inverse transform normalized data back to original scale.
+        Args:
+            data: Normalized data [N, C] or [B, T, C]
+        Returns:
+            Data in original scale
+        """
+        if not self.normalize or self.scaler is None:
+            return data
+        original_shape = data.shape
+        # Handle 3D input
+        if len(original_shape) == 3:
+            B, T, C = original_shape
+            data = data.reshape(-1, C)
+            data = self.scaler.inverse_transform(data)
+            data = data.reshape(B, T, C)
+        else:
+            data = self.scaler.inverse_transform(data)
+        return data
+    def _to_numpy(self, data: Union[np.ndarray, pd.DataFrame]) -> np.ndarray:
+        """Convert input to numpy array, selecting feature columns if needed."""
+        if isinstance(data, pd.DataFrame):
+            # Select feature columns
+            if self.feature_cols:
+                available_cols = [c for c in self.feature_cols if c in data.columns]
+                if available_cols:
+                    data = data[available_cols]
+                else:
+                    # Exclude common non-feature columns
+                    exclude_cols = ["timestamp", "date", "time", "label", "index"]
+                    feature_cols = [c for c in data.columns if c not in exclude_cols]
+                    data = data[feature_cols]
+            else:
+                # Use all numeric columns
+                exclude_cols = ["timestamp", "date", "time", "label", "index"]
+                feature_cols = [c for c in data.columns if c not in exclude_cols]
+                data = data[feature_cols]
+            data = data.values
+        # Handle NaN values
+        data = np.nan_to_num(data, nan=0.0)
+        return data.astype(np.float32)
+    @classmethod
+    def load_csv(
+        cls,
+        file_path: Union[str, Path],
+        feature_cols: Optional[List[str]] = None
+    ) -> Tuple[np.ndarray, Optional[np.ndarray], List[str]]:
+        """
+        Load data from CSV file.
+        Args:
+            file_path: Path to CSV file
+            feature_cols: Optional list of feature columns to use
+        Returns:
+            Tuple of (data, labels, feature_names)
+            - data: Feature values [N, C]
+            - labels: Label values [N] if 'label' column exists, else None
+            - feature_names: List of feature column names
+        """
+        file_path = Path(file_path)
+        if not file_path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+        df = pd.read_csv(file_path)
+        # Extract labels if present
+        labels = None
+        if "label" in df.columns:
+            labels = df["label"].values
+        # Determine feature columns
+        exclude_cols = ["timestamp", "date", "time", "label", "index", "Unnamed: 0"]
+        if feature_cols:
+            available_cols = [c for c in feature_cols if c in df.columns]
+            if available_cols:
+                use_cols = available_cols
+            else:
+                use_cols = [c for c in df.columns if c not in exclude_cols]
+        else:
+            use_cols = [c for c in df.columns if c not in exclude_cols]
+        data = df[use_cols].values
+        data = np.nan_to_num(data, nan=0.0).astype(np.float32)
+        return data, labels, use_cols
+    @classmethod
+    def load_dataset(
+        cls,
+        root_path: Union[str, Path],
+        dataset_type: str = "RuralVoltage"
+    ) -> Dict[str, Any]:
+        """
+        Load a complete dataset (train, test, labels).
+        Args:
+            root_path: Path to dataset directory
+            dataset_type: Type of dataset ("RuralVoltage", "PSM", etc.)
+        Returns:
+            Dict with train_data, test_data, test_labels, feature_names
+        """
+        root_path = Path(root_path)
+        if dataset_type == "RuralVoltage":
+            train_path = root_path / "train.csv"
+            test_path = root_path / "test.csv"
+            label_path = root_path / "test_label.csv"
+            train_data, _, feature_names = cls.load_csv(train_path)
+            test_data, _, _ = cls.load_csv(test_path, feature_cols=feature_names)
+            _, test_labels, _ = cls.load_csv(label_path)
+        elif dataset_type == "PSM":
+            train_path = root_path / "train.csv"
+            test_path = root_path / "test.csv"
+            label_path = root_path / "test_label.csv"
+            train_df = pd.read_csv(train_path)
+            test_df = pd.read_csv(test_path)
+            label_df = pd.read_csv(label_path)
+            train_data = train_df.values[:, 1:]  # Skip first column
+            test_data = test_df.values[:, 1:]
+            test_labels = label_df.values[:, 1:]
+            feature_names = list(train_df.columns[1:])
+            train_data = np.nan_to_num(train_data, nan=0.0).astype(np.float32)
+            test_data = np.nan_to_num(test_data, nan=0.0).astype(np.float32)
+        else:
+            raise ValueError(f"Unknown dataset type: {dataset_type}")
+        return {
+            "train_data": train_data,
+            "test_data": test_data,
+            "test_labels": test_labels,
+            "feature_names": feature_names
+        }
+    def get_scaler_params(self) -> Optional[Dict[str, np.ndarray]]:
+        """
+        Get scaler parameters (mean and scale).
+        Returns:
+            Dict with 'mean' and 'scale' arrays, or None if not fitted
+        """
+        if not self._is_fitted or self.scaler is None:
+            return None
+        return {
+            "mean": self.scaler.mean_,
+            "scale": self.scaler.scale_
+        }
+    def set_scaler_params(self, mean: np.ndarray, scale: np.ndarray) -> None:
+        """
+        Set scaler parameters directly (useful for loading saved parameters).
+        Args:
+            mean: Mean values for each feature
+            scale: Scale (std) values for each feature
+        """
+        if self.scaler is None:
+            self.scaler = StandardScaler()
+        self.scaler.mean_ = mean
+        self.scaler.scale_ = scale
+        self.scaler.var_ = scale ** 2
+        self.scaler.n_features_in_ = len(mean)
+        self._n_features = len(mean)
+        self._is_fitted = True
+    @property
+    def n_features(self) -> Optional[int]:
+        """Number of feature columns recorded by fit() or set_scaler_params(); None before either runs."""
+        return self._n_features
+    @property
+    def is_fitted(self) -> bool:
+        """True once fit() or set_scaler_params() has completed."""
+        return self._is_fitted
+def preprocess_for_inference(
+    data: Union[np.ndarray, pd.DataFrame],
+    scaler_mean: Optional[np.ndarray] = None,
+    scaler_scale: Optional[np.ndarray] = None,
+    seq_len: int = 100,
+    step: int = 1
+) -> np.ndarray:
+    """
+    Convenience function to preprocess data for model inference.
+    Args:
+        data: Raw input data
+        scaler_mean: Optional pre-computed scaler mean
+        scaler_scale: Optional pre-computed scaler scale
+        seq_len: Window length
+        step: Step size
+    Returns:
+        Preprocessed windows ready for model input
+    """
+    processor = DataProcessor(seq_len=seq_len, step=step)
+    if scaler_mean is not None and scaler_scale is not None:
+        processor.set_scaler_params(scaler_mean, scaler_scale)
+        return processor.transform(data, return_windows=True)
+    else:
+        return processor.fit_transform(data, return_windows=True)
+if __name__ == "__main__":
+    # Manual smoke test: run this module directly to exercise DataProcessor.
+    print("Testing DataProcessor...")
+    # Create sample data (seeded so repeated runs print the same shapes)
+    np.random.seed(42)
+    sample_data = np.random.randn(1000, 16).astype(np.float32)
+    # Test processor: fit the scaler and window the same data
+    processor = DataProcessor(seq_len=100, step=1)
+    windows = processor.fit_transform(sample_data)
+    print(f"Input shape: {sample_data.shape}")
+    print(f"Windows shape: {windows.shape}")
+    print(f"Expected windows: {(1000 - 100) // 1 + 1}")
+    print(f"Is fitted: {processor.is_fitted}")
+    print(f"N features: {processor.n_features}")
+    # Test inverse transform on a 3D batch of the first five windows
+    original = processor.inverse_transform(windows[:5])
+    print(f"Inverse transform shape: {original.shape}")

core/inference.py ADDED Viewed

	@@ -0,0 +1,426 @@

+"""
+Voltage Anomaly Detection Inference Module
+This module provides a high-level API for voltage anomaly detection inference.
+Supports CPU inference with torch.no_grad() optimization.
+"""
+import json
+import sys
+from argparse import Namespace
+from pathlib import Path
+from typing import Dict, Optional, Union
+import numpy as np
+import torch
+import torch.nn as nn
+# Add code directory to path for model imports
+# NOTE(review): three .parent hops from <root>/core/inference.py resolve to
+# the directory ABOVE <root>; if the `models` package sits at <root>, this
+# entry alone does not make it importable -- confirm the intended layout.
+CODE_DIR = Path(__file__).parent.parent.parent
+if str(CODE_DIR) not in sys.path:
+    sys.path.insert(0, str(CODE_DIR))
+from models import model_dict
+class VoltageAnomalyDetector:
+    """
+    High-level API for voltage anomaly detection inference.
+    This class wraps the model loading, preprocessing, and inference logic
+    for easy use in applications like Gradio demos.
+    Example:
+        >>> detector = VoltageAnomalyDetector("VoltageTimesNet_v2", checkpoint_path)
+        >>> detector.load_model()
+        >>> results = detector.predict(data, threshold=0.5)
+        >>> print(results["labels"])  # Anomaly labels (0 or 1)
+    """
+    # Default model configurations
+    DEFAULT_CONFIGS = {
+        "VoltageTimesNet_v2": {
+            "enc_in": 16,
+            "c_out": 16,
+            "seq_len": 100,
+            "d_model": 64,
+            "d_ff": 64,
+            "e_layers": 2,
+            "top_k": 5,
+            "num_kernels": 6,
+            "dropout": 0.1,
+            "embed": "fixed",
+            "freq": "h",
+            "task_name": "anomaly_detection",
+            "pred_len": 0,
+            "label_len": 0,
+        },
+        "VoltageTimesNet": {
+            "enc_in": 16,
+            "c_out": 16,
+            "seq_len": 100,
+            "d_model": 64,
+            "d_ff": 64,
+            "e_layers": 2,
+            "top_k": 5,
+            "num_kernels": 6,
+            "dropout": 0.1,
+            "embed": "fixed",
+            "freq": "h",
+            "task_name": "anomaly_detection",
+            "pred_len": 0,
+            "label_len": 0,
+        },
+        "TimesNet": {
+            "enc_in": 16,
+            "c_out": 16,
+            "seq_len": 100,
+            "d_model": 64,
+            "d_ff": 64,
+            "e_layers": 2,
+            "top_k": 5,
+            "num_kernels": 6,
+            "dropout": 0.1,
+            "embed": "fixed",
+            "freq": "h",
+            "task_name": "anomaly_detection",
+            "pred_len": 0,
+            "label_len": 0,
+        },
+        "DLinear": {
+            "enc_in": 16,
+            "c_out": 16,
+            "seq_len": 100,
+            "pred_len": 100,
+            "individual": False,
+            "task_name": "anomaly_detection",
+            "label_len": 0,
+        },
+    }
+    def __init__(
+        self,
+        model_name: str,
+        checkpoint_path: Optional[str] = None,
+        device: str = "cpu",
+        config_path: Optional[str] = None,
+    ):
+        """
+        Initialize the VoltageAnomalyDetector.
+        Args:
+            model_name: Name of the model (e.g., "VoltageTimesNet_v2", "TimesNet")
+            checkpoint_path: Path to the model checkpoint file (.pth)
+            device: Device to run inference on ("cpu" or "cuda")
+            config_path: Path to model config JSON file (optional)
+        """
+        self.model_name = model_name
+        self.checkpoint_path = checkpoint_path
+        self.device = torch.device(device)
+        self.config_path = config_path
+        self.model: Optional[nn.Module] = None
+        self.config: Dict = {}
+        self._is_loaded = False
+        # MSE criterion for reconstruction error
+        self.anomaly_criterion = nn.MSELoss(reduction='none')
+    def _load_config(self) -> Dict:
+        """
+        Load model configuration from file or use defaults.
+        Returns:
+            Dictionary containing model configuration
+        """
+        config = {}
+        # Try loading from config file first
+        if self.config_path is not None:
+            config_file = Path(self.config_path)
+            if config_file.exists():
+                with open(config_file, "r") as f:
+                    config = json.load(f)
+        # Fall back to default config
+        if not config and self.model_name in self.DEFAULT_CONFIGS:
+            config = self.DEFAULT_CONFIGS[self.model_name].copy()
+        # Ensure model name is set
+        config["model"] = self.model_name
+        return config
+    def _config_to_args(self, config: Dict) -> Namespace:
+        """
+        Convert config dictionary to argparse Namespace.
+        Args:
+            config: Configuration dictionary
+        Returns:
+            Namespace object with configuration attributes
+        """
+        # Merge with defaults
+        default_config = self.DEFAULT_CONFIGS.get(self.model_name, {})
+        merged_config = {**default_config, **config}
+        # Ensure required fields
+        merged_config.setdefault("task_name", "anomaly_detection")
+        merged_config.setdefault("embed", "fixed")
+        merged_config.setdefault("freq", "h")
+        merged_config.setdefault("dropout", 0.1)
+        merged_config.setdefault("pred_len", 0)
+        merged_config.setdefault("label_len", 0)
+        return Namespace(**merged_config)
+    def load_model(self, strict: bool = False) -> None:
+        """
+        Load the model and weights.
+        This method initializes the model architecture and loads
+        pretrained weights if a checkpoint path is provided. Without a
+        checkpoint path the model keeps its freshly initialized weights.
+        Args:
+            strict: Whether to strictly enforce that the keys in state_dict
+                    match the keys returned by the model's state_dict function.
+                    If False, allows loading checkpoints with minor mismatches.
+                    Default is False for better compatibility.
+        Raises:
+            ValueError: If model name is not found in model registry
+            FileNotFoundError: If checkpoint file does not exist
+        """
+        # Load configuration
+        self.config = self._load_config()
+        args = self._config_to_args(self.config)
+        # Check model exists
+        if self.model_name not in model_dict:
+            available = list(model_dict.keys())
+            raise ValueError(
+                f"Model '{self.model_name}' not found. "
+                f"Available models: {available}"
+            )
+        # Build model
+        Model = model_dict[self.model_name]
+        self.model = Model(args)
+        # Load checkpoint if provided
+        if self.checkpoint_path is not None:
+            checkpoint_file = Path(self.checkpoint_path)
+            if not checkpoint_file.exists():
+                raise FileNotFoundError(
+                    f"Checkpoint file not found: {self.checkpoint_path}"
+                )
+            # Load weights; tensors are mapped onto the configured device and
+            # weights_only=True is requested (see the torch.load documentation)
+            state_dict = torch.load(
+                self.checkpoint_path,
+                map_location=self.device,
+                weights_only=True
+            )
+            # Load state dict with optional strict mode
+            missing_keys, unexpected_keys = self.model.load_state_dict(
+                state_dict, strict=strict
+            )
+            # Mismatches are only reported, not treated as errors
+            if missing_keys:
+                print(f"Warning: Missing keys in checkpoint: {missing_keys}")
+            if unexpected_keys:
+                print(f"Warning: Unexpected keys in checkpoint: {unexpected_keys}")
+            print(f"Loaded checkpoint from: {self.checkpoint_path}")
+        # Move to device and set eval mode
+        self.model = self.model.to(self.device)
+        self.model.eval()
+        self._is_loaded = True
+        print(f"Model '{self.model_name}' loaded successfully on {self.device}")
+    def preprocess(self, data: np.ndarray) -> torch.Tensor:
+        """
+        Preprocess input data for model inference.
+        Args:
+            data: Input numpy array with shape:
+                  - (seq_len, n_features) for single sample
+                  - (batch_size, seq_len, n_features) for batch
+        Returns:
+            Preprocessed tensor with shape (batch_size, seq_len, n_features)
+        """
+        # Ensure numpy array
+        if not isinstance(data, np.ndarray):
+            data = np.array(data)
+        # Convert to float32
+        data = data.astype(np.float32)
+        # Add batch dimension if needed
+        if data.ndim == 2:
+            data = data[np.newaxis, ...]  # (1, seq_len, n_features)
+        # Validate shape
+        if data.ndim != 3:
+            raise ValueError(
+                f"Expected 2D or 3D array, got shape {data.shape}"
+            )
+        # Convert to tensor
+        tensor = torch.from_numpy(data).to(self.device)
+        return tensor
+    def get_reconstruction_error(self, data: np.ndarray) -> np.ndarray:
+        """
+        Compute reconstruction error for input data.
+        The reconstruction error is computed as the mean squared error
+        between the input and the model's reconstruction.
+        Args:
+            data: Input numpy array with shape (batch_size, seq_len, n_features)
+                  or (seq_len, n_features) for single sample
+        Returns:
+            Reconstruction error array with shape (n_samples,)
+            where n_samples = batch_size * seq_len
+        """
+        if not self._is_loaded:
+            raise RuntimeError("Model not loaded. Call load_model() first.")
+        # Preprocess data
+        batch_x = self.preprocess(data)
+        # Inference with no gradient computation
+        with torch.no_grad():
+            # Forward pass - reconstruct input
+            outputs = self.model(batch_x, None, None, None)
+            # Compute reconstruction error per sample
+            # Shape: (batch, seq_len, features) -> (batch, seq_len)
+            error = torch.mean(
+                self.anomaly_criterion(batch_x, outputs),
+                dim=-1
+            )
+            # Flatten to (n_samples,)
+            error = error.reshape(-1)
+            # Convert to numpy
+            error_np = error.cpu().numpy()
+        return error_np
+    def predict(
+        self,
+        data: np.ndarray,
+        threshold: float = 0.5,
+        return_scores: bool = True,
+    ) -> Dict[str, Union[np.ndarray, float]]:
+        """
+        Perform anomaly detection inference.
+        Args:
+            data: Input numpy array with shape (batch_size, seq_len, n_features)
+                  or (seq_len, n_features) for single sample
+            threshold: Anomaly score threshold for binary classification.
+                       Samples with scores above threshold are labeled as anomalies.
+            return_scores: Whether to return raw anomaly scores
+        Returns:
+            Dictionary containing:
+                - "scores": np.ndarray of anomaly scores (if return_scores=True)
+                - "labels": np.ndarray of binary labels (0=normal, 1=anomaly)
+                - "threshold": float threshold used for classification
+        """
+        if not self._is_loaded:
+            raise RuntimeError("Model not loaded. Call load_model() first.")
+        # Get reconstruction errors as anomaly scores
+        scores = self.get_reconstruction_error(data)
+        # Apply threshold to get binary labels
+        labels = (scores > threshold).astype(np.int32)
+        # Build result dictionary
+        result = {
+            "labels": labels,
+            "threshold": threshold,
+        }
+        if return_scores:
+            result["scores"] = scores
+        return result
+    def predict_with_percentile_threshold(
+        self,
+        data: np.ndarray,
+        anomaly_ratio: float = 1.0,
+        return_scores: bool = True,
+    ) -> Dict[str, Union[np.ndarray, float]]:
+        """
+        Perform anomaly detection using percentile-based threshold.
+        This method computes the threshold based on the anomaly ratio,
+        similar to the training evaluation approach.
+        Args:
+            data: Input numpy array
+            anomaly_ratio: Expected percentage of anomalies (e.g., 1.0 means 1%)
+            return_scores: Whether to return raw anomaly scores
+        Returns:
+            Dictionary containing scores, labels, and computed threshold
+        """
+        if not self._is_loaded:
+            raise RuntimeError("Model not loaded. Call load_model() first.")
+        # Get reconstruction errors
+        scores = self.get_reconstruction_error(data)
+        # Compute threshold using percentile
+        threshold = np.percentile(scores, 100 - anomaly_ratio)
+        # Apply threshold
+        labels = (scores > threshold).astype(np.int32)
+        result = {
+            "labels": labels,
+            "threshold": float(threshold),
+        }
+        if return_scores:
+            result["scores"] = scores
+        return result
+    @property
+    def seq_len(self) -> int:
+        """Get the expected input sequence length."""
+        return self.config.get("seq_len", 100)
+    @property
+    def n_features(self) -> int:
+        """Get the expected number of input features."""
+        return self.config.get("enc_in", 16)
+    @property
+    def is_loaded(self) -> bool:
+        """Check if the model is loaded."""
+        return self._is_loaded
+    def __repr__(self) -> str:
+        status = "loaded" if self._is_loaded else "not loaded"
+        return (
+            f"VoltageAnomalyDetector("
+            f"model={self.model_name}, "
+            f"device={self.device}, "
+            f"status={status})"
+        )

core/model_loader.py ADDED Viewed

	@@ -0,0 +1,308 @@

+"""
+Model Loader Module for Gradio Demo
+农村低压配电网电压异常检测项目
+Provides:
+- load_model(): Load a model with optional checkpoint
+- get_available_models(): List available models
+"""
+import sys
+from pathlib import Path
+from typing import Optional, Dict, List, Any
+from argparse import Namespace
+import torch
+# Add code directory to path for importing models
+CODE_DIR = Path(__file__).parent.parent.parent
+if str(CODE_DIR) not in sys.path:
+    sys.path.insert(0, str(CODE_DIR))
+from models import model_dict
+# Default model configurations for anomaly detection
+DEFAULT_MODEL_CONFIGS: Dict[str, Dict[str, Any]] = {
+    "VoltageTimesNet_v2": {
+        "task_name": "anomaly_detection",
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "pred_len": 0,
+        "label_len": 0,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+        "embed": "timeF",
+        "freq": "h",
+        "dropout": 0.1,
+    },
+    "VoltageTimesNet": {
+        "task_name": "anomaly_detection",
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "pred_len": 0,
+        "label_len": 0,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+        "embed": "timeF",
+        "freq": "h",
+        "dropout": 0.1,
+    },
+    "TimesNet": {
+        "task_name": "anomaly_detection",
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "pred_len": 0,
+        "label_len": 0,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+        "embed": "timeF",
+        "freq": "h",
+        "dropout": 0.1,
+    },
+    "TPATimesNet": {
+        "task_name": "anomaly_detection",
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "pred_len": 0,
+        "label_len": 0,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+        "embed": "timeF",
+        "freq": "h",
+        "dropout": 0.1,
+    },
+    "MTSTimesNet": {
+        "task_name": "anomaly_detection",
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "pred_len": 0,
+        "label_len": 0,
+        "d_model": 64,
+        "d_ff": 64,
+        "e_layers": 2,
+        "top_k": 5,
+        "num_kernels": 6,
+        "embed": "timeF",
+        "freq": "h",
+        "dropout": 0.1,
+    },
+    "DLinear": {
+        "task_name": "anomaly_detection",
+        "enc_in": 16,
+        "c_out": 16,
+        "seq_len": 100,
+        "pred_len": 100,  # DLinear requires pred_len = seq_len for anomaly detection
+        "individual": False,
+        "moving_avg": 25,
+    },
+}
+# Models suitable for voltage anomaly detection demo
+DEMO_MODELS = [
+    "VoltageTimesNet_v2",
+    "VoltageTimesNet",
+    "TimesNet",
+    "TPATimesNet",
+    "MTSTimesNet",
+    "DLinear",
+]
+def get_available_models() -> List[str]:
+    """
+    Get list of available models for the demo.
+    Returns:
+        List of model names that can be loaded
+    """
+    return [m for m in DEMO_MODELS if m in model_dict]
+def get_all_models() -> List[str]:
+    """
+    Get list of all registered models.
+    Returns:
+        List of all model names in model_dict
+    """
+    return list(model_dict.keys())
+def create_model_config(
+    model_name: str,
+    config_override: Optional[Dict[str, Any]] = None
+) -> Namespace:
+    """
+    Create model configuration namespace.
+    Args:
+        model_name: Name of the model
+        config_override: Optional dict to override default config
+    Returns:
+        Namespace object with model configuration
+    """
+    # Get default config or create minimal config
+    if model_name in DEFAULT_MODEL_CONFIGS:
+        config = DEFAULT_MODEL_CONFIGS[model_name].copy()
+    else:
+        # Minimal config for unknown models
+        config = {
+            "task_name": "anomaly_detection",
+            "enc_in": 16,
+            "c_out": 16,
+            "seq_len": 100,
+            "pred_len": 0,
+            "label_len": 0,
+            "d_model": 64,
+            "d_ff": 64,
+            "e_layers": 2,
+            "top_k": 5,
+            "num_kernels": 6,
+            "embed": "timeF",
+            "freq": "h",
+            "dropout": 0.1,
+        }
+    # Apply overrides
+    if config_override:
+        config.update(config_override)
+    return Namespace(**config)
+def load_model(
+    model_name: str,
+    checkpoint_path: Optional[str] = None,
+    config_override: Optional[Dict[str, Any]] = None,
+    device: str = "cpu"
+) -> torch.nn.Module:
+    """
+    Load a model with optional checkpoint.
+    Args:
+        model_name: Name of the model (e.g., "VoltageTimesNet_v2", "TimesNet")
+        checkpoint_path: Optional path to model checkpoint (.pth file)
+        config_override: Optional dict to override default model config
+        device: Device to load model on ("cpu" or "cuda")
+    Returns:
+        Loaded model in eval mode
+    Raises:
+        ValueError: If model_name is not found
+        FileNotFoundError: If checkpoint_path doesn't exist
+    Example:
+        >>> model = load_model("VoltageTimesNet_v2")
+        >>> model = load_model("TimesNet", checkpoint_path="./best_model.pth")
+        >>> model = load_model("TimesNet", config_override={"seq_len": 50})
+    """
+    # Validate model name
+    if model_name not in model_dict:
+        available = get_available_models()
+        raise ValueError(
+            f"Model '{model_name}' not found. "
+            f"Available models: {available}"
+        )
+    # Create config
+    config = create_model_config(model_name, config_override)
+    # Build model
+    Model = model_dict[model_name]
+    model = Model(config)
+    # Load checkpoint if provided
+    if checkpoint_path:
+        checkpoint_path = Path(checkpoint_path)
+        if not checkpoint_path.exists():
+            raise FileNotFoundError(f"Checkpoint not found: {checkpoint_path}")
+        state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
+        # Handle different checkpoint formats
+        if isinstance(state_dict, dict) and "model_state_dict" in state_dict:
+            state_dict = state_dict["model_state_dict"]
+        # Load state dict with strict=False to handle minor mismatches
+        model.load_state_dict(state_dict, strict=False)
+        print(f"Loaded checkpoint from: {checkpoint_path}")
+    # Move to device and set to eval mode
+    model = model.to(device)
+    model.eval()
+    return model
+def get_model_info(model_name: str) -> Dict[str, Any]:
+    """
+    Get information about a model.
+    Args:
+        model_name: Name of the model
+    Returns:
+        Dict with model information
+    """
+    if model_name not in model_dict:
+        return {"error": f"Model '{model_name}' not found"}
+    config = DEFAULT_MODEL_CONFIGS.get(model_name, {})
+    info = {
+        "name": model_name,
+        "available": True,
+        "config": config,
+        "description": _get_model_description(model_name),
+    }
+    return info
+def _get_model_description(model_name: str) -> str:
+    """Get model description."""
+    descriptions = {
+        "VoltageTimesNet_v2": "Enhanced TimesNet with recall optimization for voltage anomaly detection",
+        "VoltageTimesNet": "TimesNet variant with preset periods for voltage patterns",
+        "TimesNet": "FFT-based period discovery with 2D convolution for temporal patterns",
+        "TPATimesNet": "Three-Phase Attention TimesNet for multi-phase voltage analysis",
+        "MTSTimesNet": "Multi-scale Temporal TimesNet for multi-resolution patterns",
+        "DLinear": "Lightweight linear model with trend-seasonal decomposition",
+    }
+    return descriptions.get(model_name, "No description available")
+if __name__ == "__main__":
+    # Test module
+    print("Available models:", get_available_models())
+    for model_name in get_available_models():
+        print(f"\nLoading {model_name}...")
+        try:
+            model = load_model(model_name)
+            # Count parameters
+            params = sum(p.numel() for p in model.parameters())
+            print(f"  - Parameters: {params:,}")
+            print(f"  - Config: {get_model_info(model_name)['config']}")
+        except Exception as e:
+            print(f"  - Error: {e}")

docs/model_architectures/svg/01_TimesNet.svg ADDED Viewed

docs/model_architectures/svg/02_VoltageTimesNet.svg ADDED Viewed

docs/model_architectures/svg/03_VoltageTimesNet_v2.svg ADDED Viewed

docs/model_architectures/svg/04_TPATimesNet.svg ADDED Viewed

docs/model_architectures/svg/05_MTSTimesNet.svg ADDED Viewed

docs/model_architectures/svg/06_DLinear.svg ADDED Viewed

layers/AutoCorrelation.py ADDED Viewed

	@@ -0,0 +1,186 @@

+"""
+AutoCorrelation Mechanism for Autoformer.
+Period-based dependencies discovery with time delay aggregation.
+"""
+import math
+import torch
+import torch.nn as nn
+class AutoCorrelation(nn.Module):
+    """
+    AutoCorrelation Mechanism with:
+    (1) period-based dependencies discovery
+    (2) time delay aggregation
+    This block can replace the self-attention family mechanism seamlessly.
+    """
+    def __init__(
+        self,
+        mask_flag=True,
+        factor=1,
+        scale=None,
+        attention_dropout=0.1,
+        output_attention=False,
+    ):
+        super(AutoCorrelation, self).__init__()
+        self.factor = factor
+        self.scale = scale
+        self.mask_flag = mask_flag
+        self.output_attention = output_attention
+        self.dropout = nn.Dropout(attention_dropout)
+    def time_delay_agg_training(self, values, corr):
+        """SpeedUp version for training phase."""
+        head = values.shape[1]
+        channel = values.shape[2]
+        length = values.shape[3]
+        top_k = int(self.factor * math.log(length))
+        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
+        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
+        weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
+        tmp_corr = torch.softmax(weights, dim=-1)
+        tmp_values = values
+        delays_agg = torch.zeros_like(values).float()
+        for i in range(top_k):
+            pattern = torch.roll(tmp_values, -int(index[i]), -1)
+            delays_agg = delays_agg + pattern * (
+                tmp_corr[:, i]
+                .unsqueeze(1)
+                .unsqueeze(1)
+                .unsqueeze(1)
+                .repeat(1, head, channel, length)
+            )
+        return delays_agg
+    def time_delay_agg_inference(self, values, corr):
+        """SpeedUp version for inference phase."""
+        batch = values.shape[0]
+        head = values.shape[1]
+        channel = values.shape[2]
+        length = values.shape[3]
+        init_index = (
+            torch.arange(length)
+            .unsqueeze(0)
+            .unsqueeze(0)
+            .unsqueeze(0)
+            .repeat(batch, head, channel, 1)
+            .to(values.device)
+        )
+        top_k = int(self.factor * math.log(length))
+        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
+        weights, delay = torch.topk(mean_value, top_k, dim=-1)
+        tmp_corr = torch.softmax(weights, dim=-1)
+        tmp_values = values.repeat(1, 1, 1, 2)
+        delays_agg = torch.zeros_like(values).float()
+        for i in range(top_k):
+            tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(
+                1
+            ).repeat(1, head, channel, length)
+            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
+            delays_agg = delays_agg + pattern * (
+                tmp_corr[:, i]
+                .unsqueeze(1)
+                .unsqueeze(1)
+                .unsqueeze(1)
+                .repeat(1, head, channel, length)
+            )
+        return delays_agg
+    def time_delay_agg_full(self, values, corr):
+        """Standard version of Autocorrelation."""
+        batch = values.shape[0]
+        head = values.shape[1]
+        channel = values.shape[2]
+        length = values.shape[3]
+        init_index = (
+            torch.arange(length)
+            .unsqueeze(0)
+            .unsqueeze(0)
+            .unsqueeze(0)
+            .repeat(batch, head, channel, 1)
+            .to(values.device)
+        )
+        top_k = int(self.factor * math.log(length))
+        weights, delay = torch.topk(corr, top_k, dim=-1)
+        tmp_corr = torch.softmax(weights, dim=-1)
+        tmp_values = values.repeat(1, 1, 1, 2)
+        delays_agg = torch.zeros_like(values).float()
+        for i in range(top_k):
+            tmp_delay = init_index + delay[..., i].unsqueeze(-1)
+            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
+            delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
+        return delays_agg
+    def forward(self, queries, keys, values, attn_mask):
+        B, L, H, E = queries.shape
+        _, S, _, D = values.shape
+        if L > S:
+            zeros = torch.zeros_like(queries[:, : (L - S), :]).float()
+            values = torch.cat([values, zeros], dim=1)
+            keys = torch.cat([keys, zeros], dim=1)
+        else:
+            values = values[:, :L, :, :]
+            keys = keys[:, :L, :, :]
+        # period-based dependencies
+        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
+        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
+        res = q_fft * torch.conj(k_fft)
+        corr = torch.fft.irfft(res, dim=-1)
+        # time delay agg
+        if self.training:
+            V = self.time_delay_agg_training(
+                values.permute(0, 2, 3, 1).contiguous(), corr
+            ).permute(0, 3, 1, 2)
+        else:
+            V = self.time_delay_agg_inference(
+                values.permute(0, 2, 3, 1).contiguous(), corr
+            ).permute(0, 3, 1, 2)
+        if self.output_attention:
+            return (V.contiguous(), corr.permute(0, 3, 1, 2))
+        else:
+            return (V.contiguous(), None)
+class AutoCorrelationLayer(nn.Module):
+    """AutoCorrelation layer with projections."""
+    def __init__(self, correlation, d_model, n_heads, d_keys=None, d_values=None):
+        super(AutoCorrelationLayer, self).__init__()
+        d_keys = d_keys or (d_model // n_heads)
+        d_values = d_values or (d_model // n_heads)
+        self.inner_correlation = correlation
+        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
+        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
+        self.value_projection = nn.Linear(d_model, d_values * n_heads)
+        self.out_projection = nn.Linear(d_values * n_heads, d_model)
+        self.n_heads = n_heads
+    def forward(self, queries, keys, values, attn_mask):
+        B, L, _ = queries.shape
+        _, S, _ = keys.shape
+        H = self.n_heads
+        queries = self.query_projection(queries).view(B, L, H, -1)
+        keys = self.key_projection(keys).view(B, S, H, -1)
+        values = self.value_projection(values).view(B, S, H, -1)
+        out, attn = self.inner_correlation(queries, keys, values, attn_mask)
+        out = out.view(B, L, -1)
+        return self.out_projection(out), attn

layers/Autoformer_EncDec.py ADDED Viewed

	@@ -0,0 +1,219 @@

+"""
+Autoformer Encoder-Decoder with Series Decomposition.
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class my_Layernorm(nn.Module):
+    """Special designed layernorm for the seasonal part."""
+    def __init__(self, channels):
+        super(my_Layernorm, self).__init__()
+        self.layernorm = nn.LayerNorm(channels)
+    def forward(self, x):
+        x_hat = self.layernorm(x)
+        bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
+        return x_hat - bias
+class moving_avg(nn.Module):
+    """Moving average block to highlight the trend of time series."""
+    def __init__(self, kernel_size, stride):
+        super(moving_avg, self).__init__()
+        self.kernel_size = kernel_size
+        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
+    def forward(self, x):
+        front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
+        end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
+        x = torch.cat([front, x, end], dim=1)
+        x = self.avg(x.permute(0, 2, 1))
+        x = x.permute(0, 2, 1)
+        return x
+class series_decomp(nn.Module):
+    """Series decomposition block."""
+    def __init__(self, kernel_size):
+        super(series_decomp, self).__init__()
+        self.moving_avg = moving_avg(kernel_size, stride=1)
+    def forward(self, x):
+        moving_mean = self.moving_avg(x)
+        res = x - moving_mean
+        return res, moving_mean
+class series_decomp_multi(nn.Module):
+    """Multiple Series decomposition block from FEDformer."""
+    def __init__(self, kernel_size):
+        super(series_decomp_multi, self).__init__()
+        self.kernel_size = kernel_size
+        self.series_decomp = [series_decomp(kernel) for kernel in kernel_size]
+    def forward(self, x):
+        moving_mean = []
+        res = []
+        for func in self.series_decomp:
+            sea, moving_avg = func(x)
+            moving_mean.append(moving_avg)
+            res.append(sea)
+        sea = sum(res) / len(res)
+        moving_mean = sum(moving_mean) / len(moving_mean)
+        return sea, moving_mean
+class EncoderLayer(nn.Module):
+    """Autoformer encoder layer with progressive decomposition."""
+    def __init__(
+        self,
+        attention,
+        d_model,
+        d_ff=None,
+        moving_avg=25,
+        dropout=0.1,
+        activation="relu",
+    ):
+        super(EncoderLayer, self).__init__()
+        d_ff = d_ff or 4 * d_model
+        self.attention = attention
+        self.conv1 = nn.Conv1d(
+            in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False
+        )
+        self.conv2 = nn.Conv1d(
+            in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False
+        )
+        self.decomp1 = series_decomp(moving_avg)
+        self.decomp2 = series_decomp(moving_avg)
+        self.dropout = nn.Dropout(dropout)
+        self.activation = F.relu if activation == "relu" else F.gelu
+    def forward(self, x, attn_mask=None):
+        new_x, attn = self.attention(x, x, x, attn_mask=attn_mask)
+        x = x + self.dropout(new_x)
+        x, _ = self.decomp1(x)
+        y = x
+        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
+        y = self.dropout(self.conv2(y).transpose(-1, 1))
+        res, _ = self.decomp2(x + y)
+        return res, attn
+class Encoder(nn.Module):
+    """Autoformer encoder."""
+    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
+        super(Encoder, self).__init__()
+        self.attn_layers = nn.ModuleList(attn_layers)
+        self.conv_layers = (
+            nn.ModuleList(conv_layers) if conv_layers is not None else None
+        )
+        self.norm = norm_layer
+    def forward(self, x, attn_mask=None):
+        attns = []
+        if self.conv_layers is not None:
+            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
+                x, attn = attn_layer(x, attn_mask=attn_mask)
+                x = conv_layer(x)
+                attns.append(attn)
+            x, attn = self.attn_layers[-1](x)
+            attns.append(attn)
+        else:
+            for attn_layer in self.attn_layers:
+                x, attn = attn_layer(x, attn_mask=attn_mask)
+                attns.append(attn)
+        if self.norm is not None:
+            x = self.norm(x)
+        return x, attns
+class DecoderLayer(nn.Module):
+    """Autoformer decoder layer with progressive decomposition."""
+    def __init__(
+        self,
+        self_attention,
+        cross_attention,
+        d_model,
+        c_out,
+        d_ff=None,
+        moving_avg=25,
+        dropout=0.1,
+        activation="relu",
+    ):
+        super(DecoderLayer, self).__init__()
+        d_ff = d_ff or 4 * d_model
+        self.self_attention = self_attention
+        self.cross_attention = cross_attention
+        self.conv1 = nn.Conv1d(
+            in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False
+        )
+        self.conv2 = nn.Conv1d(
+            in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False
+        )
+        self.decomp1 = series_decomp(moving_avg)
+        self.decomp2 = series_decomp(moving_avg)
+        self.decomp3 = series_decomp(moving_avg)
+        self.dropout = nn.Dropout(dropout)
+        self.projection = nn.Conv1d(
+            in_channels=d_model,
+            out_channels=c_out,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            padding_mode="circular",
+            bias=False,
+        )
+        self.activation = F.relu if activation == "relu" else F.gelu
+    def forward(self, x, cross, x_mask=None, cross_mask=None):
+        x = x + self.dropout(self.self_attention(x, x, x, attn_mask=x_mask)[0])
+        x, trend1 = self.decomp1(x)
+        x = x + self.dropout(
+            self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
+        )
+        x, trend2 = self.decomp2(x)
+        y = x
+        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
+        y = self.dropout(self.conv2(y).transpose(-1, 1))
+        x, trend3 = self.decomp3(x + y)
+        residual_trend = trend1 + trend2 + trend3
+        residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(
+            1, 2
+        )
+        return x, residual_trend
+class Decoder(nn.Module):
+    """Autoformer decoder."""
+    def __init__(self, layers, norm_layer=None, projection=None):
+        super(Decoder, self).__init__()
+        self.layers = nn.ModuleList(layers)
+        self.norm = norm_layer
+        self.projection = projection
+    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
+        for layer in self.layers:
+            x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
+            trend = trend + residual_trend
+        if self.norm is not None:
+            x = self.norm(x)
+        if self.projection is not None:
+            x = self.projection(x)
+        return x, trend

layers/Conv_Blocks.py ADDED Viewed

	@@ -0,0 +1,85 @@

+"""
+Convolution Blocks for Voltage Anomaly Detection
+Standalone version - independent from main TSLib
+"""
+import torch
+import torch.nn as nn
+class Inception_Block_V1(nn.Module):
+    """Inception block with multiple kernel sizes for TimesNet."""
+    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
+        super(Inception_Block_V1, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.num_kernels = num_kernels
+        kernels = []
+        for i in range(self.num_kernels):
+            kernels.append(
+                nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i)
+            )
+        self.kernels = nn.ModuleList(kernels)
+        if init_weight:
+            self._initialize_weights()
+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+    def forward(self, x):
+        res_list = []
+        for i in range(self.num_kernels):
+            res_list.append(self.kernels[i](x))
+        res = torch.stack(res_list, dim=-1).mean(-1)
+        return res
+class Inception_Block_V2(nn.Module):
+    """Inception block V2 with separable convolutions."""
+    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
+        super(Inception_Block_V2, self).__init__()
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.num_kernels = num_kernels
+        kernels = []
+        for i in range(self.num_kernels // 2):
+            kernels.append(
+                nn.Conv2d(
+                    in_channels,
+                    out_channels,
+                    kernel_size=[1, 2 * i + 3],
+                    padding=[0, i + 1],
+                )
+            )
+            kernels.append(
+                nn.Conv2d(
+                    in_channels,
+                    out_channels,
+                    kernel_size=[2 * i + 3, 1],
+                    padding=[i + 1, 0],
+                )
+            )
+        kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
+        self.kernels = nn.ModuleList(kernels)
+        if init_weight:
+            self._initialize_weights()
+    def _initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+    def forward(self, x):
+        res_list = []
+        for i in range(self.num_kernels // 2 * 2 + 1):
+            res_list.append(self.kernels[i](x))
+        res = torch.stack(res_list, dim=-1).mean(-1)
+        return res

layers/Embed.py ADDED Viewed

	@@ -0,0 +1,229 @@

+"""
+Embedding Layers for Voltage Anomaly Detection
+Standalone version - independent from main TSLib
+"""
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class PositionalEmbedding(nn.Module):
+    """Fixed sinusoidal positional encoding table.
+
+    Args:
+        d_model: Width of each encoding vector (even or odd).
+        max_len: Number of positions precomputed in the table.
+    """
+    def __init__(self, d_model, max_len=5000):
+        super(PositionalEmbedding, self).__init__()
+        # Compute the positional encodings once in log space. The table is
+        # registered as a buffer, so it is saved with the module but never
+        # trained.
+        pe = torch.zeros(max_len, d_model).float()
+        position = torch.arange(0, max_len).float().unsqueeze(1)
+        div_term = (
+            torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
+        ).exp()
+        angles = position * div_term
+        pe[:, 0::2] = torch.sin(angles)
+        # An odd d_model has one cosine column fewer than sine columns, so the
+        # cosine block is trimmed to fit (a no-op when d_model is even).
+        pe[:, 1::2] = torch.cos(angles)[:, : d_model // 2]
+        pe = pe.unsqueeze(0)
+        self.register_buffer("pe", pe)
+    def forward(self, x):
+        """Return the table rows for the first ``x.size(1)`` positions.
+
+        Only the length of ``x`` along dim 1 is used; the result has shape
+        ``[1, min(x.size(1), max_len), d_model]``.
+        """
+        return self.pe[:, : x.size(1)]
+class TokenEmbedding(nn.Module):
+    """Token embedding using a kernel-3 circular 1D convolution.
+
+    Args:
+        c_in: Number of input channels (size of the last axis of the input).
+        d_model: Number of output channels.
+    """
+    def __init__(self, c_in, d_model):
+        super(TokenEmbedding, self).__init__()
+        # Compare the release numerically: as strings, "1.10.0" sorts below
+        # "1.5.0", which would pick padding=2 on torch 1.10-1.13 and make the
+        # output two steps longer than the input.
+        padding = 1 if self._torch_release() >= (1, 5) else 2
+        self.tokenConv = nn.Conv1d(
+            in_channels=c_in,
+            out_channels=d_model,
+            kernel_size=3,
+            padding=padding,
+            padding_mode="circular",
+            bias=False,
+        )
+        for m in self.modules():
+            if isinstance(m, nn.Conv1d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_in", nonlinearity="leaky_relu"
+                )
+    @staticmethod
+    def _torch_release():
+        """Return ``(major, minor)`` of the running torch build as integers.
+
+        Local build tags (``+cu118``) and pre-release suffixes (``0a0``) are
+        ignored; a missing or non-numeric field counts as 0.
+        """
+        numbers = []
+        for field in str(torch.__version__).split("+")[0].split(".")[:2]:
+            digits = ""
+            for ch in field:
+                if not ch.isdigit():
+                    break
+                digits += ch
+            numbers.append(int(digits) if digits else 0)
+        while len(numbers) < 2:
+            numbers.append(0)
+        return tuple(numbers)
+    def forward(self, x):
+        """Convolve along dim 1 of ``x`` and return channels on the last axis."""
+        # Conv1d wants channels on dim 1, so swap in and swap back out.
+        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
+        return x
+class FixedEmbedding(nn.Module):
+    """Non-trainable embedding whose rows are sinusoidal encodings.
+
+    Args:
+        c_in: Number of rows in the lookup table (valid indices are
+            ``0 .. c_in - 1``).
+        d_model: Width of each embedding vector (even or odd).
+    """
+    def __init__(self, c_in, d_model):
+        super(FixedEmbedding, self).__init__()
+        w = torch.zeros(c_in, d_model).float()
+        position = torch.arange(0, c_in).float().unsqueeze(1)
+        div_term = (
+            torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)
+        ).exp()
+        angles = position * div_term
+        w[:, 0::2] = torch.sin(angles)
+        # An odd d_model has one cosine column fewer than sine columns, so the
+        # cosine block is trimmed to fit (a no-op when d_model is even).
+        w[:, 1::2] = torch.cos(angles)[:, : d_model // 2]
+        self.emb = nn.Embedding(c_in, d_model)
+        # Freezing happens here: the table replaces the random init as a
+        # parameter with requires_grad=False.
+        self.emb.weight = nn.Parameter(w, requires_grad=False)
+    def forward(self, x):
+        """Look up the integer indices in ``x``; the result is detached."""
+        return self.emb(x).detach()
+class TemporalEmbedding(nn.Module):
+    """Sum of per-field calendar embeddings.
+
+    ``forward`` reads the last axis of its input as
+    ``[month, day, weekday, hour, minute]``; the minute table is only built
+    (and the minute column only read) when ``freq == "t"``.
+    """
+    def __init__(self, d_model, embed_type="fixed", freq="h"):
+        super().__init__()
+        # Resolve the table class lazily so only the selected one is touched.
+        if embed_type == "fixed":
+            table_cls = FixedEmbedding
+        else:
+            table_cls = nn.Embedding
+        if freq == "t":
+            self.minute_embed = table_cls(4, d_model)
+        self.hour_embed = table_cls(24, d_model)
+        self.weekday_embed = table_cls(7, d_model)
+        self.day_embed = table_cls(32, d_model)
+        self.month_embed = table_cls(13, d_model)
+    def forward(self, x):
+        """Embed each calendar column of ``x`` (cast to long) and add them up."""
+        marks = x.long()
+        if hasattr(self, "minute_embed"):
+            minute_part = self.minute_embed(marks[:, :, 4])
+        else:
+            minute_part = 0.0
+        hour_part = self.hour_embed(marks[:, :, 3])
+        weekday_part = self.weekday_embed(marks[:, :, 2])
+        day_part = self.day_embed(marks[:, :, 1])
+        month_part = self.month_embed(marks[:, :, 0])
+        return hour_part + weekday_part + day_part + month_part + minute_part
+class TimeFeatureEmbedding(nn.Module):
+    """Bias-free linear projection of time features to ``d_model``.
+
+    The expected number of input features is looked up from ``freq``; an
+    unknown ``freq`` raises ``KeyError``.
+    """
+    def __init__(self, d_model, embed_type="timeF", freq="h"):
+        super().__init__()
+        features_per_freq = {
+            "h": 4,
+            "t": 5,
+            "s": 6,
+            "m": 1,
+            "a": 1,
+            "w": 2,
+            "d": 3,
+            "b": 3,
+        }
+        self.embed = nn.Linear(features_per_freq[freq], d_model, bias=False)
+    def forward(self, x):
+        """Project the last axis of ``x`` to ``d_model``."""
+        projected = self.embed(x)
+        return projected
+class DataEmbedding(nn.Module):
+    """Value + temporal + positional embedding followed by dropout.
+
+    The temporal branch is a ``TimeFeatureEmbedding`` when
+    ``embed_type == "timeF"`` and a ``TemporalEmbedding`` otherwise.
+    """
+    def __init__(self, c_in, d_model, embed_type="fixed", freq="h", dropout=0.1):
+        super().__init__()
+        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
+        self.position_embedding = PositionalEmbedding(d_model=d_model)
+        if embed_type == "timeF":
+            temporal = TimeFeatureEmbedding(
+                d_model=d_model, embed_type=embed_type, freq=freq
+            )
+        else:
+            temporal = TemporalEmbedding(
+                d_model=d_model, embed_type=embed_type, freq=freq
+            )
+        self.temporal_embedding = temporal
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, x, x_mark):
+        """Embed ``x``; the temporal term is skipped when ``x_mark`` is None."""
+        tokens = self.value_embedding(x)
+        if x_mark is None:
+            combined = tokens + self.position_embedding(x)
+        else:
+            combined = (
+                tokens + self.temporal_embedding(x_mark) + self.position_embedding(x)
+            )
+        return self.dropout(combined)
+class DataEmbedding_inverted(nn.Module):
+    """Inverted embedding: each variate's whole series becomes one token.
+
+    ``c_in`` is the input width of the linear layer, which is applied after
+    dims 1 and 2 of the input are swapped, so it must equal the size of dim 1
+    of ``x``.
+    """
+    def __init__(self, c_in, d_model, embed_type="fixed", freq="h", dropout=0.1):
+        super().__init__()
+        self.value_embedding = nn.Linear(c_in, d_model)
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, x, x_mark):
+        """Project every variate (and, if given, every mark channel) to ``d_model``."""
+        # After the swap: [Batch, Variate, Time]
+        series = x.permute(0, 2, 1)
+        if x_mark is not None:
+            # Mark channels are appended as additional variate rows.
+            series = torch.cat([series, x_mark.permute(0, 2, 1)], 1)
+        # Result: [Batch, Variate(+marks), d_model]
+        return self.dropout(self.value_embedding(series))
+class DataEmbedding_wo_pos(nn.Module):
+    """Value + temporal embedding followed by dropout (no positional term).
+
+    A ``PositionalEmbedding`` is still constructed and registered, but
+    ``forward`` never adds it.
+    """
+    def __init__(self, c_in, d_model, embed_type="fixed", freq="h", dropout=0.1):
+        super().__init__()
+        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
+        self.position_embedding = PositionalEmbedding(d_model=d_model)
+        if embed_type == "timeF":
+            temporal = TimeFeatureEmbedding(
+                d_model=d_model, embed_type=embed_type, freq=freq
+            )
+        else:
+            temporal = TemporalEmbedding(
+                d_model=d_model, embed_type=embed_type, freq=freq
+            )
+        self.temporal_embedding = temporal
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, x, x_mark):
+        """Embed ``x``; the temporal term is skipped when ``x_mark`` is None."""
+        embedded = self.value_embedding(x)
+        if x_mark is not None:
+            embedded = embedded + self.temporal_embedding(x_mark)
+        return self.dropout(embedded)
+class PatchEmbedding(nn.Module):
+    """Split each series into patches and embed every patch.
+
+    Args:
+        d_model: Output width of each patch embedding.
+        patch_len: Number of time steps per patch.
+        stride: Step between the starts of consecutive patches.
+        padding: Number of replicated steps appended to the end of the series.
+        dropout: Dropout probability applied to the result.
+    """
+    def __init__(self, d_model, patch_len, stride, padding, dropout):
+        super().__init__()
+        self.patch_len = patch_len
+        self.stride = stride
+        # Right-pad only, by repeating the final value.
+        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))
+        # Each patch of patch_len values is projected to d_model.
+        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
+        self.position_embedding = PositionalEmbedding(d_model)
+        self.dropout = nn.Dropout(dropout)
+    def forward(self, x):
+        """Return ``(patch_embeddings, n_vars)``.
+
+        ``n_vars`` is ``x.shape[1]``; the first two axes of the patched tensor
+        are merged, so the embeddings have ``x.shape[0] * n_vars`` rows.
+        """
+        n_vars = x.shape[1]
+        padded = self.padding_patch_layer(x)
+        patches = padded.unfold(dimension=-1, size=self.patch_len, step=self.stride)
+        rows, channels, num_patches, width = patches.shape
+        flat = torch.reshape(patches, (rows * channels, num_patches, width))
+        embedded = self.value_embedding(flat) + self.position_embedding(flat)
+        return self.dropout(embedded), n_vars

layers/SelfAttention_Family.py ADDED Viewed

	@@ -0,0 +1,378 @@

+"""
+Self-Attention Family for Time Series Models.
+Includes:
+- FullAttention: Standard scaled dot-product attention
+- ProbAttention: Informer's ProbSparse attention
+- DSAttention: De-stationary attention
+- AttentionLayer: Attention wrapper with projections
+- ReformerLayer: LSH attention (requires reformer_pytorch)
+- TwoStageAttentionLayer: Crossformer's two-stage attention
+"""
+from math import sqrt
+import numpy as np
+import torch
+import torch.nn as nn
+from utils.masking import ProbMask, TriangularCausalMask
+class DSAttention(nn.Module):
+    """De-stationary attention.
+
+    Raw dot-product scores are multiplied by ``tau`` and shifted by ``delta``
+    before scaling and softmax. ``factor`` is accepted but not used.
+    """
+    def __init__(
+        self,
+        mask_flag=True,
+        factor=5,
+        scale=None,
+        attention_dropout=0.1,
+        output_attention=False,
+    ):
+        super().__init__()
+        self.scale = scale
+        self.mask_flag = mask_flag
+        self.output_attention = output_attention
+        self.dropout = nn.Dropout(attention_dropout)
+    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
+        """Return ``(context, attention)``; attention is None unless ``output_attention``."""
+        B, L, _, E = queries.shape
+        softmax_scale = self.scale if self.scale else 1.0 / sqrt(E)
+        # Missing factors fall back to the identity rescale / zero shift.
+        if tau is None:
+            tau = 1.0
+        else:
+            tau = tau.unsqueeze(1).unsqueeze(1)
+        if delta is None:
+            delta = 0.0
+        else:
+            delta = delta.unsqueeze(1).unsqueeze(1)
+        scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta
+        if self.mask_flag:
+            # Without an explicit mask, a causal one is built on the fly.
+            mask = attn_mask
+            if mask is None:
+                mask = TriangularCausalMask(B, L, device=queries.device)
+            scores.masked_fill_(mask.mask, -np.inf)
+        weights = self.dropout(torch.softmax(softmax_scale * scores, dim=-1))
+        context = torch.einsum("bhls,bshd->blhd", weights, values)
+        return context.contiguous(), (weights if self.output_attention else None)
+class FullAttention(nn.Module):
+    """Standard scaled dot-product attention.
+
+    ``factor``, ``tau`` and ``delta`` are accepted but not used.
+    """
+    def __init__(
+        self,
+        mask_flag=True,
+        factor=5,
+        scale=None,
+        attention_dropout=0.1,
+        output_attention=False,
+    ):
+        super().__init__()
+        self.scale = scale
+        self.mask_flag = mask_flag
+        self.output_attention = output_attention
+        self.dropout = nn.Dropout(attention_dropout)
+    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
+        """Return ``(context, attention)``; attention is None unless ``output_attention``."""
+        B, L, _, E = queries.shape
+        softmax_scale = self.scale if self.scale else 1.0 / sqrt(E)
+        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
+        if self.mask_flag:
+            # Without an explicit mask, a causal one is built on the fly.
+            mask = attn_mask
+            if mask is None:
+                mask = TriangularCausalMask(B, L, device=queries.device)
+            scores.masked_fill_(mask.mask, -np.inf)
+        weights = self.dropout(torch.softmax(softmax_scale * scores, dim=-1))
+        context = torch.einsum("bhls,bshd->blhd", weights, values)
+        return context.contiguous(), (weights if self.output_attention else None)
+class ProbAttention(nn.Module):
+    """Informer's ProbSparse self-attention mechanism.
+
+    Only the ``u`` queries with the highest sparsity measurement receive
+    real attention; the remaining positions keep an initial context (the mean
+    of the values, or their cumulative sum when masking is enabled).
+
+    Args:
+        mask_flag: Apply a ``ProbMask`` to the selected scores when True.
+        factor: Multiplier on ``ceil(log(L))`` for the number of sampled keys
+            and selected queries.
+        scale: Softmax scale; defaults to ``1 / sqrt(head_dim)`` when falsy.
+        attention_dropout: Dropout probability (the module is created but not
+            applied inside this class).
+        output_attention: Also return an attention map when True.
+    """
+    def __init__(
+        self,
+        mask_flag=True,
+        factor=5,
+        scale=None,
+        attention_dropout=0.1,
+        output_attention=False,
+    ):
+        super(ProbAttention, self).__init__()
+        self.factor = factor
+        self.scale = scale
+        self.mask_flag = mask_flag
+        self.output_attention = output_attention
+        self.dropout = nn.Dropout(attention_dropout)
+    def _prob_QK(self, Q, K, sample_k, n_top):
+        """Score a random key sample and pick the ``n_top`` most active queries.
+
+        Args:
+            Q: Queries, ``[B, H, L_Q, E]``.
+            K: Keys, ``[B, H, L_K, E]``.
+            sample_k: Number of keys sampled (with replacement) per query.
+            n_top: Number of queries to keep.
+        Returns:
+            ``(Q_K, M_top)``: full scores of the kept queries,
+            ``[B, H, n_top, L_K]``, and their indices, ``[B, H, n_top]``.
+        """
+        B, H, L_K, E = K.shape
+        _, _, L_Q, _ = Q.shape
+        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
+        index_sample = torch.randint(L_K, (L_Q, sample_k))
+        K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
+        # Drop only the singleton query axis added by unsqueeze(-2). A bare
+        # squeeze() would also remove batch/head axes of size 1 and make the
+        # top-query indices broadcast across the wrong dimension.
+        Q_K_sample = torch.matmul(
+            Q.unsqueeze(-2), K_sample.transpose(-2, -1)
+        ).squeeze(-2)
+        # Sparsity measurement: max sampled score minus the sampled sum / L_K.
+        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
+        M_top = M.topk(n_top, sorted=False)[1]
+        Q_reduce = Q[
+            torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], M_top, :
+        ]
+        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))
+        return Q_K, M_top
+    def _get_initial_context(self, V, L_Q):
+        """Build the context used for queries that were not selected."""
+        B, H, L_V, D = V.shape
+        if not self.mask_flag:
+            V_sum = V.mean(dim=-2)
+            contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
+        else:
+            # The cumulative-sum context only makes sense for self-attention.
+            assert L_Q == L_V
+            contex = V.cumsum(dim=-2)
+        return contex
+    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
+        """Overwrite the rows at ``index`` with real attention outputs.
+
+        ``context_in`` is modified in place. The incoming ``attn_mask`` is
+        ignored; when ``mask_flag`` is set a ``ProbMask`` is built instead.
+        """
+        B, H, L_V, D = V.shape
+        if self.mask_flag:
+            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
+            scores.masked_fill_(attn_mask.mask, -np.inf)
+        attn = torch.softmax(scores, dim=-1)
+        context_in[
+            torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
+        ] = torch.matmul(attn, V).type_as(context_in)
+        if self.output_attention:
+            # Unselected rows are reported as uniform attention.
+            attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
+            attns[
+                torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :
+            ] = attn
+            return context_in, attns
+        else:
+            return context_in, None
+    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
+        """Run ProbSparse attention.
+
+        Args:
+            queries: ``[B, L_Q, H, D]``.
+            keys: ``[B, L_K, H, D]``.
+            values: ``[B, L_K, H, D_v]``.
+            attn_mask: Accepted for interface parity; not used.
+            tau, delta: Accepted for interface parity; not used.
+        Returns:
+            ``(context, attn)`` where ``context`` is laid out as
+            ``[B, H, L_Q, D_v]`` and ``attn`` is None unless
+            ``output_attention`` is set.
+        """
+        B, L_Q, H, D = queries.shape
+        _, L_K, _, _ = keys.shape
+        queries = queries.transpose(2, 1)
+        keys = keys.transpose(2, 1)
+        values = values.transpose(2, 1)
+        # factor * ceil(ln(L)), capped at the sequence length.
+        U_part = self.factor * np.ceil(np.log(L_K)).astype("int").item()
+        u = self.factor * np.ceil(np.log(L_Q)).astype("int").item()
+        U_part = U_part if U_part < L_K else L_K
+        u = u if u < L_Q else L_Q
+        scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)
+        scale = self.scale or 1.0 / sqrt(D)
+        scores_top = scores_top * scale
+        context = self._get_initial_context(values, L_Q)
+        context, attn = self._update_context(
+            context, values, scores_top, index, L_Q, attn_mask
+        )
+        return context.contiguous(), attn
+class AttentionLayer(nn.Module):
+    """Multi-head wrapper: project Q/K/V, call the inner attention, project back.
+
+    ``d_keys`` and ``d_values`` default to ``d_model // n_heads``.
+    """
+    def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None):
+        super().__init__()
+        per_head = d_model // n_heads
+        d_keys = d_keys or per_head
+        d_values = d_values or per_head
+        self.inner_attention = attention
+        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
+        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
+        self.value_projection = nn.Linear(d_model, d_values * n_heads)
+        self.out_projection = nn.Linear(d_values * n_heads, d_model)
+        self.n_heads = n_heads
+    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
+        """Return ``(output, attn)`` with ``output`` shaped like ``queries``' first two dims and ``d_model`` last."""
+        B, L, _ = queries.shape
+        _, S, _ = keys.shape
+        heads = self.n_heads
+        # Split the projected features into heads on a new third axis.
+        q_heads = self.query_projection(queries).view(B, L, heads, -1)
+        k_heads = self.key_projection(keys).view(B, S, heads, -1)
+        v_heads = self.value_projection(values).view(B, S, heads, -1)
+        mixed, attn = self.inner_attention(
+            q_heads, k_heads, v_heads, attn_mask, tau=tau, delta=delta
+        )
+        # Merge the heads back before the output projection.
+        merged = mixed.view(B, L, -1)
+        return self.out_projection(merged), attn
+class ReformerLayer(nn.Module):
+    """LSH Self-Attention layer (Reformer).
+
+    Wraps ``reformer_pytorch.LSHSelfAttention``. ``attention``, ``d_keys``
+    and ``d_values`` are accepted for signature parity with the other
+    attention layers but are not used.
+
+    Raises:
+        ImportError: If ``reformer_pytorch`` is not installed.
+    """
+    def __init__(
+        self,
+        attention,
+        d_model,
+        n_heads,
+        d_keys=None,
+        d_values=None,
+        causal=False,
+        bucket_size=4,
+        n_hashes=4,
+    ):
+        super().__init__()
+        self.bucket_size = bucket_size
+        # Note: requires reformer_pytorch package
+        try:
+            from reformer_pytorch import LSHSelfAttention
+        except ImportError as err:
+            # Chain the original error so the real import failure stays visible.
+            raise ImportError(
+                "ReformerLayer requires reformer_pytorch. Install with: pip install reformer_pytorch"
+            ) from err
+        self.attn = LSHSelfAttention(
+            dim=d_model,
+            heads=n_heads,
+            bucket_size=bucket_size,
+            n_hashes=n_hashes,
+            causal=causal,
+        )
+    def fit_length(self, queries):
+        """Zero-pad dim 1 of ``queries`` up to a multiple of ``2 * bucket_size``.
+
+        The input is returned unchanged when its length already fits.
+        """
+        B, N, C = queries.shape
+        block = self.bucket_size * 2
+        remainder = N % block
+        if remainder == 0:
+            return queries
+        # new_zeros keeps the padding on the same device and dtype as the
+        # input, so half/double inputs are not silently mixed with float32.
+        filler = queries.new_zeros(B, block - remainder, C)
+        return torch.cat([queries, filler], dim=1)
+    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
+        """Self-attend over ``queries`` only and return ``(output, None)``.
+
+        ``keys``, ``values``, ``attn_mask``, ``tau`` and ``delta`` are ignored;
+        the padding added by ``fit_length`` is cut off again.
+        """
+        B, N, C = queries.shape
+        queries = self.attn(self.fit_length(queries))[:, :N, :]
+        return queries, None
+class TwoStageAttentionLayer(nn.Module):
+    """
+    Two Stage Attention (TSA) Layer for Crossformer.
+    Stage one attends across segments within each series; stage two exchanges
+    information across series through a small set of learned router vectors.
+    input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model]
+    """
+    def __init__(
+        self, configs, seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1
+    ):
+        super().__init__()
+        hidden = d_ff or 4 * d_model
+        def build_attention():
+            # Unmasked full attention configured from ``configs``.
+            inner = FullAttention(
+                False,
+                configs.factor,
+                attention_dropout=configs.dropout,
+                output_attention=False,
+            )
+            return AttentionLayer(inner, d_model, n_heads)
+        def build_mlp():
+            return nn.Sequential(
+                nn.Linear(d_model, hidden), nn.GELU(), nn.Linear(hidden, d_model)
+            )
+        self.time_attention = build_attention()
+        self.dim_sender = build_attention()
+        self.dim_receiver = build_attention()
+        # One bank of ``factor`` router vectors per segment.
+        self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))
+        self.dropout = nn.Dropout(dropout)
+        self.norm1 = nn.LayerNorm(d_model)
+        self.norm2 = nn.LayerNorm(d_model)
+        self.norm3 = nn.LayerNorm(d_model)
+        self.norm4 = nn.LayerNorm(d_model)
+        self.MLP1 = build_mlp()
+        self.MLP2 = build_mlp()
+    def forward(self, x, attn_mask=None, tau=None, delta=None):
+        """Apply both stages; ``attn_mask``, ``tau`` and ``delta`` are not forwarded."""
+        try:
+            from einops import rearrange, repeat
+        except ImportError:
+            raise ImportError(
+                "TwoStageAttentionLayer requires einops. Install with: pip install einops"
+            )
+        batch = x.shape[0]
+        # Stage 1: attention along the segment axis, one series at a time.
+        per_series = rearrange(x, "b ts_d seg_num d_model -> (b ts_d) seg_num d_model")
+        time_out, _ = self.time_attention(
+            per_series, per_series, per_series, attn_mask=None, tau=None, delta=None
+        )
+        hidden = self.norm1(per_series + self.dropout(time_out))
+        hidden = self.norm2(hidden + self.dropout(self.MLP1(hidden)))
+        # Stage 2: regroup so attention runs across series for each segment.
+        per_segment = rearrange(
+            hidden, "(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model", b=batch
+        )
+        routers = repeat(
+            self.router,
+            "seg_num factor d_model -> (repeat seg_num) factor d_model",
+            repeat=batch,
+        )
+        # Routers gather from all series, then every series reads them back.
+        gathered, _ = self.dim_sender(
+            routers, per_segment, per_segment, attn_mask=None, tau=None, delta=None
+        )
+        received, _ = self.dim_receiver(
+            per_segment, gathered, gathered, attn_mask=None, tau=None, delta=None
+        )
+        mixed = self.norm3(per_segment + self.dropout(received))
+        mixed = self.norm4(mixed + self.dropout(self.MLP2(mixed)))
+        return rearrange(
+            mixed, "(b seg_num) ts_d d_model -> b ts_d seg_num d_model", b=batch
+        )

layers/StandardNorm.py ADDED Viewed

	@@ -0,0 +1,85 @@

+"""
+StandardNorm / Normalize layer (RevIN-style normalization)
+"""
+import torch
+import torch.nn as nn
+class Normalize(nn.Module):
+    """
+    Reversible instance normalization: ``"norm"`` records per-instance
+    statistics and standardizes, ``"denorm"`` undoes it with the statistics
+    recorded by the most recent ``"norm"`` call.
+    """
+    def __init__(
+        self,
+        num_features: int,
+        eps=1e-5,
+        affine=False,
+        subtract_last=False,
+        non_norm=False,
+    ):
+        """
+        :param num_features: the number of features or channels
+        :param eps: a value added for numerical stability
+        :param affine: if True, learnable per-feature scale and shift are used
+        :param subtract_last: if True, center on the last step instead of the mean
+        :param non_norm: if True, both directions return the input unchanged
+        """
+        super().__init__()
+        self.num_features = num_features
+        self.eps = eps
+        self.affine = affine
+        self.subtract_last = subtract_last
+        self.non_norm = non_norm
+        if affine:
+            self._init_params()
+    def forward(self, x, mode: str):
+        """Dispatch on ``mode``; anything but "norm"/"denorm" raises NotImplementedError."""
+        if mode == "norm":
+            self._get_statistics(x)
+            return self._normalize(x)
+        if mode == "denorm":
+            return self._denormalize(x)
+        raise NotImplementedError
+    def _init_params(self):
+        # Learnable scale and shift, one value per feature: (C,)
+        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
+        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))
+    def _get_statistics(self, x):
+        """Store the center and standard deviation over all middle axes of ``x``."""
+        reduce_axes = tuple(range(1, x.ndim - 1))
+        if self.subtract_last:
+            self.last = x[:, -1, :].unsqueeze(1)
+        else:
+            self.mean = torch.mean(x, dim=reduce_axes, keepdim=True).detach()
+        variance = torch.var(x, dim=reduce_axes, keepdim=True, unbiased=False)
+        self.stdev = torch.sqrt(variance + self.eps).detach()
+    def _normalize(self, x):
+        if self.non_norm:
+            return x
+        center = self.last if self.subtract_last else self.mean
+        x = (x - center) / self.stdev
+        if self.affine:
+            x = x * self.affine_weight + self.affine_bias
+        return x
+    def _denormalize(self, x):
+        if self.non_norm:
+            return x
+        if self.affine:
+            # eps**2 keeps the division finite if a learned weight reaches zero.
+            x = (x - self.affine_bias) / (self.affine_weight + self.eps * self.eps)
+        x = x * self.stdev
+        center = self.last if self.subtract_last else self.mean
+        return x + center

layers/ThreePhaseAttention.py ADDED Viewed

	@@ -0,0 +1,504 @@

+"""
+ThreePhaseAttention: Attention Mechanisms for Three-Phase Power Systems
+This module provides attention mechanisms specifically designed for analyzing
+three-phase voltage signals in power grid anomaly detection:
+1. InterPhaseAttention: Captures relationships between Va, Vb, Vc phases
+2. SymmetricalComponentAttention: Analyzes positive/negative/zero sequences
+3. TransientAttention: Multi-scale attention for transient event detection
+4. VoltageChannelAttention: Channel-wise attention for voltage features
+Key concepts:
+- In balanced systems, Va, Vb, Vc have 120° phase difference
+- Unbalance indicates issues (asymmetric loads, faults)
+- Symmetrical components help diagnose fault types
+"""
+import math
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class InterPhaseAttention(nn.Module):
+    """
+    Multi-head self-attention over time with a learnable phase-pattern bias.
+    A ``num_phases x num_phases`` bias matrix is tiled across the attention
+    score map so that positions sharing the same offsets modulo ``num_phases``
+    share a bias. The output is layer-normalized after a residual connection.
+    ``phase_transforms`` and ``phase_angles`` are created here but are not
+    used by ``forward``.
+    """
+    def __init__(self, d_model, n_heads=4, dropout=0.1, num_phases=3):
+        """
+        Args:
+            d_model: Model dimension
+            n_heads: Number of attention heads
+            dropout: Dropout rate
+            num_phases: Number of phases (default 3 for three-phase)
+        """
+        super().__init__()
+        self.d_model = d_model
+        self.n_heads = n_heads
+        self.num_phases = num_phases
+        self.d_k = d_model // n_heads
+        # Shared query/key/value projections.
+        self.W_q = nn.Linear(d_model, d_model)
+        self.W_k = nn.Linear(d_model, d_model)
+        self.W_v = nn.Linear(d_model, d_model)
+        # One linear map per phase.
+        per_phase = [nn.Linear(d_model, d_model) for _ in range(num_phases)]
+        self.phase_transforms = nn.ModuleList(per_phase)
+        self.W_o = nn.Linear(d_model, d_model)
+        # Fixed 0 / 120 / 240 degree reference angles (buffer, not trained).
+        self.register_buffer(
+            "phase_angles",
+            torch.tensor([0, 2 * math.pi / 3, 4 * math.pi / 3]),
+        )
+        # Learnable phase-to-phase bias, starting at zero.
+        self.phase_bias = nn.Parameter(torch.zeros(num_phases, num_phases))
+        self.dropout = nn.Dropout(dropout)
+        self.layer_norm = nn.LayerNorm(d_model)
+    def forward(self, x, phase_mask=None):
+        """
+        Args:
+            x: Input tensor [B, T, d_model] (the last axis feeds d_model-wide
+                linear layers and the residual connection)
+            phase_mask: Optional mask compared against 0 and applied to the
+                [B, n_heads, T, T] scores, so it must broadcast to that shape
+        Returns:
+            Attention output [B, T, d_model]
+        """
+        B, T, C = x.size()
+        def split_heads(projected):
+            # [B, T, d_model] -> [B, n_heads, T, d_k]
+            return projected.view(B, T, self.n_heads, self.d_k).transpose(1, 2)
+        Q = split_heads(self.W_q(x))
+        K = split_heads(self.W_k(x))
+        V = split_heads(self.W_v(x))
+        scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
+        if T >= self.num_phases:
+            # Tile the bias until it covers T x T, then crop the overhang.
+            n_tiles = (T + self.num_phases - 1) // self.num_phases
+            tiled_bias = self.phase_bias.repeat(n_tiles, n_tiles)[:T, :T]
+            scores = scores + tiled_bias
+        if phase_mask is not None:
+            scores = scores.masked_fill(phase_mask == 0, -1e9)
+        attn_weights = self.dropout(F.softmax(scores, dim=-1))
+        context = torch.matmul(attn_weights, V)
+        # [B, n_heads, T, d_k] -> [B, T, d_model]
+        context = context.transpose(1, 2).contiguous().view(B, T, self.d_model)
+        return self.layer_norm(self.W_o(context) + x)
+class SymmetricalComponentAttention(nn.Module):
+    """
+    Attention based on symmetrical component analysis.
+    Symmetrical components decompose unbalanced three-phase systems into:
+    - Positive sequence (balanced, normal operation)
+    - Negative sequence (indicates unbalance)
+    - Zero sequence (indicates ground faults)
+    ``forward`` runs three independent self-attention branches (named after
+    the three sequences) over the same input and mixes them with learned
+    weights; ``compute_symmetrical_components`` is a separate helper that
+    applies the real part of the Fortescue transform to raw phase signals.
+    """
+    def __init__(self, d_model, dropout=0.1, n_heads=4):
+        """
+        Args:
+            d_model: Model dimension (must be divisible by ``n_heads``)
+            dropout: Dropout rate
+            n_heads: Number of heads in each attention branch (default 4)
+        """
+        super(SymmetricalComponentAttention, self).__init__()
+        self.d_model = d_model
+        # Fortescue transformation matrix for symmetrical components
+        # a = exp(j*2π/3) = -0.5 + j*0.866
+        a_real = -0.5
+        a_imag = math.sqrt(3) / 2
+        # Transformation matrix (real and imaginary parts), rows in the order
+        # [1, 1, 1] -> zero, [1, a², a] -> negative, [1, a, a²] -> positive.
+        self.register_buffer(
+            "fortescue_real",
+            torch.tensor(
+                [
+                    [1, 1, 1],
+                    [1, a_real**2 - a_imag**2, a_real],
+                    [1, a_real, a_real**2 - a_imag**2],
+                ]
+            )
+            / 3.0,
+        )
+        self.register_buffer(
+            "fortescue_imag",
+            torch.tensor(
+                [
+                    [0, 0, 0],
+                    [0, 2 * a_real * a_imag, a_imag],
+                    [0, a_imag, 2 * a_real * a_imag],
+                ]
+            )
+            / 3.0,
+        )
+        # Sequence-specific attention
+        self.pos_seq_attn = nn.MultiheadAttention(
+            d_model, num_heads=n_heads, dropout=dropout, batch_first=True
+        )
+        self.neg_seq_attn = nn.MultiheadAttention(
+            d_model, num_heads=n_heads, dropout=dropout, batch_first=True
+        )
+        self.zero_seq_attn = nn.MultiheadAttention(
+            d_model, num_heads=n_heads, dropout=dropout, batch_first=True
+        )
+        # Sequence weighting (learnable importance of each sequence)
+        self.sequence_weights = nn.Parameter(torch.tensor([1.0, 0.5, 0.3]))
+        # Output projection
+        self.output_proj = nn.Linear(d_model * 3, d_model)
+        self.dropout = nn.Dropout(dropout)
+        self.layer_norm = nn.LayerNorm(d_model)
+    def compute_symmetrical_components(self, x):
+        """
+        Compute symmetrical components from three-phase signals.
+        Only the real part of the Fortescue matrix is applied, so the positive
+        and negative outputs coincide for real-valued inputs.
+        Args:
+            x: Three-phase signals [B, T, 3]
+        Returns:
+            Tuple of (positive, negative, zero) sequences, each [B, T, 1]
+        """
+        x_transformed = torch.matmul(x, self.fortescue_real.T)
+        # Pick columns by the row order of the matrix: row 0 ([1, 1, 1] / 3,
+        # the plain average) is the zero sequence, row 1 the negative and
+        # row 2 the positive sequence.
+        zero_seq = x_transformed[:, :, 0:1]
+        neg_seq = x_transformed[:, :, 1:2]
+        pos_seq = x_transformed[:, :, 2:3]
+        return pos_seq, neg_seq, zero_seq
+    def forward(self, x, return_sequences=False):
+        """
+        Args:
+            x: Input tensor [B, T, d_model]
+            return_sequences: Whether to return individual sequence outputs
+        Returns:
+            Attention output [B, T, d_model]
+            Optionally: (pos_out, neg_out, zero_out) if return_sequences=True
+        """
+        # Apply attention for each sequence type
+        pos_out, _ = self.pos_seq_attn(x, x, x)
+        neg_out, _ = self.neg_seq_attn(x, x, x)
+        zero_out, _ = self.zero_seq_attn(x, x, x)
+        # Weighted combination; softmax keeps the three weights positive and
+        # summing to one.
+        weights = F.softmax(self.sequence_weights, dim=0)
+        combined = torch.cat(
+            [weights[0] * pos_out, weights[1] * neg_out, weights[2] * zero_out], dim=-1
+        )
+        # Project back to d_model
+        output = self.output_proj(combined)
+        output = self.dropout(output)
+        # Residual connection and layer norm
+        output = self.layer_norm(output + x)
+        if return_sequences:
+            return output, (pos_out, neg_out, zero_out)
+        return output
+class TransientAttention(nn.Module):
+    """
+    Multi-scale attention for detecting transient events in voltage signals.
+    Transient events in power systems include:
+    - Voltage sags (ms to seconds)
+    - Voltage swells
+    - Momentary interruptions
+    - Switching transients (μs to ms)
+    This attention uses multiple time scales to capture different transient types.
+    """
+    def __init__(self, d_model, n_heads=4, dropout=0.1, scales=[1, 3, 5, 10]):
+        """
+        Args:
+            d_model: Model dimension
+            n_heads: Number of attention heads
+            dropout: Dropout rate
+            scales: List of temporal scales for multi-scale attention
+        """
+        super(TransientAttention, self).__init__()
+        self.d_model = d_model
+        self.n_heads = n_heads
+        self.scales = scales
+        # Multi-scale convolutions for different transient durations
+        self.scale_convs = nn.ModuleList(
+            [
+                nn.Conv1d(
+                    d_model, d_model, kernel_size=s, padding=s // 2, groups=d_model
+                )
+                for s in scales
+            ]
+        )
+        # Scale-specific attention
+        self.scale_attentions = nn.ModuleList(
+            [
+                nn.MultiheadAttention(
+                    d_model, num_heads=n_heads, dropout=dropout, batch_first=True
+                )
+                for _ in scales
+            ]
+        )
+        # Scale importance weights
+        self.scale_weights = nn.Parameter(torch.ones(len(scales)) / len(scales))
+        # Output projection
+        self.output_proj = nn.Linear(d_model * len(scales), d_model)
+        self.dropout = nn.Dropout(dropout)
+        self.layer_norm = nn.LayerNorm(d_model)
+    def forward(self, x):
+        """
+        Args:
+            x: Input tensor [B, T, d_model]
+        Returns:
+            Multi-scale attention output [B, T, d_model]
+        """
+        B, T, D = x.size()
+        scale_outputs = []
+        for i, (conv, attn) in enumerate(zip(self.scale_convs, self.scale_attentions)):
+            # Apply scale-specific convolution
+            x_conv = conv(x.permute(0, 2, 1)).permute(0, 2, 1)
+            # Ensure same length
+            if x_conv.size(1) != T:
+                x_conv = x_conv[:, :T, :]
+            # Apply attention at this scale
+            scale_out, _ = attn(x_conv, x_conv, x_conv)
+            scale_outputs.append(scale_out)
+        # Weighted combination of scales
+        weights = F.softmax(self.scale_weights, dim=0)
+        combined = torch.cat(
+            [w * out for w, out in zip(weights, scale_outputs)], dim=-1
+        )
+        # Project to output dimension
+        output = self.output_proj(combined)
+        output = self.dropout(output)
+        # Residual connection and layer norm
+        output = self.layer_norm(output + x)
+        return output
+class VoltageChannelAttention(nn.Module):
+    """
+    Channel-wise attention for voltage feature selection.
+    Different voltage features have varying importance for anomaly detection:
+    - Va, Vb, Vc: Direct voltage measurements
+    - Ia, Ib, Ic: Current measurements
+    - P, Q, S: Power metrics
+    - THD: Harmonic distortion
+    - Unbalance: Phase unbalance factor
+    This attention learns to weight different features based on their
+    relevance to anomaly detection.
+    """
+    def __init__(self, num_channels, reduction_ratio=4):
+        """
+        Args:
+            num_channels: Number of input channels/features
+            reduction_ratio: Reduction ratio for the bottleneck
+        """
+        super(VoltageChannelAttention, self).__init__()
+        self.num_channels = num_channels
+        # Bottleneck width; never below 1 even when num_channels < reduction_ratio.
+        reduced_channels = max(1, num_channels // reduction_ratio)
+        # Channel attention with squeeze-and-excitation style
+        self.avg_pool = nn.AdaptiveAvgPool1d(1)
+        self.max_pool = nn.AdaptiveMaxPool1d(1)
+        # One bias-free two-layer MLP, shared by the avg- and max-pooled branches
+        # in forward().
+        self.fc = nn.Sequential(
+            nn.Linear(num_channels, reduced_channels, bias=False),
+            nn.ReLU(inplace=True),
+            nn.Linear(reduced_channels, num_channels, bias=False),
+        )
+        # Feature group weighting (voltage, current, power, quality)
+        # Learnable importance for different feature groups
+        # NOTE(review): group_weights is registered as a parameter but is not
+        # read anywhere in forward().
+        self.group_weights = nn.Parameter(torch.ones(4))
+    def forward(self, x):
+        """
+        Args:
+            x: Input tensor [B, T, C]
+        Returns:
+            Channel-attended output [B, T, C]: the input scaled per channel by a
+            sigmoid gate (the gated features, not the gate itself)
+        """
+        B, T, C = x.size()
+        # Global average and max pooling
+        x_permuted = x.permute(0, 2, 1)  # [B, C, T]
+        avg_out = self.avg_pool(x_permuted).squeeze(-1)  # [B, C]
+        max_out = self.max_pool(x_permuted).squeeze(-1)  # [B, C]
+        # Channel attention weights
+        avg_attn = self.fc(avg_out)
+        max_attn = self.fc(max_out)
+        attn = torch.sigmoid(avg_attn + max_attn)  # [B, C]
+        # Apply attention
+        # (unsqueeze(1) gives [B, 1, C], so the same gate is used at every time step)
+        output = x * attn.unsqueeze(1)
+        return output
+class VoltageAttentionBlock(nn.Module):
+    """
+    Combined attention block for voltage anomaly detection.
+    Integrates multiple attention mechanisms:
+    1. Inter-phase attention for phase relationships
+    2. Transient attention for multi-scale temporal patterns
+    3. Channel attention for feature importance
+    This provides comprehensive attention for power grid signals.
+    """
+    def __init__(
+        self,
+        d_model,
+        num_channels,
+        n_heads=4,
+        dropout=0.1,
+        use_inter_phase=True,
+        use_transient=True,
+        use_channel=True,
+    ):
+        """
+        Args:
+            d_model: Model dimension
+            num_channels: Number of input channels
+            n_heads: Number of attention heads
+            dropout: Dropout rate
+            use_inter_phase: Whether to use inter-phase attention
+            use_transient: Whether to use transient attention
+            use_channel: Whether to use channel attention
+        """
+        super(VoltageAttentionBlock, self).__init__()
+        self.use_inter_phase = use_inter_phase
+        self.use_transient = use_transient
+        self.use_channel = use_channel
+        # Each sub-module is only created when its flag is set, so the
+        # corresponding attribute does not exist on the block otherwise.
+        if use_inter_phase:
+            self.inter_phase_attn = InterPhaseAttention(d_model, n_heads, dropout)
+        if use_transient:
+            self.transient_attn = TransientAttention(d_model, n_heads, dropout)
+        if use_channel:
+            self.channel_attn = VoltageChannelAttention(num_channels)
+        # Final projection
+        self.output_proj = nn.Linear(d_model, d_model)
+        self.dropout = nn.Dropout(dropout)
+        self.layer_norm = nn.LayerNorm(d_model)
+    def forward(self, x, x_raw=None):
+        """
+        Args:
+            x: Embedded input [B, T, d_model]
+            x_raw: Raw input for channel attention [B, T, num_channels] (optional)
+        Returns:
+            Attention output [B, T, d_model]
+        """
+        output = x
+        # Apply inter-phase attention
+        if self.use_inter_phase:
+            output = self.inter_phase_attn(output)
+        # Apply transient attention
+        if self.use_transient:
+            output = self.transient_attn(output)
+        # Apply channel attention on raw features if provided
+        # (skipped when x_raw is None, even if use_channel is True)
+        if self.use_channel and x_raw is not None:
+            # NOTE(review): VoltageChannelAttention.forward returns the gated
+            # features (x_raw * gate), not the gate; the name is kept as-is.
+            channel_weights = self.channel_attn(x_raw)
+            # Broadcast channel weights to embedded dimension
+            # This is a simplified application; in practice, might need adaptation
+            # The mean over the channel axis gives one scalar per (batch, time)
+            # position, which then multiplies every d_model feature there.
+            output = output * channel_weights.mean(dim=-1, keepdim=True).expand_as(
+                output
+            )
+        # Final projection with residual
+        # (the residual adds the original block input x, not an intermediate output)
+        output = self.output_proj(output)
+        output = self.dropout(output)
+        output = self.layer_norm(output + x)
+        return output

layers/Transformer_EncDec.py ADDED Viewed

	@@ -0,0 +1,159 @@

+"""
+Transformer Encoder-Decoder Layers.
+Standard Transformer architecture components for time series.
+"""
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class ConvLayer(nn.Module):
+    """Convolutional layer with downsampling for distilling.
+    Applies Conv1d -> BatchNorm1d -> ELU -> stride-2 MaxPool1d along the time
+    axis; the channel count c_in is unchanged.
+    """
+    def __init__(self, c_in):
+        super(ConvLayer, self).__init__()
+        # Circularly padded convolution that keeps the channel count.
+        self.downConv = nn.Conv1d(
+            in_channels=c_in,
+            out_channels=c_in,
+            kernel_size=3,
+            padding=2,
+            padding_mode="circular",
+        )
+        self.norm = nn.BatchNorm1d(c_in)
+        self.activation = nn.ELU()
+        # stride=2 is what shortens the sequence.
+        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
+    def forward(self, x):
+        # Swap the last two axes so the Conv1d/BatchNorm1d/MaxPool1d layers see
+        # channels on axis 1 (assumes x is [B, T, c_in] — TODO confirm with callers).
+        x = self.downConv(x.permute(0, 2, 1))
+        x = self.norm(x)
+        x = self.activation(x)
+        x = self.maxPool(x)
+        # Swap the axes back to the input layout.
+        x = x.transpose(1, 2)
+        return x
+class EncoderLayer(nn.Module):
+    """Standard Transformer encoder layer.
+    Self-attention with a residual connection and LayerNorm, followed by a
+    position-wise feed-forward network built from two kernel-size-1 Conv1d
+    layers, again with a residual connection and LayerNorm.
+    """
+    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
+        super(EncoderLayer, self).__init__()
+        # Feed-forward width falls back to 4 * d_model when d_ff is None (or 0).
+        d_ff = d_ff or 4 * d_model
+        self.attention = attention
+        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
+        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
+        self.norm1 = nn.LayerNorm(d_model)
+        self.norm2 = nn.LayerNorm(d_model)
+        self.dropout = nn.Dropout(dropout)
+        # Any value other than "relu" selects GELU.
+        self.activation = F.relu if activation == "relu" else F.gelu
+    def forward(self, x, attn_mask=None, tau=None, delta=None):
+        # Returns (output, attn), where attn is whatever the wrapped attention
+        # module returns as its second value.
+        new_x, attn = self.attention(x, x, x, attn_mask=attn_mask, tau=tau, delta=delta)
+        x = x + self.dropout(new_x)
+        # x keeps the normalized tensor for the second residual; y goes through the FFN.
+        y = x = self.norm1(x)
+        # transpose(-1, 1) swaps axes 1 and 2 so Conv1d sees d_model as channels.
+        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
+        y = self.dropout(self.conv2(y).transpose(-1, 1))
+        return self.norm2(x + y), attn
+class Encoder(nn.Module):
+    """Transformer encoder with optional convolutional layers.
+    forward() returns (x, attns), where attns collects the second return value
+    of every attention layer that was applied.
+    """
+    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
+        super(Encoder, self).__init__()
+        self.attn_layers = nn.ModuleList(attn_layers)
+        self.conv_layers = (
+            nn.ModuleList(conv_layers) if conv_layers is not None else None
+        )
+        self.norm = norm_layer
+    def forward(self, x, attn_mask=None, tau=None, delta=None):
+        attns = []
+        if self.conv_layers is not None:
+            # zip stops at the shorter list; the last attention layer is then
+            # applied once more below.
+            # NOTE(review): this presumably expects
+            # len(attn_layers) == len(conv_layers) + 1 — confirm with callers.
+            for i, (attn_layer, conv_layer) in enumerate(
+                zip(self.attn_layers, self.conv_layers)
+            ):
+                # delta is only forwarded to the first layer; later layers get None.
+                delta = delta if i == 0 else None
+                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
+                x = conv_layer(x)
+                attns.append(attn)
+            # The final attention layer is called without attn_mask and with delta=None.
+            x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
+            attns.append(attn)
+        else:
+            # No conv layers: every attention layer receives the same mask, tau and delta.
+            for attn_layer in self.attn_layers:
+                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
+                attns.append(attn)
+        if self.norm is not None:
+            x = self.norm(x)
+        return x, attns
+class DecoderLayer(nn.Module):
+    """Standard Transformer decoder layer with self and cross attention.
+    Order of operations: self-attention, cross-attention over `cross`, then a
+    feed-forward network of two kernel-size-1 Conv1d layers. Each stage has a
+    residual connection followed by LayerNorm.
+    """
+    def __init__(
+        self,
+        self_attention,
+        cross_attention,
+        d_model,
+        d_ff=None,
+        dropout=0.1,
+        activation="relu",
+    ):
+        super(DecoderLayer, self).__init__()
+        # Feed-forward width falls back to 4 * d_model when d_ff is None (or 0).
+        d_ff = d_ff or 4 * d_model
+        self.self_attention = self_attention
+        self.cross_attention = cross_attention
+        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
+        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
+        self.norm1 = nn.LayerNorm(d_model)
+        self.norm2 = nn.LayerNorm(d_model)
+        self.norm3 = nn.LayerNorm(d_model)
+        self.dropout = nn.Dropout(dropout)
+        # Any value other than "relu" selects GELU.
+        self.activation = F.relu if activation == "relu" else F.gelu
+    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
+        # Self-attention: delta is deliberately not forwarded here (delta=None);
+        # only the first return value of the attention module is used.
+        x = x + self.dropout(
+            self.self_attention(x, x, x, attn_mask=x_mask, tau=tau, delta=None)[0]
+        )
+        x = self.norm1(x)
+        # Cross-attention: queries come from x, keys and values from `cross`.
+        x = x + self.dropout(
+            self.cross_attention(
+                x, cross, cross, attn_mask=cross_mask, tau=tau, delta=delta
+            )[0]
+        )
+        # x keeps the normalized tensor for the final residual; y goes through the FFN.
+        y = x = self.norm2(x)
+        # transpose(-1, 1) swaps axes 1 and 2 so Conv1d sees d_model as channels.
+        y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
+        y = self.dropout(self.conv2(y).transpose(-1, 1))
+        return self.norm3(x + y)
+class Decoder(nn.Module):
+    """Transformer decoder.
+    Applies the given layers in order, then the optional norm layer and the
+    optional projection.
+    """
+    def __init__(self, layers, norm_layer=None, projection=None):
+        super(Decoder, self).__init__()
+        self.layers = nn.ModuleList(layers)
+        self.norm = norm_layer
+        self.projection = projection
+    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
+        # Every layer receives the same cross input, masks, tau and delta.
+        for layer in self.layers:
+            x = layer(
+                x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta
+            )
+        if self.norm is not None:
+            x = self.norm(x)
+        if self.projection is not None:
+            x = self.projection(x)
+        return x

layers/VoltageEmbed.py ADDED Viewed

	@@ -0,0 +1,465 @@

+"""
+VoltageEmbed: Specialized Embedding Layers for Power Grid Voltage Signals
+This module provides domain-specific embeddings designed for rural power grid
+voltage anomaly detection:
+1. PowerFrequencyEmbedding: Encodes 50Hz power frequency cycles
+2. DailyLoadEmbedding: Captures daily load patterns (24-hour cycles)
+3. ThreePhaseEmbedding: Encodes Va-Vb-Vc phase relationships (120-degree shift)
+4. VoltageDataEmbedding: Combined embedding for voltage signals
+Key innovations:
+- Exploit known periodicities in power systems (50Hz, daily, weekly)
+- Encode three-phase relationships (phase angle differences)
+- Integrate voltage quality features (THD, unbalance) into embeddings
+"""
+import math
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class PowerFrequencyEmbedding(nn.Module):
+    """
+    Embedding that encodes power frequency cycle information.
+    In 50Hz power systems, each cycle is 20ms. For data sampled at different
+    rates, this embedding helps the model understand where each sample falls
+    within the power frequency cycle.
+    For anomaly detection with second-level sampling, this embedding encodes
+    the phase relationship with respect to longer-term harmonics.
+    The table is a fixed (non-learnable) buffer built once in __init__.
+    """
+    def __init__(self, d_model, max_len=5000, power_freq=50.0, sample_rate=1.0):
+        """
+        Args:
+            d_model: Embedding dimension
+            max_len: Maximum sequence length
+            power_freq: Power system frequency (50Hz or 60Hz)
+            sample_rate: Sampling rate in Hz
+        """
+        super(PowerFrequencyEmbedding, self).__init__()
+        self.d_model = d_model
+        self.power_freq = power_freq
+        self.sample_rate = sample_rate
+        # Pre-compute power frequency cycle embeddings
+        pe = torch.zeros(max_len, d_model).float()
+        pe.requires_grad = False
+        position = torch.arange(0, max_len).float().unsqueeze(1)
+        # Multiple harmonics of power frequency
+        harmonics = [1, 2, 3, 5, 7]  # Fundamental + common harmonics
+        # Each harmonic owns a slice of 2 * harmonic_dim columns. Columns past
+        # len(harmonics) * 2 * harmonic_dim are never written and stay zero;
+        # for d_model < 10 harmonic_dim is 0 and the whole table stays zero.
+        harmonic_dim = d_model // (len(harmonics) * 2)
+        for h_idx, harmonic in enumerate(harmonics):
+            freq = power_freq * harmonic
+            # Angular frequency considering sample rate
+            # NOTE(review): with the defaults (power_freq=50, sample_rate=1)
+            # omega is an integer multiple of 2*pi, so sin/cos of
+            # position * omega + phase_shift are mathematically the same for
+            # every integer position — confirm the intended sample_rate.
+            omega = 2.0 * math.pi * freq / sample_rate
+            start_idx = h_idx * harmonic_dim * 2
+            end_idx = min(start_idx + harmonic_dim * 2, d_model)
+            # Use alternating sin/cos for each harmonic
+            for i in range(0, end_idx - start_idx, 2):
+                # Each sin/cos pair in the slice gets its own phase offset in [0, pi).
+                phase_shift = i * math.pi / (end_idx - start_idx)
+                if start_idx + i < d_model:
+                    pe[:, start_idx + i] = torch.sin(
+                        position.squeeze() * omega + phase_shift
+                    )
+                if start_idx + i + 1 < d_model:
+                    pe[:, start_idx + i + 1] = torch.cos(
+                        position.squeeze() * omega + phase_shift
+                    )
+        # Leading axis of size 1 so the table broadcasts over the batch.
+        pe = pe.unsqueeze(0)
+        self.register_buffer("pe", pe)
+    def forward(self, x):
+        """
+        Args:
+            x: Input tensor [B, T, C]; only its length T = x.size(1) is used
+        Returns:
+            Power frequency positional encoding [1, T, d_model] (leading axis
+            of size 1, not B). If T exceeds max_len the slice is silently
+            limited to max_len rows.
+        """
+        return self.pe[:, : x.size(1)]
+class DailyLoadEmbedding(nn.Module):
+    """
+    Embedding that captures daily load patterns in power systems.
+    Power consumption follows predictable daily patterns:
+    - Morning peak (7-9 AM)
+    - Midday trough (12-2 PM)
+    - Evening peak (6-9 PM)
+    - Night low (12-6 AM)
+    This embedding helps the model understand time-of-day context.
+    """
+    def __init__(self, d_model, samples_per_day=86400):
+        """
+        Args:
+            d_model: Embedding dimension
+            samples_per_day: Number of samples per day (86400 for 1Hz sampling)
+        """
+        super(DailyLoadEmbedding, self).__init__()
+        self.d_model = d_model
+        self.samples_per_day = samples_per_day
+        # Pre-compute daily cycle embeddings
+        # Use multiple frequencies to capture different daily patterns
+        daily_periods = [
+            samples_per_day,  # Full day cycle
+            samples_per_day // 2,  # Half-day (AM/PM)
+            samples_per_day // 3,  # 8-hour cycles
+            samples_per_day // 4,  # 6-hour cycles
+            samples_per_day // 6,  # 4-hour cycles
+        ]
+        # One learnable lookup table per period: `period` rows (at least 1),
+        # each of width d_model // 5.
+        self.period_embeddings = nn.ModuleList(
+            [
+                nn.Embedding(max(period, 1), d_model // len(daily_periods))
+                for period in daily_periods
+            ]
+        )
+        # Projection to combine period embeddings
+        # (input width is 5 * (d_model // 5), which can be smaller than d_model)
+        self.projection = nn.Linear(
+            d_model // len(daily_periods) * len(daily_periods), d_model
+        )
+    def forward(self, x, time_indices=None):
+        """
+        Args:
+            x: Input tensor [B, T, C]; only its shape and device are used
+            time_indices: Optional time indices within day [B, T]
+        Returns:
+            Daily load pattern embedding [B, T, d_model]
+        """
+        B, T, _ = x.size()
+        if time_indices is None:
+            # Assume sequential time indices
+            # (0..T-1 is the position inside the window, not a wall-clock time)
+            time_indices = torch.arange(T, device=x.device).unsqueeze(0).expand(B, -1)
+        embeddings = []
+        for i, emb in enumerate(self.period_embeddings):
+            # The table size doubles as the period, so the index wraps inside the table.
+            period = emb.num_embeddings
+            period_idx = (time_indices % period).long()
+            embeddings.append(emb(period_idx))
+        combined = torch.cat(embeddings, dim=-1)
+        return self.projection(combined)
+class ThreePhaseEmbedding(nn.Module):
+    """
+    Embedding that encodes three-phase relationships for Va, Vb, Vc.
+    In balanced three-phase systems:
+    - Va, Vb, Vc are 120 degrees (2π/3) apart
+    - Positive sequence: Va leads Vb leads Vc
+    - Negative sequence: Va leads Vc leads Vb (indicates unbalance)
+    This embedding helps the model understand inter-phase relationships.
+    """
+    def __init__(self, d_model, num_phases=3):
+        """
+        Args:
+            d_model: Embedding dimension
+            num_phases: Number of phases (3 for three-phase systems)
+        """
+        super(ThreePhaseEmbedding, self).__init__()
+        self.d_model = d_model
+        self.num_phases = num_phases
+        # Phase angle embeddings (0, 120, 240 degrees)
+        phase_angles = torch.tensor([0, 2 * math.pi / 3, 4 * math.pi / 3])
+        self.register_buffer("phase_angles", phase_angles)
+        # Learnable phase embedding
+        self.phase_embed = nn.Embedding(num_phases, d_model)
+        # Positive and negative sequence embeddings
+        self.pos_seq_embed = nn.Linear(num_phases, d_model)
+        self.neg_seq_embed = nn.Linear(num_phases, d_model)
+    def forward(self, x, channel_ids=None):
+        """
+        Args:
+            x: Input tensor [B, T, C] where C includes three-phase channels
+            channel_ids: Optional tensor indicating which channels are Va, Vb, Vc
+        Returns:
+            Three-phase embedding [B, T, d_model]
+        """
+        B, T, C = x.size()
+        # Assume first 3 channels are Va, Vb, Vc if not specified
+        if channel_ids is None:
+            voltage_channels = min(3, C)
+        else:
+            voltage_channels = len(channel_ids)
+        # Get phase embeddings
+        phase_ids = torch.arange(voltage_channels, device=x.device)
+        phase_emb = self.phase_embed(phase_ids)  # [num_phases, d_model]
+        # Calculate symmetrical components (simplified)
+        # Positive sequence: Va + a*Vb + a²*Vc where a = exp(j*2π/3)
+        if voltage_channels >= 3:
+            v_abc = x[:, :, :3]  # [B, T, 3]
+            # Positive sequence embedding
+            pos_emb = self.pos_seq_embed(v_abc)  # [B, T, d_model]
+            # Negative sequence: Va + a²*Vb + a*Vc
+            v_acb = torch.stack([x[:, :, 0], x[:, :, 2], x[:, :, 1]], dim=-1)
+            neg_emb = self.neg_seq_embed(v_acb)  # [B, T, d_model]
+            # Combine with phase embeddings
+            combined = (
+                pos_emb + 0.1 * neg_emb + phase_emb.mean(0).unsqueeze(0).unsqueeze(0)
+            )
+        else:
+            combined = phase_emb[:voltage_channels].mean(0).unsqueeze(0).unsqueeze(0)
+            combined = combined.expand(B, T, -1)
+        return combined
+class VoltageQualityEmbedding(nn.Module):
+    """
+    Embedding that encodes voltage quality indicators.
+    Key voltage quality metrics:
+    - Voltage deviation from nominal
+    - Total Harmonic Distortion (THD)
+    - Voltage unbalance factor
+    - Frequency deviation
+    """
+    def __init__(self, d_model, nominal_voltage=220.0, nominal_freq=50.0):
+        """
+        Args:
+            d_model: Embedding dimension
+            nominal_voltage: Nominal voltage value (V)
+            nominal_freq: Nominal frequency (Hz)
+        """
+        super(VoltageQualityEmbedding, self).__init__()
+        self.d_model = d_model
+        self.nominal_voltage = nominal_voltage
+        self.nominal_freq = nominal_freq
+        # Quality indicator projections
+        self.voltage_deviation_proj = nn.Linear(1, d_model // 4)
+        self.thd_proj = nn.Linear(1, d_model // 4)
+        self.unbalance_proj = nn.Linear(1, d_model // 4)
+        self.freq_deviation_proj = nn.Linear(1, d_model // 4)
+        # Combination projection
+        self.combine_proj = nn.Linear(d_model, d_model)
+    def forward(self, voltage, thd=None, unbalance=None, freq=None):
+        """
+        Args:
+            voltage: Voltage values [B, T, num_phases]
+            thd: Total harmonic distortion [B, T, 1] or None
+            unbalance: Voltage unbalance factor [B, T, 1] or None
+            freq: System frequency [B, T, 1] or None
+        Returns:
+            Voltage quality embedding [B, T, d_model]
+        """
+        B, T, _ = voltage.size()
+        # Calculate voltage deviation
+        mean_voltage = voltage.mean(dim=-1, keepdim=True)
+        voltage_dev = (mean_voltage - self.nominal_voltage) / self.nominal_voltage
+        volt_emb = self.voltage_deviation_proj(voltage_dev)
+        # THD embedding
+        if thd is not None:
+            thd_emb = self.thd_proj(thd)
+        else:
+            thd_emb = torch.zeros(B, T, self.d_model // 4, device=voltage.device)
+        # Unbalance embedding
+        if unbalance is not None:
+            unb_emb = self.unbalance_proj(unbalance)
+        else:
+            # Calculate simple unbalance from voltage
+            if voltage.size(-1) >= 3:
+                v_mean = voltage[:, :, :3].mean(dim=-1, keepdim=True)
+                v_max_dev = (
+                    (voltage[:, :, :3] - v_mean).abs().max(dim=-1, keepdim=True)[0]
+                )
+                unb = v_max_dev / (v_mean + 1e-8) * 100
+                unb_emb = self.unbalance_proj(unb)
+            else:
+                unb_emb = torch.zeros(B, T, self.d_model // 4, device=voltage.device)
+        # Frequency deviation embedding
+        if freq is not None:
+            freq_dev = (freq - self.nominal_freq) / self.nominal_freq
+            freq_emb = self.freq_deviation_proj(freq_dev)
+        else:
+            freq_emb = torch.zeros(B, T, self.d_model // 4, device=voltage.device)
+        # Combine all quality embeddings
+        combined = torch.cat([volt_emb, thd_emb, unb_emb, freq_emb], dim=-1)
+        return self.combine_proj(combined)
+class VoltageDataEmbedding(nn.Module):
+    """
+    Complete data embedding for voltage anomaly detection.
+    Combines:
+    1. Token embedding (from raw values)
+    2. Power frequency embedding
+    3. Daily load embedding
+    4. Three-phase embedding
+    5. Voltage quality embedding
+    The enabled embeddings are mixed with softmax-normalized learnable weights.
+    """
+    def __init__(
+        self,
+        c_in,
+        d_model,
+        embed_type="fixed",
+        freq="h",
+        dropout=0.1,
+        use_power_freq=True,
+        use_daily=True,
+        use_three_phase=True,
+        use_quality=True,
+        sample_rate=1.0,
+    ):
+        """
+        Args:
+            c_in: Number of input channels
+            d_model: Embedding dimension
+            embed_type: Type of temporal embedding (not used in this class)
+            freq: Frequency of data (not used in this class)
+            dropout: Dropout rate
+            use_power_freq: Whether to use power frequency embedding
+            use_daily: Whether to use daily load embedding
+            use_three_phase: Whether to use three-phase embedding
+            use_quality: Whether to use voltage quality embedding
+            sample_rate: Sampling rate in Hz
+        """
+        super(VoltageDataEmbedding, self).__init__()
+        self.d_model = d_model
+        self.use_power_freq = use_power_freq
+        self.use_daily = use_daily
+        self.use_three_phase = use_three_phase
+        self.use_quality = use_quality
+        # Token embedding (1D convolution)
+        # kernel_size=3 with circular padding=1 keeps the sequence length.
+        padding = 1
+        self.token_conv = nn.Conv1d(
+            in_channels=c_in,
+            out_channels=d_model,
+            kernel_size=3,
+            padding=padding,
+            padding_mode="circular",
+            bias=False,
+        )
+        nn.init.kaiming_normal_(
+            self.token_conv.weight, mode="fan_in", nonlinearity="leaky_relu"
+        )
+        # Optional embeddings
+        # (each sub-module exists only when its flag is set)
+        if use_power_freq:
+            self.power_freq_embedding = PowerFrequencyEmbedding(
+                d_model, sample_rate=sample_rate
+            )
+        if use_daily:
+            # Samples per day depends on sampling rate
+            samples_per_day = int(86400 * sample_rate)
+            self.daily_embedding = DailyLoadEmbedding(
+                d_model, samples_per_day=samples_per_day
+            )
+        if use_three_phase:
+            self.three_phase_embedding = ThreePhaseEmbedding(d_model)
+        if use_quality:
+            self.quality_embedding = VoltageQualityEmbedding(d_model)
+        # Combination weights
+        # The bool flags add up as 0/1, so this counts the token embedding plus
+        # every enabled optional embedding.
+        num_embeddings = 1 + use_power_freq + use_daily + use_three_phase + use_quality
+        self.combination_weights = nn.Parameter(
+            torch.ones(num_embeddings) / num_embeddings
+        )
+        self.dropout = nn.Dropout(p=dropout)
+    def forward(self, x, x_mark=None):
+        """
+        Args:
+            x: Input tensor [B, T, C]
+            x_mark: Optional temporal marks [B, T, mark_dim] (accepted but not
+                used in this method)
+        Returns:
+            Embedded tensor [B, T, d_model]
+        """
+        # Token embedding
+        # (Conv1d runs on [B, C, T]; the transpose restores [B, T, d_model])
+        token_emb = self.token_conv(x.permute(0, 2, 1)).transpose(1, 2)
+        embeddings = [token_emb]
+        weights = [self.combination_weights[0]]
+        # idx walks through combination_weights in the same order as the
+        # enabled embeddings below.
+        idx = 1
+        # Power frequency embedding
+        if self.use_power_freq:
+            pf_emb = self.power_freq_embedding(x)
+            embeddings.append(pf_emb)
+            weights.append(self.combination_weights[idx])
+            idx += 1
+        # Daily embedding
+        # (called without time_indices, so positions 0..T-1 within the window are used)
+        if self.use_daily:
+            daily_emb = self.daily_embedding(x)
+            embeddings.append(daily_emb)
+            weights.append(self.combination_weights[idx])
+            idx += 1
+        # Three-phase embedding
+        if self.use_three_phase:
+            tp_emb = self.three_phase_embedding(x)
+            embeddings.append(tp_emb)
+            weights.append(self.combination_weights[idx])
+            idx += 1
+        # Quality embedding
+        if self.use_quality:
+            # Extract voltage channels (assume first 3)
+            voltage = x[:, :, : min(3, x.size(-1))]
+            # No thd / unbalance / freq inputs are passed to the quality embedding.
+            qual_emb = self.quality_embedding(voltage)
+            embeddings.append(qual_emb)
+            weights.append(self.combination_weights[idx])
+        # Weighted combination
+        # softmax over the selected weights makes them positive and sum to 1.
+        weights = F.softmax(torch.stack(weights), dim=0)
+        combined = sum(w * e for w, e in zip(weights, embeddings))
+        return self.dropout(combined)

layers/__init__.py ADDED Viewed

	@@ -0,0 +1,27 @@

+# Layers module for Voltage Anomaly Detection
+from .Conv_Blocks import Inception_Block_V1, Inception_Block_V2
+from .Embed import (
+    DataEmbedding,
+    DataEmbedding_inverted,
+    DataEmbedding_wo_pos,
+    FixedEmbedding,
+    PatchEmbedding,
+    PositionalEmbedding,
+    TemporalEmbedding,
+    TimeFeatureEmbedding,
+    TokenEmbedding,
+)
+__all__ = [
+    "PositionalEmbedding",
+    "TokenEmbedding",
+    "FixedEmbedding",
+    "TemporalEmbedding",
+    "TimeFeatureEmbedding",
+    "DataEmbedding",
+    "DataEmbedding_inverted",
+    "DataEmbedding_wo_pos",
+    "PatchEmbedding",
+    "Inception_Block_V1",
+    "Inception_Block_V2",
+]

layers/__pycache__/AutoCorrelation.cpython-310.pyc ADDED Viewed

Binary file (5.38 kB). View file

layers/__pycache__/AutoCorrelation.cpython-311.pyc ADDED Viewed

Binary file (11.7 kB). View file

layers/__pycache__/Autoformer_EncDec.cpython-310.pyc ADDED Viewed

Binary file (6.85 kB). View file

layers/__pycache__/Autoformer_EncDec.cpython-311.pyc ADDED Viewed

Binary file (13.7 kB). View file

layers/__pycache__/Conv_Blocks.cpython-310.pyc ADDED Viewed

Binary file (2.65 kB). View file

layers/__pycache__/Conv_Blocks.cpython-311.pyc ADDED Viewed

Binary file (5.28 kB). View file

layers/__pycache__/Embed.cpython-310.pyc ADDED Viewed

Binary file (7.66 kB). View file

layers/__pycache__/Embed.cpython-311.pyc ADDED Viewed

Binary file (15.4 kB). View file

layers/__pycache__/SelfAttention_Family.cpython-310.pyc ADDED Viewed

Binary file (9.84 kB). View file

layers/__pycache__/SelfAttention_Family.cpython-311.pyc ADDED Viewed

Binary file (20 kB). View file

layers/__pycache__/StandardNorm.cpython-310.pyc ADDED Viewed

Binary file (2.57 kB). View file

layers/__pycache__/StandardNorm.cpython-311.pyc ADDED Viewed

Binary file (4.31 kB). View file

layers/__pycache__/Transformer_EncDec.cpython-310.pyc ADDED Viewed

Binary file (4.97 kB). View file

layers/__pycache__/Transformer_EncDec.cpython-311.pyc ADDED Viewed

Binary file (9.95 kB). View file

layers/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (599 Bytes). View file

layers/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (756 Bytes). View file

models/DLinear.py ADDED Viewed

	@@ -0,0 +1,127 @@

+"""
+DLinear: Decomposition Linear Model for Time Series.
+Paper: Are Transformers Effective for Time Series Forecasting?
+Link: https://arxiv.org/pdf/2205.13504.pdf
+"""
+import torch
+import torch.nn as nn
+from layers.Autoformer_EncDec import series_decomp
+class Model(nn.Module):
+    """
+    DLinear: Simple linear model with series decomposition.
+    Decomposes time series into trend and seasonal, applies linear layers separately.
+    """
+    def __init__(self, configs, individual=False):
+        """
+        individual: Bool, whether shared model among different variates.
+        """
+        super(Model, self).__init__()
+        self.task_name = configs.task_name
+        self.seq_len = configs.seq_len
+        if (
+            self.task_name == "classification"
+            or self.task_name == "anomaly_detection"
+            or self.task_name == "imputation"
+        ):
+            self.pred_len = configs.seq_len
+        else:
+            self.pred_len = configs.pred_len
+        # Series decomposition block from Autoformer
+        self.decompsition = series_decomp(configs.moving_avg)
+        self.individual = individual
+        self.channels = configs.enc_in
+        if self.individual:
+            self.Linear_Seasonal = nn.ModuleList()
+            self.Linear_Trend = nn.ModuleList()
+            for i in range(self.channels):
+                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
+                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))
+                self.Linear_Seasonal[i].weight = nn.Parameter(
+                    (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])
+                )
+                self.Linear_Trend[i].weight = nn.Parameter(
+                    (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])
+                )
+        else:
+            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
+            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
+            self.Linear_Seasonal.weight = nn.Parameter(
+                (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])
+            )
+            self.Linear_Trend.weight = nn.Parameter(
+                (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len])
+            )
+        if self.task_name == "classification":
+            self.projection = nn.Linear(
+                configs.enc_in * configs.seq_len, configs.num_class
+            )
+    def encoder(self, x):
+        seasonal_init, trend_init = self.decompsition(x)
+        seasonal_init, trend_init = seasonal_init.permute(0, 2, 1), trend_init.permute(
+            0, 2, 1
+        )
+        if self.individual:
+            seasonal_output = torch.zeros(
+                [seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
+                dtype=seasonal_init.dtype,
+            ).to(seasonal_init.device)
+            trend_output = torch.zeros(
+                [trend_init.size(0), trend_init.size(1), self.pred_len],
+                dtype=trend_init.dtype,
+            ).to(trend_init.device)
+            for i in range(self.channels):
+                seasonal_output[:, i, :] = self.Linear_Seasonal[i](
+                    seasonal_init[:, i, :]
+                )
+                trend_output[:, i, :] = self.Linear_Trend[i](trend_init[:, i, :])
+        else:
+            seasonal_output = self.Linear_Seasonal(seasonal_init)
+            trend_output = self.Linear_Trend(trend_init)
+        x = seasonal_output + trend_output
+        return x.permute(0, 2, 1)
+    def forecast(self, x_enc):
+        return self.encoder(x_enc)
+    def imputation(self, x_enc):
+        return self.encoder(x_enc)
+    def anomaly_detection(self, x_enc):
+        return self.encoder(x_enc)
+    def classification(self, x_enc):
+        enc_out = self.encoder(x_enc)
+        output = enc_out.reshape(enc_out.shape[0], -1)
+        output = self.projection(output)
+        return output
+    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
+        if (
+            self.task_name == "long_term_forecast"
+            or self.task_name == "short_term_forecast"
+        ):
+            dec_out = self.forecast(x_enc)
+            return dec_out[:, -self.pred_len :, :]
+        if self.task_name == "imputation":
+            dec_out = self.imputation(x_enc)
+            return dec_out
+        if self.task_name == "anomaly_detection":
+            dec_out = self.anomaly_detection(x_enc)
+            return dec_out
+        if self.task_name == "classification":
+            dec_out = self.classification(x_enc)
+            return dec_out
+        return None

models/MTSTimesNet.py ADDED Viewed

	@@ -0,0 +1,418 @@

+"""
+MTSTimesNet: Multi-scale Temporal TimesNet for Rural Voltage Anomaly Detection
+Core Innovation: Parallel multi-scale temporal branches that simultaneously capture
+patterns at different time scales (short-term fluctuations, medium-term trends, long-term patterns).
+Rural power grids exhibit multi-scale temporal patterns:
+- Short-term (seconds to minutes): Transient events, voltage sags
+- Medium-term (minutes to hours): Load variations, daily patterns
+- Long-term (hours to days): Seasonal patterns, systematic issues
+Key Components:
+1. Multi-scale TimesBlocks: Parallel branches with different period focus
+2. Adaptive Fusion Gate: Learns optimal combination of scales
+3. Cross-scale Residual Connections: Information flow across scales
+Author: Voltage Anomaly Detection Research
+"""
+from typing import List, Optional, Tuple
+import numpy as np
+import torch
+import torch.fft
+import torch.nn as nn
+import torch.nn.functional as F
+from layers.Conv_Blocks import Inception_Block_V1
+from layers.Embed import DataEmbedding
+def FFT_for_Period_Range(x, k=2, min_period=2, max_period=None):
+    """FFT-based period discovery with range constraints."""
+    B, T, C = x.size()
+    if max_period is None:
+        max_period = T // 2
+    xf = torch.fft.rfft(x, dim=1)
+    frequency_list = abs(xf).mean(0).mean(-1)
+    frequency_list[0] = 0
+    periods = T / (torch.arange(len(frequency_list), device=x.device) + 1e-8)
+    mask = (periods >= min_period) & (periods <= max_period)
+    frequency_list = frequency_list * mask.float()
+    _, top_list = torch.topk(frequency_list, min(k, mask.sum().item()))
+    top_list = top_list.detach().cpu().numpy()
+    period_list = T // (top_list + 1)
+    period_list = np.clip(period_list, min_period, max_period)
+    return period_list, abs(xf).mean(-1)[:, top_list]
+class ScaleSpecificTimesBlock(nn.Module):
+    """TimesBlock focused on a specific temporal scale."""
+    def __init__(
+        self,
+        configs,
+        scale_name: str = "medium",
+        min_period: int = 10,
+        max_period: int = 50,
+    ):
+        super(ScaleSpecificTimesBlock, self).__init__()
+        self.seq_len = configs.seq_len
+        self.pred_len = configs.pred_len
+        self.k = configs.top_k
+        self.scale_name = scale_name
+        self.min_period = min_period
+        self.max_period = min(max_period, configs.seq_len // 2)
+        self.conv = nn.Sequential(
+            Inception_Block_V1(
+                configs.d_model, configs.d_ff, num_kernels=configs.num_kernels
+            ),
+            nn.GELU(),
+            Inception_Block_V1(
+                configs.d_ff, configs.d_model, num_kernels=configs.num_kernels
+            ),
+        )
+        self.layer_norm = nn.LayerNorm(configs.d_model)
+    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        B, T, N = x.size()
+        period_list, period_weight = FFT_for_Period_Range(
+            x, self.k, self.min_period, self.max_period
+        )
+        res = []
+        for i in range(len(period_list)):
+            period = int(period_list[i])
+            if period < 2:
+                period = 2
+            total_len = self.seq_len + self.pred_len
+            if total_len % period != 0:
+                length = ((total_len // period) + 1) * period
+                padding = torch.zeros([B, length - total_len, N], device=x.device)
+                out = torch.cat([x, padding], dim=1)
+            else:
+                length = total_len
+                out = x
+            out = (
+                out.reshape(B, length // period, period, N)
+                .permute(0, 3, 1, 2)
+                .contiguous()
+            )
+            out = self.conv(out)
+            out = out.permute(0, 2, 3, 1).reshape(B, -1, N)
+            res.append(out[:, :total_len, :])
+        if len(res) == 0:
+            return x, torch.ones(B, 1, device=x.device)
+        res = torch.stack(res, dim=-1)
+        period_weight = F.softmax(period_weight, dim=1)
+        period_weight = period_weight.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1)
+        output = torch.sum(res * period_weight, dim=-1)
+        output = self.layer_norm(output + x)
+        return output, period_weight.mean(dim=(1, 2))
+class AdaptiveFusionGate(nn.Module):
+    """Adaptive gate for fusing multi-scale features."""
+    def __init__(self, d_model: int, n_scales: int = 3):
+        super(AdaptiveFusionGate, self).__init__()
+        self.n_scales = n_scales
+        self.global_pool = nn.AdaptiveAvgPool1d(1)
+        self.gate_network = nn.Sequential(
+            nn.Linear(d_model * n_scales, d_model),
+            nn.ReLU(),
+            nn.Linear(d_model, n_scales),
+            nn.Softmax(dim=-1),
+        )
+    def forward(self, scale_features: List[torch.Tensor]) -> torch.Tensor:
+        B, T, D = scale_features[0].size()
+        contexts = []
+        for feat in scale_features:
+            ctx = self.global_pool(feat.transpose(1, 2)).squeeze(-1)
+            contexts.append(ctx)
+        combined_ctx = torch.cat(contexts, dim=-1)
+        weights = self.gate_network(combined_ctx)
+        weights = weights.unsqueeze(1).unsqueeze(-1)
+        stacked = torch.stack(scale_features, dim=2)
+        fused = (stacked * weights).sum(dim=2)
+        return fused
+class CrossScaleConnection(nn.Module):
+    """Cross-scale residual connections for information exchange between scales."""
+    def __init__(self, d_model: int, n_scales: int = 3):
+        super(CrossScaleConnection, self).__init__()
+        self.n_scales = n_scales
+        self.cross_attention = nn.MultiheadAttention(
+            d_model, num_heads=4, dropout=0.1, batch_first=True
+        )
+        self.projections = nn.ModuleList(
+            [nn.Linear(d_model, d_model) for _ in range(n_scales)]
+        )
+    def forward(self, scale_features: List[torch.Tensor]) -> List[torch.Tensor]:
+        B, T, D = scale_features[0].size()
+        all_scales = torch.cat(scale_features, dim=1)
+        enhanced = []
+        for i, feat in enumerate(scale_features):
+            attended, _ = self.cross_attention(feat, all_scales, all_scales)
+            enhanced_feat = self.projections[i](feat + attended)
+            enhanced.append(enhanced_feat)
+        return enhanced
+class Model(nn.Module):
+    """
+    MTSTimesNet: Multi-scale Temporal TimesNet
+    Architecture:
+    - Shared Embedding Layer
+    - Parallel Multi-scale TimesBlocks
+    - Cross-scale Connections
+    - Adaptive Fusion Gate
+    - Output Projection
+    """
+    # Default period range searched by each scale branch. __init__ works on a
+    # per-instance deep copy, so these class-level values are never mutated.
+    SCALE_CONFIGS = {
+        "short": {"min_period": 2, "max_period": 20},
+        "medium": {"min_period": 20, "max_period": 60},
+        "long": {"min_period": 60, "max_period": 200},
+    }
+    def __init__(self, configs):
+        """
+        Build the embedding, the per-scale block stacks, the cross-scale
+        links, the fusion gate and the task-specific head.
+        configs: object exposing task_name, seq_len, enc_in, d_model, embed,
+            freq, dropout and e_layers; c_out for the anomaly_detection,
+            imputation and forecast heads; num_class for classification.
+            label_len and pred_len are read here with a default of 0. The
+            blocks constructed below read further fields from configs.
+        """
+        super(Model, self).__init__()
+        self.configs = configs
+        self.task_name = configs.task_name
+        self.seq_len = configs.seq_len
+        self.label_len = getattr(configs, "label_len", 0)
+        self.pred_len = getattr(configs, "pred_len", 0)
+        # Create an instance-level copy of scale_configs so the class
+        # attribute is never modified.
+        import copy
+        self.scale_configs = copy.deepcopy(self.SCALE_CONFIGS)
+        self._adjust_scale_configs()
+        self.enc_embedding = DataEmbedding(
+            configs.enc_in,
+            configs.d_model,
+            configs.embed,
+            configs.freq,
+            configs.dropout,
+        )
+        self.n_layers = configs.e_layers
+        self.n_scales = len(self.scale_configs)
+        # One stack of n_layers blocks per scale, keyed by scale name.
+        self.scale_blocks = nn.ModuleDict()
+        for scale_name, scale_cfg in self.scale_configs.items():
+            self.scale_blocks[scale_name] = nn.ModuleList(
+                [
+                    ScaleSpecificTimesBlock(
+                        configs,
+                        scale_name=scale_name,
+                        min_period=scale_cfg["min_period"],
+                        max_period=scale_cfg["max_period"],
+                    )
+                    for _ in range(self.n_layers)
+                ]
+            )
+        # Cross-scale exchange happens between layers, hence n_layers - 1 links.
+        self.cross_scale = nn.ModuleList(
+            [
+                CrossScaleConnection(configs.d_model, self.n_scales)
+                for _ in range(self.n_layers - 1)
+            ]
+        )
+        self.fusion_gate = AdaptiveFusionGate(configs.d_model, self.n_scales)
+        self.layer_norm = nn.LayerNorm(configs.d_model)
+        # Task-specific output heads.
+        if self.task_name == "anomaly_detection" or self.task_name == "imputation":
+            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
+        if (
+            self.task_name == "long_term_forecast"
+            or self.task_name == "short_term_forecast"
+        ):
+            # Extends the time axis from seq_len to seq_len + pred_len.
+            self.predict_linear = nn.Linear(self.seq_len, self.pred_len + self.seq_len)
+            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
+        if self.task_name == "classification":
+            self.act = F.gelu
+            self.dropout = nn.Dropout(configs.dropout)
+            self.projection = nn.Linear(
+                configs.d_model * configs.seq_len, configs.num_class
+            )
+    def _adjust_scale_configs(self):
+        """
+        Adjust the scale configuration on the instance copy, leaving the
+        class attribute untouched.
+        Every max_period is capped at seq_len // 2. If fewer than three scales
+        still satisfy min_period < max_period afterwards, all three ranges are
+        replaced by ones derived from seq_len.
+        """
+        seq_len = self.seq_len
+        for scale_name in self.scale_configs:
+            max_period = self.scale_configs[scale_name]["max_period"]
+            if max_period > seq_len // 2:
+                self.scale_configs[scale_name]["max_period"] = seq_len // 2
+        valid_scales = {
+            k: v
+            for k, v in self.scale_configs.items()
+            if v["min_period"] < v["max_period"]
+        }
+        if len(valid_scales) < 3:
+            # NOTE(review): for short sequences the "long" range below can end
+            # up with min_period > max_period (e.g. seq_len = 10 gives 8 and 5)
+            # -- confirm that an empty range is acceptable for that branch.
+            self.scale_configs = {
+                "short": {"min_period": 2, "max_period": max(4, seq_len // 10)},
+                "medium": {
+                    "min_period": max(4, seq_len // 10),
+                    "max_period": max(8, seq_len // 5),
+                },
+                "long": {
+                    "min_period": max(8, seq_len // 5),
+                    "max_period": seq_len // 2,
+                },
+            }
+    def _process_multi_scale(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Run ``x`` through every scale branch layer by layer, exchange
+        information across scales after each layer but the last, then fuse
+        the branches with the gate and apply LayerNorm.
+        """
+        scale_names = list(self.scale_blocks.keys())
+        # Every branch starts from the same embedded input.
+        scale_features = {name: x for name in scale_names}
+        for layer_idx in range(self.n_layers):
+            new_features = {}
+            for scale_name in scale_names:
+                # self.scale_blocks[scale_name] is a ModuleList; index it directly
+                block_list = self.scale_blocks[scale_name]
+                feat, _ = block_list[layer_idx](scale_features[scale_name])
+                new_features[scale_name] = feat
+            if layer_idx < self.n_layers - 1:
+                feature_list = [new_features[name] for name in scale_names]
+                enhanced_list = self.cross_scale[layer_idx](feature_list)
+                for i, name in enumerate(scale_names):
+                    new_features[name] = enhanced_list[i]
+            scale_features = new_features
+        feature_list = [scale_features[name] for name in scale_names]
+        fused = self.fusion_gate(feature_list)
+        return self.layer_norm(fused)
+    def anomaly_detection(self, x_enc: torch.Tensor) -> torch.Tensor:
+        """
+        Reconstruct ``x_enc``: normalise along dim 1, embed, run the
+        multi-scale blocks, project, then undo the normalisation.
+        """
+        # Per-sample, per-channel normalisation statistics over dim 1.
+        means = x_enc.mean(1, keepdim=True).detach()
+        x_enc = x_enc - means
+        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
+        x_enc = x_enc / stdev
+        enc_out = self.enc_embedding(x_enc, None)
+        enc_out = self._process_multi_scale(enc_out)
+        dec_out = self.projection(enc_out)
+        # De-normalise with the statistics computed above.
+        dec_out = dec_out * stdev[:, 0, :].unsqueeze(1).repeat(
+            1, self.seq_len + self.pred_len, 1
+        )
+        dec_out = dec_out + means[:, 0, :].unsqueeze(1).repeat(
+            1, self.seq_len + self.pred_len, 1
+        )
+        return dec_out
+    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
+        """
+        Produce a sequence of length seq_len + pred_len from ``x_enc``.
+        x_dec and x_mark_dec are accepted but not used in this method.
+        """
+        means = x_enc.mean(1, keepdim=True).detach()
+        x_enc = x_enc - means
+        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
+        x_enc = x_enc / stdev
+        enc_out = self.enc_embedding(x_enc, x_mark_enc)
+        # Apply predict_linear along the time axis (moved last, then restored).
+        enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute(0, 2, 1)
+        enc_out = self._process_multi_scale(enc_out)
+        dec_out = self.projection(enc_out)
+        # De-normalise with the input statistics.
+        dec_out = dec_out * stdev[:, 0, :].unsqueeze(1).repeat(
+            1, self.pred_len + self.seq_len, 1
+        )
+        dec_out = dec_out + means[:, 0, :].unsqueeze(1).repeat(
+            1, self.pred_len + self.seq_len, 1
+        )
+        return dec_out
+    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
+        """
+        Reconstruct ``x_enc`` using statistics of the entries where mask == 1.
+        x_dec and x_mark_dec are accepted but not used in this method.
+        """
+        # Mean over entries with mask == 1; presumably x_enc is already zero
+        # where mask == 0 -- TODO confirm against the caller.
+        means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)
+        means = means.unsqueeze(1).detach()
+        x_enc = x_enc - means
+        # Zero the entries with mask == 0 so they do not contribute below.
+        x_enc = x_enc.masked_fill(mask == 0, 0)
+        stdev = torch.sqrt(
+            torch.sum(x_enc * x_enc, dim=1) / torch.sum(mask == 1, dim=1) + 1e-5
+        )
+        stdev = stdev.unsqueeze(1).detach()
+        x_enc = x_enc / stdev
+        enc_out = self.enc_embedding(x_enc, x_mark_enc)
+        enc_out = self._process_multi_scale(enc_out)
+        dec_out = self.projection(enc_out)
+        # De-normalise with the masked statistics.
+        dec_out = dec_out * stdev[:, 0, :].unsqueeze(1).repeat(
+            1, self.pred_len + self.seq_len, 1
+        )
+        dec_out = dec_out + means[:, 0, :].unsqueeze(1).repeat(
+            1, self.pred_len + self.seq_len, 1
+        )
+        return dec_out
+    def classification(self, x_enc, x_mark_enc):
+        """
+        Embed and process ``x_enc``, apply GELU and dropout, flatten each
+        sample and project it to num_class outputs.
+        """
+        enc_out = self.enc_embedding(x_enc, None)
+        enc_out = self._process_multi_scale(enc_out)
+        output = self.act(enc_out)
+        output = self.dropout(output)
+        # x_mark_enc is multiplied in per time step; presumably a padding
+        # mask -- TODO confirm against the caller.
+        output = output * x_mark_enc.unsqueeze(-1)
+        output = output.reshape(output.shape[0], -1)
+        output = self.projection(output)
+        return output
+    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
+        """
+        Dispatch on ``task_name``. Forecast tasks return only the last
+        pred_len steps; an unrecognised task name returns None.
+        """
+        if (
+            self.task_name == "long_term_forecast"
+            or self.task_name == "short_term_forecast"
+        ):
+            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
+            return dec_out[:, -self.pred_len :, :]
+        if self.task_name == "imputation":
+            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
+            return dec_out
+        if self.task_name == "anomaly_detection":
+            dec_out = self.anomaly_detection(x_enc)
+            return dec_out
+        if self.task_name == "classification":
+            dec_out = self.classification(x_enc, x_mark_enc)
+            return dec_out
+        return None