Transformer - 时间特征的处理

时间:2024-04-23 07:34:49

Transformer - 时间特征的处理

flyfish

ETTm1.csv有如下内容
(原文此处为 ETTm1.csv 内容的截图,图片未随文本保留)

假如有 2016/7/1 0:45:00 这样的时间字符串,如何将其转换成时间特征列表?

from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


class TimeFeature:
    """Base type for callables that map datetime fields to numeric features.

    The base implementation computes nothing: calling an instance returns
    ``None``. Concrete features are expected to override ``__call__``.
    """

    def __init__(self):
        """Create the feature object; no state is stored on the instance."""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        """Placeholder hook: ignores ``index`` and returns ``None``."""
        return None

    def __repr__(self):
        """Render as ``ClassName()`` using the name of the concrete class."""
        return f"{type(self).__name__}()"


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Seconds run 0..59: dividing by 59 maps them onto [0, 1], and the
        # 0.5 shift centres the range on zero.
        seconds = index.second
        return seconds / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute within the hour, linearly rescaled onto [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Minutes run 0..59: minute 0 maps to -0.5 and minute 59 to 0.5.
        minutes = index.minute
        return minutes / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour within the day, linearly rescaled onto [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Hours run 0..23: hour 0 maps to -0.5 and hour 23 to 0.5.
        hours = index.hour
        return hours / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # `dayofweek` runs 0..6: dividing by 6 maps it onto [0, 1] before
        # the 0.5 shift centres the range on zero.
        weekday = index.dayofweek
        return weekday / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day within the month, linearly rescaled onto [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Days are 1-based, so shift to 0..30 first; day 1 maps to -0.5 and
        # day 31 to 0.5 (shorter months stop below 0.5).
        zero_based_day = index.day - 1
        return zero_based_day / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day within the year, linearly rescaled onto [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Day-of-year is 1-based; with a divisor of 365 only day 366 of a
        # leap year reaches exactly 0.5.
        zero_based_day = index.dayofyear - 1
        return zero_based_day / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month within the year, linearly rescaled onto [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # Months are 1-based (1..12): January maps to -0.5, December to 0.5.
        zero_based_month = index.month - 1
        return zero_based_month / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """ISO week within the year, linearly rescaled onto [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # The week number comes from `isocalendar()` and is 1-based; with a
        # divisor of 52, week 1 maps to -0.5 and week 53 to 0.5.
        iso_week = index.isocalendar().week
        return (iso_week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Build the time-feature objects that suit a given frequency string.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Returns
    -------
    A list of newly constructed feature instances for the first offset class
    in the lookup table that the parsed frequency is an instance of. The list
    is empty for a year-end frequency.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the offset classes in the table.
        (`to_offset` itself may raise for strings it cannot parse.)
    """

    # (offset class, feature classes) pairs, scanned in this order.
    lookup = (
        (offsets.YearEnd, []),
        (offsets.QuarterEnd, [MonthOfYear]),
        (offsets.MonthEnd, [MonthOfYear]),
        (offsets.Week, [DayOfMonth, WeekOfYear]),
        (offsets.Day, [DayOfWeek, DayOfMonth, DayOfYear]),
        (offsets.BusinessDay, [DayOfWeek, DayOfMonth, DayOfYear]),
        (offsets.Hour, [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear]),
        (
            offsets.Minute,
            [MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        ),
        (
            offsets.Second,
            [
                SecondOfMinute,
                MinuteOfHour,
                HourOfDay,
                DayOfWeek,
                DayOfMonth,
                DayOfYear,
            ],
        ),
    )

    parsed = to_offset(freq_str)

    # `None` marks "no match"; an empty feature list is still a valid match.
    matched = next(
        (classes for kind, classes in lookup if isinstance(parsed, kind)),
        None,
    )
    if matched is not None:
        return [make_feature() for make_feature in matched]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)
def printf_time_features():
    """Print every time feature for frequency "h" and its value for one sample timestamp."""
    sample = pd.to_datetime("2016/7/1 0:45:00")
    for feature in time_features_from_frequency_str("h"):
        # First the feature's repr followed by an extra newline, then its
        # encoded value for the sample timestamp.
        print(feature, "\n")
        print(feature(sample))


printf_time_features()
#返回适用于给定频率字符串的时间特征列表
# 频率字符串举例
# Y   - yearly
#     alias: A
# M   - monthly
# W   - weekly
# D   - daily
# B   - business days
# H   - hourly
# T   - minutely
#     alias: min
# S   - secondly

输出
输出 4 个特征,时间字符串被编码为 [-0.5, 0.5] 之间的值

# HourOfDay()

# -0.5
# DayOfWeek()

# 0.16666666666666663
# DayOfMonth()

# -0.5
# DayOfYear()

# -0.0013698630136986245
batch_x_mark: tensor([[[-0.5000,  0.1667, -0.5000, -0.0014],
         [-0.5000,  0.1667, -0.5000, -0.0014],
         [-0.5000,  0.1667, -0.5000, -0.0014],
         [-0.5000,  0.1667, -0.5000, -0.0014],
         [-0.4565,  0.1667, -0.5000, -0.0014],
         [-0.4565,  0.1667, -0.5000, -0.0014],
         [-0.4565,  0.1667, -0.5000, -0.0014],
         [-0.4565,  0.1667, -0.5000, -0.0014],
         [-0.4130,  0.1667, -0.5000, -0.0014],
         [-0.4130,  0.1667, -0.5000, -0.0014],
         [-0.4130,  0.1667, -0.5000, -0.0014],
         [-0.4130,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014]]])
batch_y_mark: tensor([[[-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3696,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.3261,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2826,  0.1667, -0.5000, -0.0014],
         [-0.2391,  0.1667, -0.5000, -0.0014],
         [-0.2391,  0.1667, -0.5000, -0.0014],
         [-0.2391,  0.1667, -0.5000, -0.0014],
         [-0.2391,  0.1667, -0.5000, -0.0014],
         [-0.1957,  0.1667, -0.5000, -0.0014],
         [-0.1957,  0.1667, -0.5000, -0.0014],
         [-0.1957,  0.1667, -0.5000, -0.0014],
         [-0.1957,  0.1667, -0.5000, -0.0014],
         [-0.1522,  0.1667, -0.5000, -0.0014],
         [-0.1522,  0.1667, -0.5000, -0.0014],
         [-0.1522,  0.1667, -0.5000, -0.0014],
         [-0.1522,  0.1667, -0.5000, -0.0014],
         [-0.1087,  0.1667, -0.5000, -0.0014],
         [-0.1087,  0.1667, -0.5000, -0.0014],
         [-0.1087,  0.1667, -0.5000, -0.0014],
         [-0.1087,  0.1667, -0.5000, -0.0014],
         [-0.0652,  0.1667, -0.5000, -0.0014],
         [-0.0652,  0.1667, -0.5000, -0.0014],
         [-0.0652,  0.1667, -0.5000, -0.0014],
         [-0.0652,  0.1667, -0.5000, -0.0014],
         [-0.0217,  0.1667, -0.5000, -0.0014],
         [-0.0217,  0.1667, -0.5000, -0.0014],
         [-0.0217,  0.1667, -0.5000, -0.0014],
         [-0.0217,  0.1667, -0.5000, -0.0014]]])

查看配置
打印属性值

 print('\n'.join(['%s:%s' % item for item in self.__dict__.items()]) )
seq_len:24
label_len:12
pred_len:24
set_type:0
features:M
target:OT
scale:True
timeenc:1
freq:h
root_path:./dataset/ETT-small/
data_path:ETTm1.csv
scaler:StandardScaler()

batch_x, batch_y, batch_x_mark, batch_y_mark各自的形状

 batch_x: torch.Size([1, 24, 7])
 batch_y: torch.Size([1, 36, 7])
 batch_x_mark: torch.Size([1, 24, 4])
 batch_y_mark: torch.Size([1, 36, 4])