【原】EarnMore強化學習投資組合框架代碼結構拆解

AI量化實驗室 2024-12-15 發布于北京

展開全文

原創內容第740篇，專注量化投資、個人成長與財富自由。

繼續講這篇論文：

我們來拆解代碼，EarnMore使用了MMEngine 這個底層框架來構建工程。

MMEngine 提供了堅實的工程基礎，使開發人員無需在工作流程上編寫冗余代碼。它作為所有 OpenMMLab 代碼庫的訓練引擎，支持各個研究領域的數百種算法。此外，MMEngine 還可以通用地應用于非 OpenMMLab 項目。

它是OpenMMLab在2022年世界人工智能大會上發布的具備通用、統一和靈活的深度學習模型訓練的基礎庫。

代碼相對比較復雜。

但很多是關于強化學習庫本身的實現。

數據集加載，就是從csv讀到dataframe。

import os.path
import pandas as pd
from typing import List
from glob import glob
import numpy as np

from pm.registry import DATASET


@DATASET.register_module()
class PortfolioManagementDataset:
    """Load a stock universe, its per-stock CSV data and auxiliary stock groups.

    On construction the dataset:

    1. reads the stock universe (one ticker per line) from ``stocks_path``;
    2. builds ``stocks2id`` / ``id2stocks`` lookups from the universe order;
    3. reads every ``<id>_<name>.txt`` group file under ``aux_stocks_path``
       and adds a synthetic group ``0`` ("All") covering the whole universe;
    4. loads ``<data_path>/<ticker>.csv`` for every ticker into
       ``stocks_df`` (a list of DataFrames in universe order).

    Args:
        root: Directory that all other paths are joined onto.
        data_path: Directory (relative to ``root``) holding ``<ticker>.csv``.
        stocks_path: File (relative to ``root``) listing the stock universe.
        aux_stocks_path: Directory (relative to ``root``) holding the group
            files. ``None`` means "no auxiliary groups"; only the synthetic
            "All" group is created.
        features_name: Feature column names to keep from each CSV.
        temporals_name: Temporal column names to keep from each CSV.
        labels_name: Label column names to keep from each CSV.
    """

    def __init__(self,
                 root: str = None,
                 data_path: str = None,
                 stocks_path: str = None,
                 aux_stocks_path: str = None,
                 features_name: List[str] = None,
                 temporals_name: List[str] = None,
                 labels_name: List[str] = None):
        super().__init__()

        self.root = root
        self.features_name = features_name
        self.temporals_name = temporals_name
        self.labels_name = labels_name

        self.data_path = os.path.join(root, data_path)
        self.stocks_path = os.path.join(root, stocks_path)
        # The parameter defaults to None; joining None onto root would raise
        # TypeError, so an omitted aux directory simply disables aux groups.
        if aux_stocks_path is None:
            self.aux_stocks_path = None
        else:
            self.aux_stocks_path = os.path.join(root, aux_stocks_path)

        self.stocks = self._init_stocks()

        self.stocks2id = {stock: i for i, stock in enumerate(self.stocks)}
        self.id2stocks = {i: stock for i, stock in enumerate(self.stocks)}

        self.aux_stocks = self._init_aux_stocks()

        # Group 0 always means "the whole universe" and replaces any group
        # file that happened to use id 0. A mask value of 0.0 marks a stock
        # as belonging to the group (same convention as the aux groups).
        self.aux_stocks[0] = {
            "id": 0,
            "type": "all",
            "name": "All",
            "stocks": self.stocks,
            "mask": np.zeros(len(self.stocks)),
            # Kept in sync with the aux entries, which all carry this key.
            "num_stocks": len(self.stocks),
        }

        self.stocks_df = self._init_stocks_df()

    @staticmethod
    def _read_lines(path: str) -> List[str]:
        """Return the stripped, non-empty lines of the text file at ``path``."""
        with open(path) as handle:
            stripped = (line.strip() for line in handle)
            # Blank lines (e.g. a trailing newline at end of file) are not
            # tickers; keeping them would later request a file named ".csv".
            return [line for line in stripped if line]

    def _init_stocks(self):
        """Read the stock universe from ``self.stocks_path`` (one per line)."""
        print("init stocks...")
        stocks = self._read_lines(self.stocks_path)
        print("init stocks success...")
        return stocks

    def _init_stocks_df(self):
        """Load one DataFrame per stock, indexed by ``Date``.

        Returns:
            A list of DataFrames in the same order as ``self.stocks``, each
            restricted to the feature, temporal and label columns.
        """
        print("init stocks dataframe...")
        # Any of the three name lists may have been left at its None default.
        columns = [
            *(self.features_name or []),
            *(self.temporals_name or []),
            *(self.labels_name or []),
        ]
        stocks_df = []
        for stock in self.stocks:
            path = os.path.join(self.data_path, f"{stock}.csv")
            # The first CSV column is consumed as a throwaway index, then
            # replaced by the "Date" column.
            df = pd.read_csv(path, index_col=0)
            df = df.set_index("Date")
            df = df[columns]
            stocks_df.append(df)
        print("init stocks dataframe success...")
        return stocks_df

    def _init_aux_stocks(self) -> dict:
        """Read the auxiliary stock groups from ``self.aux_stocks_path``.

        Every ``*.txt`` file must be named ``<int id>_<name>.txt`` and list
        one ticker per line. Only the first underscore separates id from
        name, so the name itself may contain underscores.

        Returns:
            Mapping ``group id -> dict`` with keys ``name``, ``type``,
            ``stocks``, ``num_stocks``, ``mask`` and ``id``. ``mask`` has one
            entry per universe stock: 0.0 if the stock is in the group,
            1.0 otherwise.
        """
        print("init aux stocks...")
        aux_stocks = {}
        if self.aux_stocks_path is None:
            aux_stocks_files = []
        else:
            # glob returns files in arbitrary order; sorting makes the result
            # (and which file wins on a duplicate id) reproducible.
            aux_stocks_files = sorted(
                glob(os.path.join(self.aux_stocks_path, "*.txt")))

        for path in aux_stocks_files:
            stem = os.path.basename(path).split(".")[0]
            group_id, name = stem.split("_", 1)
            group_id = int(group_id)

            stocks = self._read_lines(path)
            members = set(stocks)  # O(1) membership while building the mask
            aux_stocks[group_id] = {
                "name": name,
                "type": "aux",
                "stocks": stocks,
                "num_stocks": len(stocks),
                "mask": np.array([0.0 if stock in members else 1.0
                                  for stock in self.stocks]),
                # Mirrors the "id" key of the synthetic "All" group.
                "id": group_id,
            }

        for k, v in aux_stocks.items():
            print(f"aux stocks id: {k}, name: {v['name']}, num stocks: {v['num_stocks']}")
        print("init aux stocks success...")
        return aux_stocks

環境就是gym.Env：

from gym import Wrapper, spaces
import random
import numpy as np

class EnvironmentWrapper(Wrapper):
    """Wrap an environment, seed the RNGs and declare Box action/state spaces.

    Args:
        env: The environment to wrap; it must expose a ``stocks`` sequence.
        transition_shape: Mapping with ``"action"`` and ``"state"`` entries,
            each providing ``"shape"`` and ``"type"``. The first element of
            each shape is dropped before the space is built (presumably a
            leading batch/environment dimension -- TODO confirm).
        seed: Value used to seed both ``random`` and ``numpy.random``.
    """

    def __init__(self, env, transition_shape, seed=42):
        super().__init__(env)
        # NOTE(review): if the installed gym version defines a ``seed()``
        # method on Wrapper, this instance attribute shadows it -- confirm.
        self.seed = seed

        # Seed the Python and NumPy global generators.
        random.seed(seed)
        np.random.seed(seed)

        self.env = env
        self.num_stocks = len(env.stocks)

        action_spec = transition_shape["action"]
        act_shape = action_spec["shape"][1:]
        act_dtype = action_spec["type"]

        state_spec = transition_shape["state"]
        obs_shape = state_spec["shape"][1:]
        obs_dtype = state_spec["type"]

        summary = "action shape {}, action type {}, state shape {}, state type {}"
        print(summary.format(act_shape, act_dtype, obs_shape, obs_dtype))

        # Actions are bounded to [0, 1]; observations are unbounded.
        self.action_space = spaces.Box(
            low=0, high=1.0, shape=act_shape, dtype=act_dtype)
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=obs_shape, dtype=obs_dtype)

    def reset(self):
        """Reset the wrapped environment and return what it returns."""
        return self.env.reset()

    def step(self, action):
        """Forward ``action`` to the wrapped environment.

        Returns:
            The ``(next_state, reward, done, info)`` 4-tuple produced by the
            wrapped environment's ``step``.
        """
        observation, reward, finished, info = self.env.step(action)
        return observation, reward, finished, info