{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this cell so that Restart & Run All works from a fresh kernel.\n",
    "import numpy as np\n",
    "import numpy as numpy  # redundant alias kept so that any code relying on it still works\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
    "from sklearn.preprocessing import MinMaxScaler, StandardScaler\n",
    "from keras.models import Sequential, load_model  # Sequential: linear stack of layers\n",
    "# Layers are imported from the public keras.layers namespace rather than from\n",
    "# version-specific submodules (keras.layers.core / keras.layers.recurrent).\n",
    "from keras.layers import Dense, Activation, Dropout, LSTM\n",
    "from keras.optimizers import SGD"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: get_price is neither defined nor imported in this notebook; it is expected\n",
    "# to be provided by the hosting research environment.\n",
    "security = '000001.XSHG'\n",
    "df = get_price(\n",
    "    security,\n",
    "    start_date=None,\n",
    "    end_date='2020-12-10',\n",
    "    frequency='daily',\n",
    "    fields=['open', 'close', 'low', 'high', 'volume', 'money', 'pre_close'],\n",
    "    skip_paused=False,\n",
    "    fq='pre',\n",
    "    count=1500,\n",
    "    panel=True,\n",
    ")\n",
    "# Percentage change of close relative to pre_close.\n",
    "df['rate'] = (df['close'] / df['pre_close'] - 1) * 100\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data preparation: sliding windows over a 1-D series\n",
    "face_back = 10  # window length used throughout the notebook\n",
    "\n",
    "\n",
    "def Processing_data(array, face_back=5):\n",
    "    \"\"\"Cut a 1-D series into consecutive overlapping windows.\n",
    "\n",
    "    Window ``i`` holds ``array[i:i + face_back]``; the value that follows a window\n",
    "    (``array[i + face_back]``) is left out so that it can serve as the target.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    array : pandas.Series or array-like\n",
    "        One-dimensional sequence of values.\n",
    "    face_back : int, default 5\n",
    "        Number of consecutive values per window.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Array of shape ``(len(array) - face_back, face_back)``; an empty array when\n",
    "        the input has no more than ``face_back`` values.\n",
    "    \"\"\"\n",
    "    values = np.asarray(array)\n",
    "    windows = [list(values[i:i + face_back]) for i in range(len(values) - face_back)]\n",
    "    return np.array(windows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "array = df['rate']\n",
    "x = Processing_data(array, face_back)\n",
    "# Target for window i is the value immediately after it.\n",
    "y = array.values[face_back:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The LSTM layer needs 3-D input, so insert an axis of length 1 at position 1:\n",
    "# (samples, face_back) -> (samples, 1, face_back).\n",
    "X = np.expand_dims(x, axis=1)\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split the data into a training set and a test set\n",
    "TRAIN_SIZE = 1000  # number of leading samples used for training\n",
    "train_X, test_X = X[:TRAIN_SIZE, :, :], X[TRAIN_SIZE:, :, :]\n",
    "train_y, test_y = y[:TRAIN_SIZE], y[TRAIN_SIZE:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the LSTM model\n",
    "def build_STLM(input_shape=None):\n",
    "    \"\"\"Create and compile a two-layer LSTM regressor.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    input_shape : tuple of int, optional\n",
    "        Per-sample input shape. When omitted it is taken from the global\n",
    "        ``train_X`` as ``(train_X.shape[1], train_X.shape[2])``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    keras.models.Sequential\n",
    "        Compiled model that outputs one value per sample.\n",
    "    \"\"\"\n",
    "    if input_shape is None:\n",
    "        input_shape = (train_X.shape[1], train_X.shape[2])\n",
    "    model = Sequential()\n",
    "    model.add(LSTM(25, input_shape=input_shape, return_sequences=True))\n",
    "    model.add(LSTM(48))\n",
    "    # Single linear unit: the target is one scalar per sample.\n",
    "    model.add(Dense(1))\n",
    "    # NOTE(review): loss and optimizer were not specified anywhere in the notebook;\n",
    "    # mean squared error with Adam is a conventional regression default - confirm.\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit the network on the raw (unscaled) rate windows; 25% of the samples are\n",
    "# held out for validation via validation_split.\n",
    "model = build_STLM()\n",
    "history = model.fit(X, y, epochs=50, batch_size=300, validation_split=0.25, verbose=1, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the training and validation loss curves\n",
    "plt.plot(history.history['loss'], label='train')\n",
    "plt.plot(history.history['val_loss'], label='test')\n",
    "# Title is derived from the security actually loaded above.\n",
    "plt.title('LSTM_%s' % security, fontsize=12)\n",
    "plt.xlabel('epoch', fontsize=10)\n",
    "plt.ylabel('loss', fontsize=10)\n",
    "plt.legend()  # without this the line labels are never shown\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据标准化后的模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the targets and the predictions of the unscaled model side by side.\n",
    "pre_data = pd.DataFrame({'y': y})\n",
    "prediction5 = model.predict(X)\n",
    "# Flatten the (samples, 1) model output to 1-D before storing it as a column.\n",
    "pre_data['prediction5'] = prediction5.ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardise the rate column.\n",
    "# NOTE: despite its name, minmax is a StandardScaler, not a MinMaxScaler.\n",
    "minmax = StandardScaler()\n",
    "# reshape(-1, 1) adapts to however many rows df actually has.\n",
    "rate_column = np.array(df['rate']).reshape(-1, 1)\n",
    "minmax.fit(rate_column)\n",
    "df['ration'] = minmax.transform(rate_column).ravel()\n",
    "x_scaler = Processing_data(df['ration'], face_back)\n",
    "y_scaler = df['ration'].values[face_back:]\n",
    "X_scaler = np.expand_dims(x_scaler, axis=1)  # add the axis required by the LSTM input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fit a second network on the standardised data (same architecture as above).\n",
    "model2 = build_STLM()\n",
    "history2 = model2.fit(X_scaler, y_scaler, epochs=50, batch_size=300, validation_split=0.25, verbose=1, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predict6 = model2.predict(X_scaler)\n",
    "# Map the predictions back to the original rate scale.\n",
    "prediction6 = minmax.inverse_transform(predict6)\n",
    "pre_data['prediction6'] = prediction6.ravel()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def draw_Distribution_map(data=None, col='y'):\n",
    "    \"\"\"Draw a bar chart of how often ``data[col]`` falls into each of 100 equal-width bins.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame, optional\n",
    "        Frame holding the column to plot. Defaults to the global ``pre_data``,\n",
    "        looked up at call time.\n",
    "    col : str, default 'y'\n",
    "        Name of the column to bin.\n",
    "    \"\"\"\n",
    "    if data is None:\n",
    "        data = pre_data\n",
    "    cats = pd.cut(data[col], bins=100).value_counts(sort=False)\n",
    "    ax = cats.plot(kind='bar', title='%s的区间频数统计' % (col), figsize=(8, 5))\n",
    "    # Label only 10 evenly spaced bars; 100 interval labels would be unreadable.\n",
    "    new_xticks = np.linspace(0, 99, 10)\n",
    "    atick = [str(cats.index[int(x)]) for x in new_xticks]\n",
    "    ax.set_xticks(new_xticks)\n",
    "    ax.set_xticklabels(atick, rotation=45, ha='right')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distribution of the predictions from the unscaled model\n",
    "draw_Distribution_map(data=pre_data, col='prediction5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distribution of the predictions from the standardised model\n",
    "draw_Distribution_map(data=pre_data, col='prediction6')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluation metrics: MSE (mean squared error), MAE (mean absolute error) and\n",
    "# R^2 (coefficient of determination); the sklearn functions are imported in the\n",
    "# first code cell.\n",
    "def valuetion(model, col, data=None):\n",
    "    \"\"\"Print MSE, MAE and R^2 of a prediction column against ``data['y']``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model : str\n",
    "        Label printed in front of the metrics.\n",
    "    col : str\n",
    "        Name of the prediction column to evaluate.\n",
    "    data : pandas.DataFrame, optional\n",
    "        Frame containing ``'y'`` and ``col``. Defaults to the global ``pre_data``.\n",
    "    \"\"\"\n",
    "    if data is None:\n",
    "        data = pre_data\n",
    "    y_true = data['y']\n",
    "    y_pred = data[col]\n",
    "    mse = mean_squared_error(y_true, y_pred)\n",
    "    mae = mean_absolute_error(y_true, y_pred)\n",
    "    r2 = r2_score(y_true, y_pred)\n",
    "    print('%s  MSE=%.4f  MAE=%.4f  R2=%.4f' % (model, mse, mae, r2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "valuetion(model='model5', col='prediction5')\n",
    "valuetion(model='model6', col='prediction6')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "common_3.8",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}