|
|
@@ -0,0 +1,252 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# LSTM"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import numpy as np\n",
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "from sklearn.preprocessing import MinMaxScaler \n",
|
|
|
+ "from keras.models import Sequential,load_model#线性神经网络\n",
|
|
|
+ "from keras.layers.core import Dense,Activation,Dropout#神经网络的激活函数\n",
|
|
|
+ "from keras.optimizers import SGD\n",
|
|
|
+ "import numpy as numpy\n",
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
+ "from keras.layers.recurrent import LSTM"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Pull daily bars for one security and derive its daily percentage return.\n",
|
|
|
+ "# NOTE(review): get_price is not defined or imported in this notebook; presumably it is injected by the hosting research platform - confirm.\n",
|
|
|
+ "security='000001.XSHG'\n",
|
|
|
+ "df=get_price(security, start_date=None, end_date='2020-12-10', frequency='daily', fields=['open', 'close', 'low', 'high', 'volume', 'money', 'pre_close', ], \n",
|
|
|
+ " skip_paused=False, fq='pre', count=1500, panel=True)\n",
|
|
|
+ "# rate = percentage change of close versus pre_close.\n",
|
|
|
+ "df['rate']=(df['close']/df['pre_close']-1)*100\n",
|
|
|
+ "df.head()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Data preparation\n",
|
|
|
+ "face_back=10\n",
|
|
|
+ "def Processing_data(array,face_back=5):\n",
|
|
|
+ "    \"\"\"Slice a 1-D series into overlapping windows of `face_back` consecutive values.\n",
|
|
|
+ "\n",
|
|
|
+ "    Window i covers array[i:i+face_back]. Only len(array)-face_back windows are\n",
|
|
|
+ "    produced, so every window has a following value available as its target.\n",
|
|
|
+ "\n",
|
|
|
+ "    Parameters: array - 1-D pandas Series, list or ndarray; face_back - window length.\n",
|
|
|
+ "    Returns: ndarray of shape (max(len(array)-face_back, 0), face_back).\n",
|
|
|
+ "    \"\"\"\n",
|
|
|
+ "    # np.asarray accepts Series, lists and ndarrays alike (no .values attribute needed).\n",
|
|
|
+ "    values=np.asarray(array)\n",
|
|
|
+ "    n_windows=max(len(values)-face_back,0)\n",
|
|
|
+ "    if n_windows==0:\n",
|
|
|
+ "        # Keep the result 2-D even when the series is too short for a single window.\n",
|
|
|
+ "        return np.empty((0,face_back),dtype=values.dtype)\n",
|
|
|
+ "    return np.array([values[i:i+face_back] for i in range(n_windows)])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Supervised pairs: x[i] is the window of face_back consecutive rate values starting at row i,\n",
|
|
|
+ "# and y[i] is the rate value that immediately follows that window.\n",
|
|
|
+ "array=df['rate']\n",
|
|
|
+ "x=Processing_data(array,face_back)\n",
|
|
|
+ "y=array.values[face_back:]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# The new axis is inserted at position 1, so each sample becomes one step holding the whole window.\n",
|
|
|
+ "X=np.expand_dims(x, axis=1)# add a dimension; the LSTM layer needs input with at least 3 dimensions\n",
|
|
|
+ "X.shape"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Split the data into a training set and a test set\n",
|
|
|
+ "# The first 1000 samples (in row order) go to training, the remainder to testing.\n",
|
|
|
+ "train_X,test_X=X[:1000,:,:],X[1000:,:,:]\n",
|
|
|
+ "train_y,test_y=y[:1000,],y[1000:,]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Build the LSTM model\n",
|
|
|
+ "def build_STLM():\n",
|
|
|
+ "    \"\"\"Create and compile a two-layer LSTM regressor.\n",
|
|
|
+ "\n",
|
|
|
+ "    The input shape is read from the notebook-level train_X (its axes 1 and 2).\n",
|
|
|
+ "    Returns the compiled keras Sequential model, ready for fit().\n",
|
|
|
+ "    \"\"\"\n",
|
|
|
+ "    model = Sequential()\n",
|
|
|
+ "    model.add(LSTM(25, input_shape=(train_X.shape[1], train_X.shape[2]),return_sequences=True))\n",
|
|
|
+ "    model.add(LSTM(48))\n",
|
|
|
+ "    # One linear output unit: the target is a single numeric value per sample.\n",
|
|
|
+ "    model.add(Dense(1))\n",
|
|
|
+ "    # NOTE(review): mse/adam are a conventional regression setup chosen here; adjust if another loss or optimizer was intended.\n",
|
|
|
+ "    model.compile(loss='mse', optimizer='adam')\n",
|
|
|
+ "    # Without this return the callers would receive None and fail on model.fit.\n",
|
|
|
+ "    return model"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# fit network\n",
|
|
|
+ "# NOTE(review): training uses the full X/y with validation_split=0.25; train_X/test_X from the split cell are not used for fitting.\n",
|
|
|
+ "model=build_STLM()\n",
|
|
|
+ "history = model.fit(X, y, epochs=50, batch_size=300, validation_split=0.25, verbose=1,shuffle=True)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Plot the training and validation loss per epoch\n",
|
|
|
+ "plt.plot(history.history['loss'], label='train')\n",
|
|
|
+ "plt.plot(history.history['val_loss'], label='test')\n",
|
|
|
+ "# Title follows the security that was actually loaded instead of a hard-coded ticker.\n",
|
|
|
+ "plt.title('LSTM_%s' % security, fontsize=12)\n",
|
|
|
+ "plt.xlabel('epoch', fontsize=10)\n",
|
|
|
+ "plt.ylabel('loss', fontsize=10)\n",
|
|
|
+ "# The line labels only become visible once a legend is drawn.\n",
|
|
|
+ "plt.legend();"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## 数据标准化后的模型"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Collect the actual targets and the first model's predictions on X side by side.\n",
|
|
|
+ "pre_data=pd.DataFrame()\n",
|
|
|
+ "pre_data['y']=y\n",
|
|
|
+ "prediction5=model.predict(X)\n",
|
|
|
+ "pre_data['prediction5']=prediction5"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Standardize the rate series and rebuild the windowed inputs/targets from it\n",
|
|
|
+ "from sklearn.preprocessing import StandardScaler\n",
|
|
|
+ "# NOTE: despite its name, minmax is a StandardScaler; the name is kept because later cells use it.\n",
|
|
|
+ "minmax=StandardScaler()\n",
|
|
|
+ "# reshape(-1,1) adapts to however many rows df has instead of assuming exactly 1500.\n",
|
|
|
+ "minmax.fit(np.array(df['rate']).reshape(-1,1))\n",
|
|
|
+ "df['ration']=minmax.transform(np.array(df['rate']).reshape(-1,1))\n",
|
|
|
+ "x_scaler=Processing_data(df['ration'],face_back)\n",
|
|
|
+ "y_scaler=df['ration'].values[face_back:]\n",
|
|
|
+ "X_scaler=np.expand_dims(x_scaler, axis=1)# add the extra axis, same layout as X"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# fit network on the standardized data\n",
|
|
|
+ "model2=build_STLM()\n",
|
|
|
+ "# Train on X_scaler/y_scaler: model2 is later fed X_scaler and its output is inverse-transformed,\n",
|
|
|
+ "# so it has to learn in the standardized scale rather than on the raw X/y.\n",
|
|
|
+ "history = model2.fit(X_scaler, y_scaler, epochs=50, batch_size=300, validation_split=0.25, verbose=1,shuffle=True)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Predict with model2 on the standardized inputs, then map the output back through the fitted scaler.\n",
|
|
|
+ "predict6=model2.predict(X_scaler)\n",
|
|
|
+ "prediction6=minmax.inverse_transform(predict6)\n",
|
|
|
+ "pre_data['prediction6']=prediction6"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def draw_Distribution_map(data=pre_data,col='y',bins=100):\n",
|
|
|
+ "    \"\"\"Draw a bar chart counting how many values of data[col] fall into each interval.\n",
|
|
|
+ "\n",
|
|
|
+ "    data - frame holding the column (the default is bound to pre_data when this cell runs);\n",
|
|
|
+ "    col - column name; bins - number of equal-width intervals passed to pd.cut.\n",
|
|
|
+ "    \"\"\"\n",
|
|
|
+ "    cats=pd.cut(data[col],bins=bins).value_counts(sort=False)\n",
|
|
|
+ "    cats.plot(kind='bar',title='%s的区间频数统计'%(col),figsize=(8,5))\n",
|
|
|
+ "    # Label only 10 evenly spaced bars; one label per bar would be unreadable.\n",
|
|
|
+ "    new_xticks=[int(x) for x in np.linspace(0,bins-1,10)]\n",
|
|
|
+ "    atick=[str(cats.index[x]) for x in new_xticks]\n",
|
|
|
+ "    # Apply the thinned-out ticks (previously computed but never used).\n",
|
|
|
+ "    plt.xticks(new_xticks,atick,rotation=45)\n",
|
|
|
+ "    plt.show()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Inspect the distribution of the predicted values (prediction5)\n",
|
|
|
+ "draw_Distribution_map(data=pre_data,col='prediction5')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Inspect the distribution chart of the predicted values (prediction6)\n",
|
|
|
+ "draw_Distribution_map(data=pre_data,col='prediction6')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "#MSE均方误差\n",
|
|
|
+ "from sklearn.metrics import mean_squared_error\n",
|
|
|
+ "#MAEX\n",
|
|
|
+ "from sklearn.metrics import mean_absolute_error\n",
|
|
|
+ "#R^2决定系数"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def valuetion(model,col,data=None):\n",
|
|
|
+ "    \"\"\"Print MSE, MAE and R^2 of the predictions in column `col` against column 'y'.\n",
|
|
|
+ "\n",
|
|
|
+ "    model - label printed in front of the scores; col - name of the prediction column;\n",
|
|
|
+ "    data - frame holding 'y' and `col`, defaults to the notebook-level pre_data.\n",
|
|
|
+ "    \"\"\"\n",
|
|
|
+ "    # Imported locally so the function carries every metric it needs, including r2_score.\n",
|
|
|
+ "    from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score\n",
|
|
|
+ "    frame=pre_data if data is None else data\n",
|
|
|
+ "    y_true,y_pred=frame['y'],frame[col]\n",
|
|
|
+ "    print('%s: MSE=%.4f MAE=%.4f R2=%.4f'%(model,mean_squared_error(y_true,y_pred),mean_absolute_error(y_true,y_pred),r2_score(y_true,y_pred)))\n",
|
|
|
+ "\n",
|
|
|
+ "valuetion(model='model5',col='prediction5')\n",
|
|
|
+ "valuetion(model='model6',col='prediction6')"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "common_3.8",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "name": "python",
|
|
|
+ "version": "3.8.17"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|