Browse Source

初始化用模型来预测

maxfeng 4 tháng trước
mục cha
commit
ae4e340c6f
1 tập tin đã thay đổi với 252 bổ sung và 0 xóa
  1. 252 0
      Lib/future/with_model_anaylsis.ipynb

+ 252 - 0
Lib/future/with_model_anaylsis.ipynb

@@ -0,0 +1,252 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# LSTM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.preprocessing import MinMaxScaler \n",
+    "from keras.models import Sequential,load_model#线性神经网络\n",
+    "from keras.layers.core import Dense,Activation,Dropout#神经网络的激活函数\n",
+    "from keras.optimizers import SGD\n",
+    "import numpy as numpy\n",
+    "import matplotlib.pyplot as plt\n",
+    "from keras.layers.recurrent import LSTM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "security='000001.XSHG'\n",
+    "df=get_price(security, start_date=None, end_date='2020-12-10', frequency='daily', fields=['open', 'close', 'low', 'high', 'volume', 'money',  'pre_close', ], \n",
+    "          skip_paused=False, fq='pre', count=1500, panel=True)\n",
+    "df['rate']=(df['close']/df['pre_close']-1)*100\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Data preparation: slice the series into fixed-length look-back windows.\n",
+    "face_back=10  # look-back window length passed to Processing_data by the cells below\n",
+    "def Processing_data(array,face_back=5):\n",
+    "    \"\"\"Build overlapping look-back windows from a 1-D sequence.\n",
+    "\n",
+    "    Window i holds array[i:i+face_back]. The value right after each window\n",
+    "    (array[i+face_back]) is not part of it, so no window is produced for the\n",
+    "    last face_back values.\n",
+    "\n",
+    "    Args:\n",
+    "        array: 1-D sequence of values (pandas Series, NumPy array or list).\n",
+    "        face_back: number of consecutive values per window.\n",
+    "\n",
+    "    Returns:\n",
+    "        np.ndarray with len(array)-face_back rows of face_back values each;\n",
+    "        an empty 1-D array when the input has no more than face_back values.\n",
+    "    \"\"\"\n",
+    "    values=np.asarray(array)  # accept any array-like instead of requiring a `.values` attribute\n",
+    "    windows=[values[i:i+face_back] for i in range(len(values)-face_back)]\n",
+    "    return np.array(windows)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "array=df['rate']\n",
+    "x=Processing_data(array,face_back)\n",
+    "y=array.values[face_back:]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "X=np.expand_dims(x, axis=1)#增加数据维度,LSTM神经网络维度至少为3维\n",
+    "X.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 分割数据为训练集和测试集\n",
+    "train_X,test_X=X[:1000,:,:],X[1000:,:,:]\n",
+    "train_y,test_y=y[:1000,],y[1000:,]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the LSTM model\n",
+    "def build_STLM(input_shape=None):\n",
+    "    \"\"\"Create and compile a stacked two-layer LSTM regressor.\n",
+    "\n",
+    "    Args:\n",
+    "        input_shape: (timesteps, features) of one sample. When omitted, it is\n",
+    "            read from the notebook-level `train_X` tensor.\n",
+    "\n",
+    "    Returns:\n",
+    "        A compiled keras Sequential model with a single linear output unit.\n",
+    "    \"\"\"\n",
+    "    if input_shape is None:\n",
+    "        # Fall back to the notebook-level training tensor.\n",
+    "        input_shape=(train_X.shape[1], train_X.shape[2])\n",
+    "    model = Sequential()\n",
+    "    model.add(LSTM(25, input_shape=input_shape,return_sequences=True))\n",
+    "    model.add(LSTM(48))\n",
+    "    model.add(Dense(1))  # one output value per sample (regression target)\n",
+    "    # fit() requires a compiled model; MSE loss with Adam is a reviewer choice - adjust if needed.\n",
+    "    model.compile(loss='mse', optimizer='adam')\n",
+    "    return model  # without this the callers receive None and model.fit fails"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fit network\n",
+    "model=build_STLM()\n",
+    "history = model.fit(X, y, epochs=50, batch_size=300, validation_split=0.25, verbose=1,shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Plot the training and validation loss curves\n",
+    "plt.plot(history.history['loss'], label='train')\n",
+    "plt.plot(history.history['val_loss'], label='test')\n",
+    "plt.title('LSTM_%s' % security, fontsize=12)  # title follows the security loaded above, not a fixed ticker\n",
+    "plt.xlabel('epoch', fontsize=10)\n",
+    "plt.ylabel('loss', fontsize=10)\n",
+    "plt.legend();  # the curve labels are only shown once a legend is drawn"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 数据标准化后的模型"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pre_data=pd.DataFrame()\n",
+    "pre_data['y']=y\n",
+    "prediction5=model.predict(X)\n",
+    "pre_data['prediction5']=prediction5"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Standardise the returns with StandardScaler and rebuild the windows on the scaled series\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "minmax=StandardScaler()  # NOTE: despite its name this is a StandardScaler, not a MinMaxScaler\n",
+    "rate_column=df['rate'].values.reshape(-1,1)  # -1 infers the row count instead of hard-coding 1500\n",
+    "df['ration']=minmax.fit_transform(rate_column).ravel()  # flatten (n,1) -> (n,) before storing as a column\n",
+    "x_scaler=Processing_data(df['ration'],face_back)\n",
+    "y_scaler=df['ration'].values[face_back:]\n",
+    "X_scaler=np.expand_dims(x_scaler, axis=1)# add an axis so the input is 3-D"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fit network on the standardised data\n",
+    "model2=build_STLM()\n",
+    "# Train on the scaled tensors: model2 predicts on X_scaler and its output is inverse-transformed,\n",
+    "# so fitting it on the raw X, y would make those predictions meaningless.\n",
+    "history = model2.fit(X_scaler, y_scaler, epochs=50, batch_size=300, validation_split=0.25, verbose=1,shuffle=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "predict6=model2.predict(X_scaler)\n",
+    "prediction6=minmax.inverse_transform(predict6)\n",
+    "pre_data['prediction6']=prediction6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def draw_Distribution_map(data=pre_data,col='y',bins=100):\n",
+    "    \"\"\"Bar-plot how many values of `data[col]` fall into each of `bins` equal-width intervals.\n",
+    "\n",
+    "    Args:\n",
+    "        data: DataFrame holding the column to analyse (defaults to `pre_data`).\n",
+    "        col: name of the column whose distribution is drawn.\n",
+    "        bins: number of intervals passed to pd.cut.\n",
+    "    \"\"\"\n",
+    "    cats=pd.cut(data[col],bins=bins).value_counts(sort=False)\n",
+    "    ax=cats.plot(kind='bar',title='%s的区间频数统计'%(col),figsize=(8,5))\n",
+    "    # Label only about ten evenly spaced bars; one label per bar is unreadable.\n",
+    "    new_xticks=np.unique(np.linspace(0,len(cats)-1,10).astype(int))\n",
+    "    atick=[str(cats.index[int(x)]) for x in new_xticks]\n",
+    "    # Apply the reduced tick set; previously it was computed but never used.\n",
+    "    ax.set_xticks(new_xticks)\n",
+    "    ax.set_xticklabels(atick,rotation=45,ha='right')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 查看预测值分布\n",
+    "draw_Distribution_map(data=pre_data,col='prediction5')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#查看预测值分布图\n",
+    "draw_Distribution_map(data=pre_data,col='prediction6')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# MSE: mean squared error\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "# MAE: mean absolute error\n",
+    "from sklearn.metrics import mean_absolute_error\n",
+    "# R^2: coefficient of determination\n",
+    "from sklearn.metrics import r2_score\n",
+    "\n",
+    "def valuetion(model,col,data=None,target='y'):\n",
+    "    \"\"\"Print MSE, MAE and R^2 of the predictions in `data[col]` against `data[target]`.\n",
+    "\n",
+    "    Args:\n",
+    "        model: label printed in front of the metrics (e.g. 'model5').\n",
+    "        col: name of the prediction column.\n",
+    "        data: DataFrame with the target and prediction columns; defaults to `pre_data`.\n",
+    "        target: name of the ground-truth column.\n",
+    "    \"\"\"\n",
+    "    if data is None:\n",
+    "        data=pre_data  # notebook-level frame holding y and the prediction columns\n",
+    "    y_true,y_pred=data[target],data[col]\n",
+    "    mse=mean_squared_error(y_true,y_pred)\n",
+    "    mae=mean_absolute_error(y_true,y_pred)\n",
+    "    r2=r2_score(y_true,y_pred)\n",
+    "    print('%s: MSE=%.4f MAE=%.4f R2=%.4f' % (model,mse,mae,r2))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "valuetion(model='model5',col='prediction5')\n",
+    "valuetion(model='model6',col='prediction6')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "common_3.8",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.8.17"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}