7 kuukautta sitten · ae4e340c6f
--- a/Lib/future/with_model_anaylsis.ipynb
+++ b/Lib/future/with_model_anaylsis.ipynb
@@ -0,0 +1,252 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "# LSTM"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "import numpy as np\n",
			
 
				+    "import pandas as pd\n",
			
 
				+    "from sklearn.preprocessing import MinMaxScaler \n",
			
 
				+    "from keras.models import Sequential,load_model#Sequential: a linear stack of layers; load_model: loads a saved model\n",
			
 
				+    "from keras.layers.core import Dense,Activation,Dropout#core layers: fully connected (Dense), activation, dropout\n",
			
 
				+    "from keras.optimizers import SGD\n",
			
 
				+    "import numpy as numpy\n",
			
 
				+    "import matplotlib.pyplot as plt\n",
			
 
				+    "from keras.layers.recurrent import LSTM"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "security='000001.XSHG'\n",
			
 
				+    "df=get_price(security, start_date=None, end_date='2020-12-10', frequency='daily', fields=['open', 'close', 'low', 'high', 'volume', 'money',  'pre_close', ], \n",
			
 
				+    "          skip_paused=False, fq='pre', count=1500, panel=True)\n",
			
 
				+    "df['rate']=(df['close']/df['pre_close']-1)*100\n",
			
 
				+    "df.head()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# 数据处理\n",
			
 
				+    "face_back=10\n",
			
 
				+    "def Processing_data(array,face_back=5):\n",
			
 
				+    "    data=list()\n",
			
 
				+    "    for i in range(len(array)-face_back):\n",
			
 
				+    "        a=list(array[i:i+face_back].values)\n",
			
 
				+    "        data.append(a)\n",
			
 
				+    "    return np.array(data)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "array=df['rate']\n",
			
 
				+    "x=Processing_data(array,face_back)\n",
			
 
				+    "y=array.values[face_back:]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "X=np.expand_dims(x, axis=1)#add a length-1 axis at position 1; Keras LSTM layers take 3-D input (samples, timesteps, features)\n",
			
 
				+    "X.shape"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# 分割数据为训练集和测试集\n",
			
 
				+    "train_X,test_X=X[:1000,:,:],X[1000:,:,:]\n",
			
 
				+    "train_y,test_y=y[:1000,],y[1000:,]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
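				+    "# Build the LSTM model. NOTE(review): build_STLM as written has no return statement, so `model=build_STLM()` binds None -- confirm whether an output layer, compile() and `return model` are missing\n",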
			
 
				+    "def build_STLM():\n",
			
 
				+    "    model = Sequential()\n",
			
 
				+    "    model.add(LSTM(25, input_shape=(train_X.shape[1], train_X.shape[2]),return_sequences=True))\n",
			
 
				+    "    model.add(LSTM(48))"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# fit network\n",
			
 
				+    "model=build_STLM()\n",
			
 
				+    "history = model.fit(X, y, epochs=50, batch_size=300, validation_split=0.25, verbose=1,shuffle=True)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# 绘制损失图\n",
			
 
				+    "plt.plot(history.history['loss'], label='train')\n",
			
 
				+    "plt.plot(history.history['val_loss'], label='test')\n",
			
 
				+    "plt.title('LSTM_600000.SH', fontsize='12')\n",
			
 
				+    "plt.ylabel('loss', fontsize='10')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "markdown",
			
 
				+   "metadata": {},
			
 
				+   "source": [
			
 
				+    "## 数据标准化后的模型"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "pre_data=pd.DataFrame()\n",
			
 
				+    "pre_data['y']=y\n",
			
 
				+    "prediction5=model.predict(X)\n",
			
 
				+    "pre_data['prediction5']=prediction5"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
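				+    "# Standardize the data (StandardScaler: zero mean, unit variance); the scaler is named minmax but is not a MinMaxScaler. NOTE(review): fit() reshapes with a hard-coded 1500 rows while transform() uses len(df)\n",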
			
 
				+    "from sklearn.preprocessing import StandardScaler\n",
			
 
				+    "minmax=StandardScaler()\n",
			
 
				+    "minmax.fit(np.array(df['rate']).reshape(1500,1))\n",
			
 
				+    "df['ration']=minmax.transform(np.array(df['rate']).reshape(len(df),1))\n",
			
 
				+    "x_scaler=Processing_data(df['ration'],face_back)\n",
			
 
				+    "y_scaler=df['ration'].values[face_back:]\n",
			
 
				+    "X_scaler=np.expand_dims(x_scaler, axis=1)#增加维度"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
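				+    "# fit network. NOTE(review): this trains model2 on the unscaled X, y, while the next cell predicts on X_scaler and inverse-transforms the output -- X_scaler, y_scaler look intended here; confirm\n",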
			
 
				+    "model2=build_STLM()\n",
			
 
				+    "history = model2.fit(X, y, epochs=50, batch_size=300, validation_split=0.25, verbose=1,shuffle=True)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "predict6=model2.predict(X_scaler)\n",
			
 
				+    "prediction6=minmax.inverse_transform(predict6)\n",
			
 
				+    "pre_data['prediction6']=prediction6"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "def draw_Distribution_map(data=pre_data,col='y'):\n",
			
 
				+    "    cats=pd.cut(data[col],bins=100).value_counts(sort=False)\n",
			
 
				+    "    cats.plot(kind='bar',title='%s的区间频数统计'%(col),figsize=(8,5))\n",
			
 
				+    "    new_xticks=np.linspace(0,99,10)\n",
			
 
				+    "    atick=[cats.index[int(x)] for x in new_xticks]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# 查看预测值分布\n",
			
 
				+    "draw_Distribution_map(data=pre_data,col='prediction5')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "#查看预测值分布图\n",
			
 
				+    "draw_Distribution_map(data=pre_data,col='prediction6')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "#MSE均方误差\n",
			
 
				+    "from sklearn.metrics import mean_squared_error\n",
			
 
				+    "# MAE: mean absolute error\n",
			
 
				+    "from sklearn.metrics import mean_absolute_error\n",
			
 
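				+    "# R^2 coefficient of determination (NOTE(review): no matching import, e.g. sklearn.metrics.r2_score, follows this comment)"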
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "valuetion(model='model5',col='prediction5')\n",
			
 
				+    "valuetion(model='model6',col='prediction6')"
			
 
				+   ]
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "common_3.8",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "name": "python",
			
 
				+   "version": "3.8.17"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 2
			
 
				+}