{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.preprocessing import MinMaxScaler \n",
    "from keras.models import Sequential,load_model#线性神经网络\n",
    "from keras.layers.core import Dense,Activation,Dropout#神经网络的激活函数\n",
    "from keras.optimizers import SGD\n",
    "import numpy as numpy\n",
    "import matplotlib.pyplot as plt\n",
    "from keras.layers.recurrent import LSTM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load daily bars for `security`, requesting count=1500 rows ending at end_date.\n",
    "# NOTE(review): get_price is not imported anywhere in this notebook; presumably it is\n",
    "# injected by the hosting research platform - confirm before running elsewhere.\n",
    "security = '000001.XSHG'\n",
    "price_fields = ['open', 'close', 'low', 'high', 'volume', 'money', 'pre_close']\n",
    "df = get_price(\n",
    "    security,\n",
    "    start_date=None,\n",
    "    end_date='2020-12-10',\n",
    "    frequency='daily',\n",
    "    fields=price_fields,\n",
    "    skip_paused=False,\n",
    "    fq='pre',\n",
    "    count=1500,\n",
    "    panel=True,\n",
    ")\n",
    "# Percentage change of the close relative to the previous close.\n",
    "df['rate'] = (df['close'] / df['pre_close'] - 1) * 100\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data preparation\n",
    "face_back = 10  # look-back window length used by the cells below\n",
    "\n",
    "\n",
    "def Processing_data(array, face_back=5):\n",
    "    \"\"\"Build overlapping look-back windows from a sequence.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    array : array-like\n",
    "        Sequence of observations (pandas Series, NumPy array or list).\n",
    "    face_back : int, default 5\n",
    "        Number of consecutive observations per window.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        Array whose row ``i`` equals ``array[i:i + face_back]``. There are\n",
    "        ``max(len(array) - face_back, 0)`` rows, i.e. the last possible window\n",
    "        is left out, and an input that is too short yields shape\n",
    "        ``(0, face_back)`` rather than a shapeless empty array.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``face_back`` is negative.\n",
    "    \"\"\"\n",
    "    if face_back < 0:\n",
    "        raise ValueError('face_back must be non-negative')\n",
    "    # np.asarray lets plain lists and ndarrays work, not only objects with `.values`.\n",
    "    values = np.asarray(array)\n",
    "    n_windows = max(len(values) - face_back, 0)\n",
    "    # Column of window start offsets + row of in-window offsets -> (n_windows, face_back)\n",
    "    # index matrix, so all windows are gathered in one vectorized lookup.\n",
    "    index = np.arange(n_windows)[:, None] + np.arange(face_back)[None, :]\n",
    "    return values[index]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Features: windows of face_back consecutive returns; target: the return right after each window.\n",
    "array = df['rate']\n",
    "x = Processing_data(array, face_back)\n",
    "y = array.iloc[face_back:].values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Insert a length-1 axis at position 1 because the LSTM layer needs 3-D input.\n",
    "# NOTE(review): the result is (samples, 1, face_back), i.e. one timestep carrying face_back\n",
    "# features - confirm this is intended rather than (samples, face_back, 1).\n",
    "X = x[:, np.newaxis]\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Split into training and test sets: the first 1000 samples train, the remainder test.\n",
    "train_X, test_X = np.split(X, [1000])\n",
    "train_y, test_y = np.split(y, [1000])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the LSTM model\n",
    "def build_STLM():\n",
    "    \"\"\"Create and compile a two-layer stacked LSTM regressor.\n",
    "\n",
    "    The input shape (timesteps, features) is read from the module-level\n",
    "    ``train_X``, so that array must exist before this function is called.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    keras.models.Sequential\n",
    "        Compiled model: LSTM(25) -> LSTM(48) -> Dense(1), trained with\n",
    "        mean-squared-error loss and the Adam optimizer.\n",
    "    \"\"\"\n",
    "    model = Sequential()\n",
    "    # return_sequences=True keeps the time axis so the second LSTM gets 3-D input.\n",
    "    model.add(LSTM(25, input_shape=(train_X.shape[1], train_X.shape[2]), return_sequences=True))\n",
    "    model.add(LSTM(48))\n",
    "    # Single linear unit: one predicted return per sample.\n",
    "    model.add(Dense(1))\n",
    "    # A model must be compiled before fit(); previously this step was missing.\n",
    "    model.compile(loss='mse', optimizer='adam')\n",
    "    # Previously nothing was returned, so callers received None.\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the first model on the unscaled returns.\n",
    "# NOTE(review): the full X / y is passed here, not train_X / train_y; the hold-out used\n",
    "# for val_loss comes from validation_split - confirm this is the intended protocol.\n",
    "model = build_STLM()\n",
    "history = model.fit(\n",
    "    X,\n",
    "    y,\n",
    "    epochs=50,\n",
    "    batch_size=300,\n",
    "    validation_split=0.25,\n",
    "    verbose=1,\n",
    "    shuffle=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the training and validation loss per epoch\n",
    "plt.plot(history.history['loss'], label='train')\n",
    "plt.plot(history.history['val_loss'], label='test')\n",
    "# Title follows the security actually loaded instead of a hard-coded, unrelated ticker.\n",
    "plt.title('LSTM_%s' % security, fontsize=12)\n",
    "plt.xlabel('epoch', fontsize=10)\n",
    "plt.ylabel('loss', fontsize=10)\n",
    "# Without legend() the label= arguments above are never shown.\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 数据标准化后的模型"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the observed returns and the first model's predictions side by side.\n",
    "prediction5 = model.predict(X)\n",
    "pre_data = pd.DataFrame({'y': y})\n",
    "pre_data['prediction5'] = prediction5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardize the returns (StandardScaler: zero mean, unit variance)\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "# The name `minmax` is kept because later cells use it, but this is a StandardScaler.\n",
    "minmax = StandardScaler()\n",
    "# reshape(-1, 1) adapts to however many rows df has; the row count was hard-coded before.\n",
    "rate_column = df['rate'].values.reshape(-1, 1)\n",
    "# NOTE(review): the scaler is fitted on every row, including those later used for\n",
    "# validation - confirm this look-ahead is acceptable.\n",
    "df['ration'] = minmax.fit_transform(rate_column).ravel()\n",
    "x_scaler = Processing_data(df['ration'], face_back)\n",
    "y_scaler = df['ration'].values[face_back:]\n",
    "# Add the length-1 axis at position 1, same layout as the unscaled X.\n",
    "X_scaler = np.expand_dims(x_scaler, axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train the second model on the standardized data.\n",
    "# It must see X_scaler / y_scaler: the next cell predicts from X_scaler and\n",
    "# inverse-transforms the output, which only makes sense for a model fitted in scaled space.\n",
    "model2 = build_STLM()\n",
    "history = model2.fit(X_scaler, y_scaler, epochs=50, batch_size=300, validation_split=0.25, verbose=1, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Predict in standardized space, then map the output back with the fitted scaler.\n",
    "predict6 = model2.predict(X_scaler)\n",
    "pre_data['prediction6'] = prediction6 = minmax.inverse_transform(predict6)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def draw_Distribution_map(data=pre_data, col='y', bins=100):\n",
    "    \"\"\"Draw a bar chart of how many values of ``data[col]`` fall into each interval.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Frame holding the column to summarise. The default is the ``pre_data``\n",
    "        object that exists when this function is defined.\n",
    "    col : str, default 'y'\n",
    "        Column whose values are binned.\n",
    "    bins : int, default 100\n",
    "        Number of equal-width intervals passed to ``pandas.cut``.\n",
    "    \"\"\"\n",
    "    cats = pd.cut(data[col], bins=bins).value_counts(sort=False)\n",
    "    ax = cats.plot(kind='bar', title='%s的区间频数统计' % (col), figsize=(8, 5))\n",
    "    # Label at most 10 evenly spaced bars; these ticks were computed before but never applied,\n",
    "    # which left one unreadable label per bar.\n",
    "    new_xticks = np.unique(np.round(np.linspace(0, len(cats) - 1, 10)).astype(int))\n",
    "    atick = [str(cats.index[i]) for i in new_xticks]\n",
    "    ax.set_xticks(new_xticks)\n",
    "    ax.set_xticklabels(atick, rotation=45, ha='right')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the distribution of the first model's predictions\n",
    "draw_Distribution_map(pre_data, 'prediction5')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the distribution of the second model's predictions\n",
    "draw_Distribution_map(pre_data, 'prediction6')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MSE: mean squared error\n",
    "from sklearn.metrics import mean_squared_error\n",
    "# MAE: mean absolute error\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "# R^2: coefficient of determination\n",
    "from sklearn.metrics import r2_score\n",
    "\n",
    "\n",
    "def valuetion(model='model5', col='prediction5'):\n",
    "    \"\"\"Print MSE, MAE and R^2 of one prediction column against the observed values.\n",
    "\n",
    "    This function was called by the final cell but defined nowhere in the notebook.\n",
    "    NOTE(review): the body is reconstructed from the call signature and the metrics\n",
    "    imported above - confirm it matches the intended report.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    model : str\n",
    "        Label shown in the printed line; it is not used to look anything up.\n",
    "    col : str\n",
    "        Prediction column of the module-level ``pre_data`` frame, compared with ``pre_data['y']``.\n",
    "    \"\"\"\n",
    "    y_true = pre_data['y']\n",
    "    y_pred = pre_data[col]\n",
    "    mse = mean_squared_error(y_true, y_pred)\n",
    "    mae = mean_absolute_error(y_true, y_pred)\n",
    "    r2 = r2_score(y_true, y_pred)\n",
    "    print('%s  MSE: %.4f  MAE: %.4f  R^2: %.4f' % (model, mse, mae, r2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report the evaluation for each prediction column in turn.\n",
    "valuetion(col='prediction5', model='model5')\n",
    "valuetion(col='prediction6', model='model6')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "common_3.8",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.8.17"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}