Stock Price Prediction

Stock Price

February 25, 2021 · 13 mins read
# Impoort the libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense,LSTM
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
C:\Python\Python38\lib\site-packages\pandas_datareader\compat\__init__.py:7: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  from pandas.util.testing import assert_frame_equal
Using TensorFlow backend.
#get the stoke quote
df = web.DataReader('RBS.L',data_source='yahoo',start='2010-01-01',end='2020-04-29')
#show the data
df
High Low Open Close Volume Adj Close
Date
2010-01-04 321.000000 295.000000 299.000000 321.000000 11667982.0 299.654846
2010-01-05 355.000000 318.000000 320.000000 354.000000 18885923.0 330.460480
2010-01-06 384.899994 355.000000 357.200012 366.799988 19304534.0 342.409332
2010-01-07 371.000000 354.500000 366.799988 358.700012 14208016.0 334.847961
2010-01-08 366.500000 346.600006 365.500000 351.200012 10188018.0 327.846680
... ... ... ... ... ... ...
2020-04-23 1.070000 1.035500 1.049500 1.064500 19678442.0 1.064500
2020-04-24 1.069000 1.038500 1.063500 1.046000 19949004.0 1.046000
2020-04-27 1.086500 1.043000 1.082500 1.075000 22654143.0 1.075000
2020-04-28 1.172000 1.055500 1.071000 1.150500 38581905.0 1.150500
2020-04-29 120.485901 114.099998 116.050003 120.449997 33760944.0 120.449997

2603 rows × 6 columns

df.shape
(2603, 6)
#visualise the closing price graph
plt.style.use('seaborn-darkgrid')
plt.figure(figsize=(16,8))
plt.title('Close Price History',color='darkBlue',fontsize=22)
plt.plot(df['Close'],linewidth=1.5,color='darkBlue')
plt.xlabel('Date',fontsize=18,color='darkBlue')
plt.ylabel('Close Price GBP (£)',fontsize=18,color='darkBlue')
plt.xticks(color='darkBlue',fontsize=18)
plt.yticks(color='darkBlue',fontsize=18)
plt.show()

png

#Create a new dataframe with only the Close column
data = df.filter(['Close'])
#Converty the dataframe to a numpy array
dataset = data.values
#get the number of rows to train the model on
training_data_len = math.ceil(len(dataset)*.8)

training_data_len
2083
#Scale the data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data
array([[5.52164624e-01],
       [6.09114787e-01],
       [6.31204527e-01],
       ...,
       [5.00471890e-05],
       [1.80342280e-04],
       [2.06062944e-01]])
#Create the training data set
#Create the scaled training data set
train_data = scaled_data[0:training_data_len , :]
#Split the data into x_train and y_train data sets
x_train = []
y_train = []

for i in range(60, len(train_data)):
  x_train.append(train_data[i-60:i, 0])
  y_train.append(train_data[i, 0])
  if i<=60:
    print(x_train)
    print(y_train)
    print() 
[array([0.55216462, 0.60911479, 0.63120453, 0.61722589, 0.60428267,
       0.60186656, 0.59841504, 0.61256631, 0.61946936, 0.63223999,
       0.64086881, 0.6598522 , 0.65398461, 0.6077342 , 0.59668928,
       0.60842449, 0.59876021, 0.59099428, 0.56027573, 0.55561615,
       0.59979568, 0.62205801, 0.60876961, 0.57339148, 0.55509844,
       0.55302751, 0.5480228 , 0.56079344, 0.56079344, 0.53749564,
       0.54353581, 0.57218348, 0.58322835, 0.58046712, 0.59410063,
       0.61567266, 0.61946936, 0.62171283, 0.66451178, 0.64828962,
       0.63137711, 0.64949762, 0.65260402, 0.67745499, 0.68832728,
       0.67935334, 0.67089706, 0.69540291, 0.69799156, 0.73285198,
       0.73561316, 0.74510485, 0.74976443, 0.72301511, 0.75753036,
       0.75373367, 0.75856583, 0.76598659, 0.78514257, 0.78496998])]
[0.7709912914434884]
#Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train),np.array(y_train)
#reshape the data
x_train = np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
x_train.shape
(2023, 60, 1)
#Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(50,return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))
#Compile the model
model.compile(optimizer='adam', loss = 'mean_squared_error')
#Train the model
model.fit(x_train,y_train,batch_size=1,epochs=1)
Epoch 1/1
2023/2023 [==============================] - 92s 46ms/step - loss: 0.0015





<keras.callbacks.callbacks.History at 0x2b2d73e6e80>
#Create the testing data set
#Create a new array containing scaled values from index 2020 to 2080
test_data = scaled_data[training_data_len-60: , :]
#Create the data set x_test and y_test
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
  x_test.append(test_data[i-60:i, 0])
#Convert the data to a numpy array
x_test = np.array(x_test)
#Reshape the data
x_test = np.reshape(x_test,(x_test.shape[0],x_test.shape[1], 1))
#Get the models predicted price values
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)
#Get the root mean squared error(RMSE)
rmse = np.sqrt(np.mean(predictions - y_test)**2)
rmse
4.338851857643861
#Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions

#Visualise the data
plt.style.use('seaborn-darkgrid')
plt.figure(figsize=(16,8))
plt.title('Model',color='red',fontname='Consolas')
plt.xlabel('Date',fontsize=18,color='green',fontname='Consolas')
plt.ylabel('Close Price GBP (£)',fontsize=18,color='green',fontname='Consolas')

plt.plot(train['Close'],linewidth=1.5,color='darkBlue')
#plt.plot(valid[['Close','Predictions']],linewidth=1.5)
plt.plot(valid.Close,linewidth=1.5,color='green') #real price
plt.plot(valid.Predictions,linewidth=1.5,color='red') #estimated price

plt.legend(['Train','Val','Predictions'], loc='upper right',frameon=True,fancybox=True,shadow=True,framealpha=1)
plt.show()
<ipython-input-18-4f6b43f84d62>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid['Predictions'] = predictions

png

#show the valid and predicted prices
valid
Close Predictions
Date
2018-04-11 263.200012 264.371277
2018-04-12 264.700012 265.043915
2018-04-13 264.799988 265.859009
2018-04-16 262.899994 266.557800
2018-04-17 268.399994 266.636963
... ... ...
2020-04-23 1.064500 92.099251
2020-04-24 1.046000 70.637138
2020-04-27 1.075000 51.948002
2020-04-28 1.150500 37.720310
2020-04-29 120.449997 27.833059

520 rows × 2 columns

#Get the Quote
rbs_quote = web.DataReader('RBS.L',data_source='yahoo',start='2010-01-01',end='2020-04-29')
#Create a new dataframe
new_df = rbs_quote.filter(['Close'])
#Get the last 60 days closing price and convert the dataframe to an array
last_60_days = new_df[-60:].values
#Scale the data to be values between 0 and 1
last_60_days_scaled = scaler.transform(last_60_days)
#Create an empty list
X_test = []
#Append the last 60 days to the list
X_test.append(last_60_days_scaled)
#convert the X_test dataset to a numpy array
X_test = np.array(X_test)
#Reshape the data
X_test = np.reshape(X_test,(X_test.shape[0],X_test.shape[1], 1))
#Get the predicted scaled price
pred_price = model.predict(X_test)
#Undo the scaling
pred_price = scaler.inverse_transform(pred_price)
print(pred_price)
[[39.654922]]
#Get the quote
rbs_quote2 = web.DataReader('RBS.L',data_source='yahoo',start='2020-04-29',end='2020-04-29')
print(rbs_quote2['Close'])
Date
2020-04-29    120.449997
Name: Close, dtype: float64