Stock Price Prediction - Datriks Blog Python and R Programming

# Impoort the libraries
import math
import pandas_datareader as web
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense,LSTM
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')

C:\Python\Python38\lib\site-packages\pandas_datareader\compat\__init__.py:7: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.
  from pandas.util.testing import assert_frame_equal
Using TensorFlow backend.

#get the stoke quote
df = web.DataReader('RBS.L',data_source='yahoo',start='2010-01-01',end='2020-04-29')
#show the data
df

	High	Low	Open	Close	Volume	Adj Close
Date
2010-01-04	321.000000	295.000000	299.000000	321.000000	11667982.0	299.654846
2010-01-05	355.000000	318.000000	320.000000	354.000000	18885923.0	330.460480
2010-01-06	384.899994	355.000000	357.200012	366.799988	19304534.0	342.409332
2010-01-07	371.000000	354.500000	366.799988	358.700012	14208016.0	334.847961
2010-01-08	366.500000	346.600006	365.500000	351.200012	10188018.0	327.846680
...	...	...	...	...	...	...
2020-04-23	1.070000	1.035500	1.049500	1.064500	19678442.0	1.064500
2020-04-24	1.069000	1.038500	1.063500	1.046000	19949004.0	1.046000
2020-04-27	1.086500	1.043000	1.082500	1.075000	22654143.0	1.075000
2020-04-28	1.172000	1.055500	1.071000	1.150500	38581905.0	1.150500
2020-04-29	120.485901	114.099998	116.050003	120.449997	33760944.0	120.449997

2603 rows × 6 columns

df.shape

(2603, 6)

#visualise the closing price graph
plt.style.use('seaborn-darkgrid')
plt.figure(figsize=(16,8))
plt.title('Close Price History',color='darkBlue',fontsize=22)
plt.plot(df['Close'],linewidth=1.5,color='darkBlue')
plt.xlabel('Date',fontsize=18,color='darkBlue')
plt.ylabel('Close Price GBP (£)',fontsize=18,color='darkBlue')
plt.xticks(color='darkBlue',fontsize=18)
plt.yticks(color='darkBlue',fontsize=18)
plt.show()

png

#Create a new dataframe with only the Close column
data = df.filter(['Close'])
#Converty the dataframe to a numpy array
dataset = data.values
#get the number of rows to train the model on
training_data_len = math.ceil(len(dataset)*.8)

training_data_len

#Scale the data
scaler = MinMaxScaler(feature_range=(0,1))
scaled_data = scaler.fit_transform(dataset)
scaled_data

array([[5.52164624e-01],
       [6.09114787e-01],
       [6.31204527e-01],
       ...,
       [5.00471890e-05],
       [1.80342280e-04],
       [2.06062944e-01]])

#Create the training data set
#Create the scaled training data set
train_data = scaled_data[0:training_data_len , :]
#Split the data into x_train and y_train data sets
x_train = []
y_train = []

for i in range(60, len(train_data)):
  x_train.append(train_data[i-60:i, 0])
  y_train.append(train_data[i, 0])
  if i<=60:
    print(x_train)
    print(y_train)
    print() 

[array([0.55216462, 0.60911479, 0.63120453, 0.61722589, 0.60428267,
       0.60186656, 0.59841504, 0.61256631, 0.61946936, 0.63223999,
       0.64086881, 0.6598522 , 0.65398461, 0.6077342 , 0.59668928,
       0.60842449, 0.59876021, 0.59099428, 0.56027573, 0.55561615,
       0.59979568, 0.62205801, 0.60876961, 0.57339148, 0.55509844,
       0.55302751, 0.5480228 , 0.56079344, 0.56079344, 0.53749564,
       0.54353581, 0.57218348, 0.58322835, 0.58046712, 0.59410063,
       0.61567266, 0.61946936, 0.62171283, 0.66451178, 0.64828962,
       0.63137711, 0.64949762, 0.65260402, 0.67745499, 0.68832728,
       0.67935334, 0.67089706, 0.69540291, 0.69799156, 0.73285198,
       0.73561316, 0.74510485, 0.74976443, 0.72301511, 0.75753036,
       0.75373367, 0.75856583, 0.76598659, 0.78514257, 0.78496998])]
[0.7709912914434884]

#Convert the x_train and y_train to numpy arrays
x_train, y_train = np.array(x_train),np.array(y_train)

#reshape the data
x_train = np.reshape(x_train,(x_train.shape[0],x_train.shape[1],1))
x_train.shape

(2023, 60, 1)

#Build the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(50,return_sequences=False))
model.add(Dense(25))
model.add(Dense(1))

#Compile the model
model.compile(optimizer='adam', loss = 'mean_squared_error')

#Train the model
model.fit(x_train,y_train,batch_size=1,epochs=1)

Epoch 1/1
2023/2023 [==============================] - 92s 46ms/step - loss: 0.0015

<keras.callbacks.callbacks.History at 0x2b2d73e6e80>

#Create the testing data set
#Create a new array containing scaled values from index 2020 to 2080
test_data = scaled_data[training_data_len-60: , :]
#Create the data set x_test and y_test
x_test = []
y_test = dataset[training_data_len:, :]
for i in range(60, len(test_data)):
  x_test.append(test_data[i-60:i, 0])

#Convert the data to a numpy array
x_test = np.array(x_test)

#Reshape the data
x_test = np.reshape(x_test,(x_test.shape[0],x_test.shape[1], 1))

#Get the models predicted price values
predictions = model.predict(x_test)
predictions = scaler.inverse_transform(predictions)

#Get the root mean squared error(RMSE)
rmse = np.sqrt(np.mean(predictions - y_test)**2)
rmse

4.338851857643861

#Plot the data
train = data[:training_data_len]
valid = data[training_data_len:]
valid['Predictions'] = predictions

#Visualise the data
plt.style.use('seaborn-darkgrid')
plt.figure(figsize=(16,8))
plt.title('Model',color='red',fontname='Consolas')
plt.xlabel('Date',fontsize=18,color='green',fontname='Consolas')
plt.ylabel('Close Price GBP (£)',fontsize=18,color='green',fontname='Consolas')

plt.plot(train['Close'],linewidth=1.5,color='darkBlue')
#plt.plot(valid[['Close','Predictions']],linewidth=1.5)
plt.plot(valid.Close,linewidth=1.5,color='green') #real price
plt.plot(valid.Predictions,linewidth=1.5,color='red') #estimated price

plt.legend(['Train','Val','Predictions'], loc='upper right',frameon=True,fancybox=True,shadow=True,framealpha=1)
plt.show()

<ipython-input-18-4f6b43f84d62>:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  valid['Predictions'] = predictions

png

#show the valid and predicted prices
valid

	Close	Predictions
Date
2018-04-11	263.200012	264.371277
2018-04-12	264.700012	265.043915
2018-04-13	264.799988	265.859009
2018-04-16	262.899994	266.557800
2018-04-17	268.399994	266.636963
...	...	...
2020-04-23	1.064500	92.099251
2020-04-24	1.046000	70.637138
2020-04-27	1.075000	51.948002
2020-04-28	1.150500	37.720310
2020-04-29	120.449997	27.833059

520 rows × 2 columns

#Get the Quote
rbs_quote = web.DataReader('RBS.L',data_source='yahoo',start='2010-01-01',end='2020-04-29')
#Create a new dataframe
new_df = rbs_quote.filter(['Close'])
#Get the last 60 days closing price and convert the dataframe to an array
last_60_days = new_df[-60:].values
#Scale the data to be values between 0 and 1
last_60_days_scaled = scaler.transform(last_60_days)
#Create an empty list
X_test = []
#Append the last 60 days to the list
X_test.append(last_60_days_scaled)
#convert the X_test dataset to a numpy array
X_test = np.array(X_test)
#Reshape the data
X_test = np.reshape(X_test,(X_test.shape[0],X_test.shape[1], 1))
#Get the predicted scaled price
pred_price = model.predict(X_test)
#Undo the scaling
pred_price = scaler.inverse_transform(pred_price)
print(pred_price)

[[39.654922]]

#Get the quote
rbs_quote2 = web.DataReader('RBS.L',data_source='yahoo',start='2020-04-29',end='2020-04-29')
print(rbs_quote2['Close'])

Date
2020-04-29    120.449997
Name: Close, dtype: float64

← Previous Post Next Post →