{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Hussnain\\Anaconda3\\envs\\tensorflow\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n",
"Using TensorFlow backend.\n"
]
}
],
"source": [
"#Imports\n",
"from keras.datasets import imdb\n",
"\n",
"from keras import models\n",
"from keras import layers\n",
"from keras import optimizers\n",
"from keras import losses\n",
"from keras import metrics,activations\n",
"\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
" 1048576/17464789 [>.............................] - ETA: 53:49"
]
}
],
"source": [
"#Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
"\n",
"(xtrain,ytrain), (xtest, ytest) = imdb.load_data(num_words=10000)"
]
},
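{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick sanity check (an added example, not in the original notebook):\n",
"# with num_words=10000, no word index in the training data should exceed 9999\n",
"max(max(sequence) for sequence in xtrain)"
]
},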
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Exploring the dataset\n",
"\n",
"print('xtrain shape', xtrain.shape)\n",
"print('ytrain shape', ytrain.shape)\n",
"print()\n",
"print('xtest shape', xtest.shape)\n",
"print('ytest shape', ytest.shape)\n",
"print()\n",
"print('xtrain first review as dictionary index', xtrain[1])\n",
"print()\n",
"print()\n",
"print('ytrain label', ytrain[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#index to words mapping\n",
"word_index = imdb.get_word_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])"
]
},
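{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Illustrative lookup (added): word_index ranks words by frequency, so a very\n",
"# common word such as 'the' should map to a small index, and the reverse\n",
"# mapping should take us back to the word\n",
"print(word_index['the'])\n",
"print(reverse_word_index[word_index['the']])"
]
},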
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"decode_review = ' '.join([reverse_word_index.get(i-3, reverse_word_index.get(i)) for i in xtrain[22]])\n",
"decode_review"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"def vectorize_sequences(sequences, dimension=10000):\n",
" results = np.zeros((len(sequences), dimension))\n",
" for i, sequence in enumerate(sequences):\n",
" results[i, sequence] = 1. \n",
" return results\n",
"\n",
"x_train = vectorize_sequences(xtrain)\n",
"x_test = vectorize_sequences(xtest)"
]
},
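{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch (added for illustration) of what vectorize_sequences does:\n",
"# the toy sequence below becomes a 10-dim multi-hot vector with ones at 1, 3 and 7\n",
"vectorize_sequences([[1, 3, 7]], dimension=10)"
]
},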
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ytrain = np.asarray(ytrain).astype('float32')\n",
"ytest = np.asarray(ytest).astype('float32')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#model\n",
"model = models.Sequential()\n",
"model.add(layers.Dense(16, activation=activations.relu, input_shape=(10000,)))\n",
"model.add(layers.Dense(16, activation=activations.relu))\n",
"model.add(layers.Dense(1, activation=activations.sigmoid))"
]
},
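{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Optional (added): inspect layer shapes and parameter counts;\n",
"# the first Dense layer alone has 10000*16 + 16 = 160,016 weights\n",
"model.summary()"
]
},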
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.compile(optimizer=optimizers.RMSprop(lr=0.0001), loss=losses.mse, metrics=['acc'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x_val = x_train[:10000]\n",
"y_val = ytrain[:10000]\n",
"\n",
"x_train_partial = x_train[10000:]\n",
"y_train_partial = ytrain[10000:]"
]
},
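{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Added sanity check: 10,000 samples held out for validation,\n",
"# 15,000 of the 25,000 training reviews kept for fitting\n",
"print(x_val.shape, x_train_partial.shape)"
]
},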
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"history = model.fit(x_train_partial, y_train_partial, epochs=4, batch_size=512, validation_data=(x_val,y_val))\n",
"history_dict = history.history\n",
"history_dict.keys()\n",
"print(history.history['acc'][-1])\n",
"print(history.history['val_acc'][-1])"
]
},
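{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Added: evaluate the trained model on the held-out test set;\n",
"# model.evaluate returns [test_loss, test_acc] for the compiled metrics\n",
"results = model.evaluate(x_test, ytest)\n",
"print(results)"
]
},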
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(model.predict(x_train_partial[22:23]))"
]
},
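{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Added example: turn the sigmoid probability into a hard 0/1 label by\n",
"# thresholding at 0.5, then compare with the ground-truth label\n",
"pred = (model.predict(x_train_partial[22:23]) > 0.5).astype('int32')\n",
"print('predicted:', pred[0][0], ' actual:', int(y_train_partial[22]))"
]
},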
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"loss = history_dict['loss']\n",
"val_loss = history_dict['val_loss']\n",
"epochs = range(0, len(loss)+1)\n",
"epochs"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%matplotlib\n",
"acc = history.history['acc']\n",
"val_acc = history.history['val_acc']\n",
"loss = history.history['loss']\n",
"val_loss = history.history['val_loss']\n",
"\n",
"epochs = range(1, len(acc) + 1)\n",
"\n",
"# \"bo\" is for \"blue dot\"\n",
"plt.plot(epochs, loss, 'ro', label='Training loss')\n",
"# b is for \"solid blue line\"\n",
"plt.plot(epochs, val_loss, 'b', label='Validation loss')\n",
"plt.title('Training and validation loss')\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.clf() # clear figure# clear \n",
"acc_values = history_dict['acc']\n",
"val_acc_values = history_dict['val_acc']\n",
"\n",
"plt.plot(epochs, acc, 'bo', label='Training acc')\n",
"plt.plot(epochs, val_acc, 'b', label='Validation acc')\n",
"plt.title('Training and validation accuracy')\n",
"plt.xlabel('Epochs')\n",
"plt.ylabel('Loss')\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}