02-Imdb-Binary-Classification

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Hussnain\\Anaconda3\\envs\\tensorflow\\lib\\site-packages\\h5py\\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n",
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "#Imports\n",
    "from keras.datasets import imdb\n",
    "\n",
    "from keras import models\n",
    "from keras import layers\n",
    "from keras import optimizers\n",
    "from keras import losses\n",
    "from keras import metrics,activations\n",
    "\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
      " 1048576/17464789 [>.............................] - ETA: 53:49"
     ]
    }
   ],
   "source": [
    "#Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz\n",
    "\n",
    "(xtrain,ytrain), (xtest, ytest) = imdb.load_data(num_words=10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Exploring the dataset\n",
    "\n",
    "print('xtrain shape', xtrain.shape)\n",
    "print('ytrain shape', ytrain.shape)\n",
    "print()\n",
    "print('xtest shape', xtest.shape)\n",
    "print('ytest shape', ytest.shape)\n",
    "print()\n",
    "print('xtrain first review as dictionary index', xtrain[1])\n",
    "print()\n",
    "print()\n",
    "print('ytrain label', ytrain[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#index to words mapping\n",
    "word_index = imdb.get_word_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "decode_review = ' '.join([reverse_word_index.get(i-3, reverse_word_index.get(i)) for i in xtrain[22]])\n",
    "decode_review"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def vectorize_sequences(sequences, dimension=10000):\n",
    "    results = np.zeros((len(sequences), dimension))\n",
    "    for i, sequence in enumerate(sequences):\n",
    "        results[i, sequence] = 1. \n",
    "    return results\n",
    "\n",
    "x_train = vectorize_sequences(xtrain)\n",
    "x_test = vectorize_sequences(xtest)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ytrain = np.asarray(ytrain).astype('float32')\n",
    "ytest = np.asarray(ytest).astype('float32')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#model\n",
    "model = models.Sequential()\n",
    "model.add(layers.Dense(16, activation=activations.relu, input_shape=(10000,)))\n",
    "model.add(layers.Dense(16, activation=activations.relu))\n",
    "model.add(layers.Dense(1, activation=activations.sigmoid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.compile(optimizer=optimizers.RMSprop(lr=0.0001), loss=losses.mse, metrics=['acc'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x_val = x_train[:10000]\n",
    "y_val = ytrain[:10000]\n",
    "\n",
    "x_train_partial = x_train[10000:]\n",
    "y_train_partial = ytrain[10000:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "history = model.fit(x_train_partial, y_train_partial, epochs=4, batch_size=512, validation_data=(x_val,y_val))\n",
    "history_dict = history.history\n",
    "history_dict.keys()\n",
    "print(history.history['acc'][-1])\n",
    "print(history.history['val_acc'][-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(model.predict(x_train_partial[22:23]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "loss = history_dict['loss']\n",
    "val_loss = history_dict['val_loss']\n",
    "epochs = range(0, len(loss)+1)\n",
    "epochs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib\n",
    "acc = history.history['acc']\n",
    "val_acc = history.history['val_acc']\n",
    "loss = history.history['loss']\n",
    "val_loss = history.history['val_loss']\n",
    "\n",
    "epochs = range(1, len(acc) + 1)\n",
    "\n",
    "# \"bo\" is for \"blue dot\"\n",
    "plt.plot(epochs, loss, 'ro', label='Training loss')\n",
    "# b is for \"solid blue line\"\n",
    "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n",
    "plt.title('Training and validation loss')\n",
    "plt.xlabel('Epochs')\n",
    "plt.ylabel('Loss')\n",
    "plt.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.clf()      # clear figure# clear  \n",
    "acc_values = history_dict['acc']\n",
    "val_acc_values = history_dict['val_acc']\n",
    "\n",
    "plt.plot(epochs, acc, 'bo', label='Training acc')\n",
    "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n",
    "plt.title('Training and validation accuracy')\n",
    "plt.xlabel('Epochs')\n",
    "plt.ylabel('Loss')\n",
    "plt.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
About this Algorithm
#Imports
from keras.datasets import imdb

from keras import models
from keras import layers
from keras import optimizers
from keras import losses
from keras import metrics,activations

import matplotlib.pyplot as plt
C:\Users\Hussnain\Anaconda3\envs\tensorflow\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
#Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz

(xtrain,ytrain), (xtest, ytest) = imdb.load_data(num_words=10000)
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
 1048576/17464789 [>.............................] - ETA: 53:49
#Exploring the dataset

print('xtrain shape', xtrain.shape)
print('ytrain shape', ytrain.shape)
print()
print('xtest shape', xtest.shape)
print('ytest shape', ytest.shape)
print()
print('xtrain first review as dictionary index', xtrain[1])
print()
print()
print('ytrain label', ytrain[0])
#index to words mapping
word_index = imdb.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
decode_review = ' '.join([reverse_word_index.get(i-3, reverse_word_index.get(i)) for i in xtrain[22]])
decode_review
import numpy as np

def vectorize_sequences(sequences, dimension=10000):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1. 
    return results

x_train = vectorize_sequences(xtrain)
x_test = vectorize_sequences(xtest)
ytrain = np.asarray(ytrain).astype('float32')
ytest = np.asarray(ytest).astype('float32')
#model
model = models.Sequential()
model.add(layers.Dense(16, activation=activations.relu, input_shape=(10000,)))
model.add(layers.Dense(16, activation=activations.relu))
model.add(layers.Dense(1, activation=activations.sigmoid))
model.compile(optimizer=optimizers.RMSprop(lr=0.0001), loss=losses.mse, metrics=['acc'])
x_val = x_train[:10000]
y_val = ytrain[:10000]

x_train_partial = x_train[10000:]
y_train_partial = ytrain[10000:]
history = model.fit(x_train_partial, y_train_partial, epochs=4, batch_size=512, validation_data=(x_val,y_val))
history_dict = history.history
history_dict.keys()
print(history.history['acc'][-1])
print(history.history['val_acc'][-1])
print(model.predict(x_train_partial[22:23]))
loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = range(0, len(loss)+1)
epochs
%matplotlib
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

# "bo" is for "blue dot"
plt.plot(epochs, loss, 'ro', label='Training loss')
# b is for "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()
plt.clf()      # clear figure# clear  
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']

plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()
Algerlogo

Β© Alger 2022

About us

We are a group of programmers helping each other build new things, whether it be writing complex encryption programs, or simple ciphers. Our goal is to work together to document and model beautiful, helpful and interesting algorithms using code. We are an open-source community - anyone can contribute. We check each other's work, communicate and collaborate to solve problems. We strive to be welcoming, respectful, yet make sure that our code follows the latest programming guidelines.