𝔸𝕝𝕘𝕖𝕣

Support Vector Machine

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ibm = pd.read_csv('/WA_Fn-UseC_-HR-Employee-Attrition.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_columns', None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset Information"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1470, 35)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EmployeeCount</th>\n",
       "      <th>EmployeeNumber</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StandardHours</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.0</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.0</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "      <td>1470.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>36.923810</td>\n",
       "      <td>802.485714</td>\n",
       "      <td>9.192517</td>\n",
       "      <td>2.912925</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1024.865306</td>\n",
       "      <td>2.721769</td>\n",
       "      <td>65.891156</td>\n",
       "      <td>2.729932</td>\n",
       "      <td>2.063946</td>\n",
       "      <td>2.728571</td>\n",
       "      <td>6502.931293</td>\n",
       "      <td>14313.103401</td>\n",
       "      <td>2.693197</td>\n",
       "      <td>15.209524</td>\n",
       "      <td>3.153741</td>\n",
       "      <td>2.712245</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.793878</td>\n",
       "      <td>11.279592</td>\n",
       "      <td>2.799320</td>\n",
       "      <td>2.761224</td>\n",
       "      <td>7.008163</td>\n",
       "      <td>4.229252</td>\n",
       "      <td>2.187755</td>\n",
       "      <td>4.123129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>9.135373</td>\n",
       "      <td>403.509100</td>\n",
       "      <td>8.106864</td>\n",
       "      <td>1.024165</td>\n",
       "      <td>0.0</td>\n",
       "      <td>602.024335</td>\n",
       "      <td>1.093082</td>\n",
       "      <td>20.329428</td>\n",
       "      <td>0.711561</td>\n",
       "      <td>1.106940</td>\n",
       "      <td>1.102846</td>\n",
       "      <td>4707.956783</td>\n",
       "      <td>7117.786044</td>\n",
       "      <td>2.498009</td>\n",
       "      <td>3.659938</td>\n",
       "      <td>0.360824</td>\n",
       "      <td>1.081209</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.852077</td>\n",
       "      <td>7.780782</td>\n",
       "      <td>1.289271</td>\n",
       "      <td>0.706476</td>\n",
       "      <td>6.126525</td>\n",
       "      <td>3.623137</td>\n",
       "      <td>3.222430</td>\n",
       "      <td>3.568136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>18.000000</td>\n",
       "      <td>102.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>30.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1009.000000</td>\n",
       "      <td>2094.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>30.000000</td>\n",
       "      <td>465.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>491.250000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>48.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2911.000000</td>\n",
       "      <td>8047.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>12.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>36.000000</td>\n",
       "      <td>802.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1020.500000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>66.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4919.000000</td>\n",
       "      <td>14235.500000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>43.000000</td>\n",
       "      <td>1157.000000</td>\n",
       "      <td>14.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1555.750000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>83.750000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>8379.000000</td>\n",
       "      <td>20461.500000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>7.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>60.000000</td>\n",
       "      <td>1499.000000</td>\n",
       "      <td>29.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2068.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>19999.000000</td>\n",
       "      <td>26999.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>25.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>80.0</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>40.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>15.000000</td>\n",
       "      <td>17.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               Age    DailyRate  DistanceFromHome    Education  EmployeeCount  \\\n",
       "count  1470.000000  1470.000000       1470.000000  1470.000000         1470.0   \n",
       "mean     36.923810   802.485714          9.192517     2.912925            1.0   \n",
       "std       9.135373   403.509100          8.106864     1.024165            0.0   \n",
       "min      18.000000   102.000000          1.000000     1.000000            1.0   \n",
       "25%      30.000000   465.000000          2.000000     2.000000            1.0   \n",
       "50%      36.000000   802.000000          7.000000     3.000000            1.0   \n",
       "75%      43.000000  1157.000000         14.000000     4.000000            1.0   \n",
       "max      60.000000  1499.000000         29.000000     5.000000            1.0   \n",
       "\n",
       "       EmployeeNumber  EnvironmentSatisfaction   HourlyRate  JobInvolvement  \\\n",
       "count     1470.000000              1470.000000  1470.000000     1470.000000   \n",
       "mean      1024.865306                 2.721769    65.891156        2.729932   \n",
       "std        602.024335                 1.093082    20.329428        0.711561   \n",
       "min          1.000000                 1.000000    30.000000        1.000000   \n",
       "25%        491.250000                 2.000000    48.000000        2.000000   \n",
       "50%       1020.500000                 3.000000    66.000000        3.000000   \n",
       "75%       1555.750000                 4.000000    83.750000        3.000000   \n",
       "max       2068.000000                 4.000000   100.000000        4.000000   \n",
       "\n",
       "          JobLevel  JobSatisfaction  MonthlyIncome   MonthlyRate  \\\n",
       "count  1470.000000      1470.000000    1470.000000   1470.000000   \n",
       "mean      2.063946         2.728571    6502.931293  14313.103401   \n",
       "std       1.106940         1.102846    4707.956783   7117.786044   \n",
       "min       1.000000         1.000000    1009.000000   2094.000000   \n",
       "25%       1.000000         2.000000    2911.000000   8047.000000   \n",
       "50%       2.000000         3.000000    4919.000000  14235.500000   \n",
       "75%       3.000000         4.000000    8379.000000  20461.500000   \n",
       "max       5.000000         4.000000   19999.000000  26999.000000   \n",
       "\n",
       "       NumCompaniesWorked  PercentSalaryHike  PerformanceRating  \\\n",
       "count         1470.000000        1470.000000        1470.000000   \n",
       "mean             2.693197          15.209524           3.153741   \n",
       "std              2.498009           3.659938           0.360824   \n",
       "min              0.000000          11.000000           3.000000   \n",
       "25%              1.000000          12.000000           3.000000   \n",
       "50%              2.000000          14.000000           3.000000   \n",
       "75%              4.000000          18.000000           3.000000   \n",
       "max              9.000000          25.000000           4.000000   \n",
       "\n",
       "       RelationshipSatisfaction  StandardHours  StockOptionLevel  \\\n",
       "count               1470.000000         1470.0       1470.000000   \n",
       "mean                   2.712245           80.0          0.793878   \n",
       "std                    1.081209            0.0          0.852077   \n",
       "min                    1.000000           80.0          0.000000   \n",
       "25%                    2.000000           80.0          0.000000   \n",
       "50%                    3.000000           80.0          1.000000   \n",
       "75%                    4.000000           80.0          1.000000   \n",
       "max                    4.000000           80.0          3.000000   \n",
       "\n",
       "       TotalWorkingYears  TrainingTimesLastYear  WorkLifeBalance  \\\n",
       "count        1470.000000            1470.000000      1470.000000   \n",
       "mean           11.279592               2.799320         2.761224   \n",
       "std             7.780782               1.289271         0.706476   \n",
       "min             0.000000               0.000000         1.000000   \n",
       "25%             6.000000               2.000000         2.000000   \n",
       "50%            10.000000               3.000000         3.000000   \n",
       "75%            15.000000               3.000000         3.000000   \n",
       "max            40.000000               6.000000         4.000000   \n",
       "\n",
       "       YearsAtCompany  YearsInCurrentRole  YearsSinceLastPromotion  \\\n",
       "count     1470.000000         1470.000000              1470.000000   \n",
       "mean         7.008163            4.229252                 2.187755   \n",
       "std          6.126525            3.623137                 3.222430   \n",
       "min          0.000000            0.000000                 0.000000   \n",
       "25%          3.000000            2.000000                 0.000000   \n",
       "50%          5.000000            3.000000                 1.000000   \n",
       "75%          9.000000            7.000000                 3.000000   \n",
       "max         40.000000           18.000000                15.000000   \n",
       "\n",
       "       YearsWithCurrManager  \n",
       "count           1470.000000  \n",
       "mean               4.123129  \n",
       "std                3.568136  \n",
       "min                0.000000  \n",
       "25%                2.000000  \n",
       "50%                3.000000  \n",
       "75%                7.000000  \n",
       "max               17.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age  mode:  35\n",
      "Attrition  mode:  No\n",
      "BusinessTravel  mode:  Travel_Rarely\n",
      "DailyRate  mode:  691\n",
      "Department  mode:  Research & Development\n",
      "DistanceFromHome  mode:  2\n",
      "Education  mode:  3\n",
      "EducationField  mode:  Life Sciences\n",
      "EmployeeCount  mode:  1\n",
      "EmployeeNumber  mode:  1\n",
      "EnvironmentSatisfaction  mode:  3\n",
      "Gender  mode:  Male\n",
      "HourlyRate  mode:  66\n",
      "JobInvolvement  mode:  3\n",
      "JobLevel  mode:  1\n",
      "JobRole  mode:  Sales Executive\n",
      "JobSatisfaction  mode:  4\n",
      "MaritalStatus  mode:  Married\n",
      "MonthlyIncome  mode:  2342\n",
      "MonthlyRate  mode:  9150\n",
      "NumCompaniesWorked  mode:  1\n",
      "Over18  mode:  Y\n",
      "OverTime  mode:  No\n",
      "PercentSalaryHike  mode:  11\n",
      "PerformanceRating  mode:  3\n",
      "RelationshipSatisfaction  mode:  3\n",
      "StandardHours  mode:  80\n",
      "StockOptionLevel  mode:  0\n",
      "TotalWorkingYears  mode:  10\n",
      "TrainingTimesLastYear  mode:  2\n",
      "WorkLifeBalance  mode:  3\n",
      "YearsAtCompany  mode:  5\n",
      "YearsInCurrentRole  mode:  2\n",
      "YearsSinceLastPromotion  mode:  0\n",
      "YearsWithCurrManager  mode:  2\n"
     ]
    }
   ],
   "source": [
    "import statistics\n",
    "\n",
    "# Print the most frequent value of every column, one line per column.\n",
    "for column_name in ibm.columns:\n",
    "    column_mode = statistics.mode(ibm[column_name])\n",
    "    print(column_name, \" mode: \", column_mode)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 1470 entries, 0 to 1469\n",
      "Data columns (total 35 columns):\n",
      " #   Column                    Non-Null Count  Dtype \n",
      "---  ------                    --------------  ----- \n",
      " 0   Age                       1470 non-null   int64 \n",
      " 1   Attrition                 1470 non-null   object\n",
      " 2   BusinessTravel            1470 non-null   object\n",
      " 3   DailyRate                 1470 non-null   int64 \n",
      " 4   Department                1470 non-null   object\n",
      " 5   DistanceFromHome          1470 non-null   int64 \n",
      " 6   Education                 1470 non-null   int64 \n",
      " 7   EducationField            1470 non-null   object\n",
      " 8   EmployeeCount             1470 non-null   int64 \n",
      " 9   EmployeeNumber            1470 non-null   int64 \n",
      " 10  EnvironmentSatisfaction   1470 non-null   int64 \n",
      " 11  Gender                    1470 non-null   object\n",
      " 12  HourlyRate                1470 non-null   int64 \n",
      " 13  JobInvolvement            1470 non-null   int64 \n",
      " 14  JobLevel                  1470 non-null   int64 \n",
      " 15  JobRole                   1470 non-null   object\n",
      " 16  JobSatisfaction           1470 non-null   int64 \n",
      " 17  MaritalStatus             1470 non-null   object\n",
      " 18  MonthlyIncome             1470 non-null   int64 \n",
      " 19  MonthlyRate               1470 non-null   int64 \n",
      " 20  NumCompaniesWorked        1470 non-null   int64 \n",
      " 21  Over18                    1470 non-null   object\n",
      " 22  OverTime                  1470 non-null   object\n",
      " 23  PercentSalaryHike         1470 non-null   int64 \n",
      " 24  PerformanceRating         1470 non-null   int64 \n",
      " 25  RelationshipSatisfaction  1470 non-null   int64 \n",
      " 26  StandardHours             1470 non-null   int64 \n",
      " 27  StockOptionLevel          1470 non-null   int64 \n",
      " 28  TotalWorkingYears         1470 non-null   int64 \n",
      " 29  TrainingTimesLastYear     1470 non-null   int64 \n",
      " 30  WorkLifeBalance           1470 non-null   int64 \n",
      " 31  YearsAtCompany            1470 non-null   int64 \n",
      " 32  YearsInCurrentRole        1470 non-null   int64 \n",
      " 33  YearsSinceLastPromotion   1470 non-null   int64 \n",
      " 34  YearsWithCurrManager      1470 non-null   int64 \n",
      "dtypes: int64(26), object(9)\n",
      "memory usage: 402.1+ KB\n"
     ]
    }
   ],
   "source": [
    "ibm.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Data Preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Remove columns that are not useful as features. EmployeeCount and\n",
    "# StandardHours are constant (std is 0 in ibm.describe() above).\n",
    "# NOTE(review): EmployeeNumber looks like a row identifier and Over18 is\n",
    "# presumably constant as well -- confirm with ibm.nunique().\n",
    "# A single call with errors='ignore' keeps this cell re-runnable: running it\n",
    "# again after the columns are gone no longer raises KeyError.\n",
    "ibm.drop(\n",
    "    columns=['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours'],\n",
    "    inplace=True,\n",
    "    errors='ignore',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>Department</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EducationField</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>Gender</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobRole</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>MaritalStatus</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>1102</td>\n",
       "      <td>Sales</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>2</td>\n",
       "      <td>Female</td>\n",
       "      <td>94</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>Sales Executive</td>\n",
       "      <td>4</td>\n",
       "      <td>Single</td>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>8</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>279</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>3</td>\n",
       "      <td>Male</td>\n",
       "      <td>61</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Research Scientist</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>1</td>\n",
       "      <td>No</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>Yes</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>1373</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Other</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>92</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>3</td>\n",
       "      <td>Single</td>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>6</td>\n",
       "      <td>Yes</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>1392</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>4</td>\n",
       "      <td>Female</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Research Scientist</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>591</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Medical</td>\n",
       "      <td>1</td>\n",
       "      <td>Male</td>\n",
       "      <td>40</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>9</td>\n",
       "      <td>No</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>36</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>884</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>Medical</td>\n",
       "      <td>3</td>\n",
       "      <td>Male</td>\n",
       "      <td>41</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>4</td>\n",
       "      <td>Married</td>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>4</td>\n",
       "      <td>No</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>39</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>613</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>Medical</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Healthcare Representative</td>\n",
       "      <td>1</td>\n",
       "      <td>Married</td>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>4</td>\n",
       "      <td>No</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>27</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>155</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>Life Sciences</td>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>87</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Manufacturing Director</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>1</td>\n",
       "      <td>Yes</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>49</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Frequently</td>\n",
       "      <td>1023</td>\n",
       "      <td>Sales</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>4</td>\n",
       "      <td>Male</td>\n",
       "      <td>63</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>Sales Executive</td>\n",
       "      <td>2</td>\n",
       "      <td>Married</td>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>2</td>\n",
       "      <td>No</td>\n",
       "      <td>14</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>34</td>\n",
       "      <td>No</td>\n",
       "      <td>Travel_Rarely</td>\n",
       "      <td>628</td>\n",
       "      <td>Research &amp; Development</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>Medical</td>\n",
       "      <td>2</td>\n",
       "      <td>Male</td>\n",
       "      <td>82</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>Laboratory Technician</td>\n",
       "      <td>3</td>\n",
       "      <td>Married</td>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>2</td>\n",
       "      <td>No</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1470 rows × 31 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Age Attrition     BusinessTravel  DailyRate              Department  \\\n",
       "0      41       Yes      Travel_Rarely       1102                   Sales   \n",
       "1      49        No  Travel_Frequently        279  Research & Development   \n",
       "2      37       Yes      Travel_Rarely       1373  Research & Development   \n",
       "3      33        No  Travel_Frequently       1392  Research & Development   \n",
       "4      27        No      Travel_Rarely        591  Research & Development   \n",
       "...   ...       ...                ...        ...                     ...   \n",
       "1465   36        No  Travel_Frequently        884  Research & Development   \n",
       "1466   39        No      Travel_Rarely        613  Research & Development   \n",
       "1467   27        No      Travel_Rarely        155  Research & Development   \n",
       "1468   49        No  Travel_Frequently       1023                   Sales   \n",
       "1469   34        No      Travel_Rarely        628  Research & Development   \n",
       "\n",
       "      DistanceFromHome  Education EducationField  EnvironmentSatisfaction  \\\n",
       "0                    1          2  Life Sciences                        2   \n",
       "1                    8          1  Life Sciences                        3   \n",
       "2                    2          2          Other                        4   \n",
       "3                    3          4  Life Sciences                        4   \n",
       "4                    2          1        Medical                        1   \n",
       "...                ...        ...            ...                      ...   \n",
       "1465                23          2        Medical                        3   \n",
       "1466                 6          1        Medical                        4   \n",
       "1467                 4          3  Life Sciences                        2   \n",
       "1468                 2          3        Medical                        4   \n",
       "1469                 8          3        Medical                        2   \n",
       "\n",
       "      Gender  HourlyRate  JobInvolvement  JobLevel                    JobRole  \\\n",
       "0     Female          94               3         2            Sales Executive   \n",
       "1       Male          61               2         2         Research Scientist   \n",
       "2       Male          92               2         1      Laboratory Technician   \n",
       "3     Female          56               3         1         Research Scientist   \n",
       "4       Male          40               3         1      Laboratory Technician   \n",
       "...      ...         ...             ...       ...                        ...   \n",
       "1465    Male          41               4         2      Laboratory Technician   \n",
       "1466    Male          42               2         3  Healthcare Representative   \n",
       "1467    Male          87               4         2     Manufacturing Director   \n",
       "1468    Male          63               2         2            Sales Executive   \n",
       "1469    Male          82               4         2      Laboratory Technician   \n",
       "\n",
       "      JobSatisfaction MaritalStatus  MonthlyIncome  MonthlyRate  \\\n",
       "0                   4        Single           5993        19479   \n",
       "1                   2       Married           5130        24907   \n",
       "2                   3        Single           2090         2396   \n",
       "3                   3       Married           2909        23159   \n",
       "4                   2       Married           3468        16632   \n",
       "...               ...           ...            ...          ...   \n",
       "1465                4       Married           2571        12290   \n",
       "1466                1       Married           9991        21457   \n",
       "1467                2       Married           6142         5174   \n",
       "1468                2       Married           5390        13243   \n",
       "1469                3       Married           4404        10228   \n",
       "\n",
       "      NumCompaniesWorked OverTime  PercentSalaryHike  PerformanceRating  \\\n",
       "0                      8      Yes                 11                  3   \n",
       "1                      1       No                 23                  4   \n",
       "2                      6      Yes                 15                  3   \n",
       "3                      1      Yes                 11                  3   \n",
       "4                      9       No                 12                  3   \n",
       "...                  ...      ...                ...                ...   \n",
       "1465                   4       No                 17                  3   \n",
       "1466                   4       No                 15                  3   \n",
       "1467                   1      Yes                 20                  4   \n",
       "1468                   2       No                 14                  3   \n",
       "1469                   2       No                 12                  3   \n",
       "\n",
       "      RelationshipSatisfaction  StockOptionLevel  TotalWorkingYears  \\\n",
       "0                            1                 0                  8   \n",
       "1                            4                 1                 10   \n",
       "2                            2                 0                  7   \n",
       "3                            3                 0                  8   \n",
       "4                            4                 1                  6   \n",
       "...                        ...               ...                ...   \n",
       "1465                         3                 1                 17   \n",
       "1466                         1                 1                  9   \n",
       "1467                         2                 1                  6   \n",
       "1468                         4                 0                 17   \n",
       "1469                         1                 0                  6   \n",
       "\n",
       "      TrainingTimesLastYear  WorkLifeBalance  YearsAtCompany  \\\n",
       "0                         0                1               6   \n",
       "1                         3                3              10   \n",
       "2                         3                3               0   \n",
       "3                         3                3               8   \n",
       "4                         3                3               2   \n",
       "...                     ...              ...             ...   \n",
       "1465                      3                3               5   \n",
       "1466                      5                3               7   \n",
       "1467                      0                3               6   \n",
       "1468                      3                2               9   \n",
       "1469                      3                4               4   \n",
       "\n",
       "      YearsInCurrentRole  YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                      4                        0                     5  \n",
       "1                      7                        1                     7  \n",
       "2                      0                        0                     0  \n",
       "3                      7                        3                     0  \n",
       "4                      2                        2                     2  \n",
       "...                  ...                      ...                   ...  \n",
       "1465                   2                        0                     3  \n",
       "1466                   7                        1                     7  \n",
       "1467                   2                        0                     3  \n",
       "1468                   6                        0                     8  \n",
       "1469                   3                        1                     2  \n",
       "\n",
       "[1470 rows x 31 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.drop_duplicates()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Age                         0\n",
       "Attrition                   0\n",
       "BusinessTravel              0\n",
       "DailyRate                   0\n",
       "Department                  0\n",
       "DistanceFromHome            0\n",
       "Education                   0\n",
       "EducationField              0\n",
       "EnvironmentSatisfaction     0\n",
       "Gender                      0\n",
       "HourlyRate                  0\n",
       "JobInvolvement              0\n",
       "JobLevel                    0\n",
       "JobRole                     0\n",
       "JobSatisfaction             0\n",
       "MaritalStatus               0\n",
       "MonthlyIncome               0\n",
       "MonthlyRate                 0\n",
       "NumCompaniesWorked          0\n",
       "OverTime                    0\n",
       "PercentSalaryHike           0\n",
       "PerformanceRating           0\n",
       "RelationshipSatisfaction    0\n",
       "StockOptionLevel            0\n",
       "TotalWorkingYears           0\n",
       "TrainingTimesLastYear       0\n",
       "WorkLifeBalance             0\n",
       "YearsAtCompany              0\n",
       "YearsInCurrentRole          0\n",
       "YearsSinceLastPromotion     0\n",
       "YearsWithCurrManager        0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Attrition (0 - No, 1 - Yes)\n",
    "ibm.replace({'Attrition' : {'Yes': 1, 'No': 0}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace BusinessTravel (0 - Non-Travel, 1 - Travel_Rarely, 2 - Travel_Frequently)\n",
    "ibm.replace({'BusinessTravel' : {'Non-Travel': 0, 'Travel_Rarely': 1, 'Travel_Frequently': 2}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Department\n",
    "dummy = pd.get_dummies(ibm['Department'])\n",
    "ibm.insert(5,'Dp_Sales&Development', dummy['Research & Development'])\n",
    "ibm.insert(6,'Dp_Sales', dummy['Sales'])\n",
    "ibm.insert(7,'Dp_HumanResources', dummy['Human Resources'])\n",
    "\n",
    "ibm.drop(columns = 'Department', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "#EducationField\n",
    "dummy = pd.get_dummies(ibm['EducationField'])\n",
    "ibm.insert(11,'EF_Life Sciences',dummy['Life Sciences'])\n",
    "ibm.insert(12,'EF_Medical',dummy['Medical'])\n",
    "ibm.insert(13,'EF_Marketing',dummy['Marketing'])\n",
    "ibm.insert(14,'EF_TechnicalDegree',dummy['Technical Degree'])\n",
    "ibm.insert(15,'EF_HumanResources',dummy['Human Resources'])\n",
    "ibm.insert(16,'EF_Other',dummy['Other'])\n",
    "\n",
    "ibm.drop(columns = 'EducationField', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Gender (0 - Male; 1 - Female)\n",
    "ibm.replace({'Gender': {'Male': 0, 'Female': 1}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Job role dummy variables\n",
    "dummy=pd.get_dummies(ibm['JobRole'])\n",
    "ibm.insert(23, 'JR_HealthcareRepresentive', dummy['Healthcare Representative'])\n",
    "ibm.insert(24, 'JR_HumanResource', dummy['Human Resources'])\n",
    "ibm.insert(25, 'JR_LaboratoryTechnician', dummy['Laboratory Technician'])\n",
    "ibm.insert(26, 'JR_Manager', dummy['Manager'])\n",
    "ibm.insert(27, 'JR_ManufacturingDirector', dummy['Manufacturing Director'])\n",
    "ibm.insert(28, 'JR_ResearchDirector', dummy['Research Director'])\n",
    "ibm.insert(29, 'JR_ResearchScientist', dummy['Research Scientist'])\n",
    "ibm.insert(30, 'JR_SalesExecutive', dummy['Sales Executive'])\n",
    "ibm.insert(31, 'JR_SalesRepresentative', dummy['Sales Representative'])\n",
    "\n",
    "ibm.drop(columns = 'JobRole', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MaritalStatus role dummy variables\n",
    "dummy=pd.get_dummies(ibm['MaritalStatus'])\n",
    "ibm.insert(34, 'MS_Married', dummy['Married'])\n",
    "ibm.insert(35, 'MS_Single', dummy['Single'])\n",
    "ibm.insert(36, 'MS_Divorced', dummy['Divorced'])\n",
    "\n",
    "ibm.drop(columns = 'MaritalStatus', inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Overtime (0 - No; 1 - Yes)\n",
    "ibm.replace({'OverTime': {'No': 0, 'Yes': 1}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# replace Over18 (0 - N; 1 - Y)\n",
    "ibm.replace({'Over18': {'N': 0, 'Y': 1}}, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "def iqr_outliers(data):\n",
    "    out=[]\n",
    "    \n",
    "    firstQuartile = data.quantile(0.25)\n",
    "    thirdQuartile = data.quantile(0.75)\n",
    "    \n",
    "    iqr = thirdQuartile-firstQuartile\n",
    "    \n",
    "    Lower_bound = firstQuartile - 1.5 * iqr\n",
    "    Upper_bound = thirdQuartile + 1.5 * iqr\n",
    "    \n",
    "    for i in data:\n",
    "        if i > Upper_bound or i < Lower_bound:\n",
    "            out.append(i)\n",
    "            \n",
    "    print(\"Outliers:\",out , \"\\nCount: \", len(out), \"\\n\")\n",
    "    return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Age\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Attrition\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  237 \n",
      "\n",
      "BusinessTravel\n",
      "Outliers: [2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2] \n",
      "Count:  427 \n",
      "\n",
      "DailyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_Sales&Development\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_Sales\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Dp_HumanResources\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  63 \n",
      "\n",
      "DistanceFromHome\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "Education\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EnvironmentSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Life Sciences\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Medical\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "EF_Marketing\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  159 \n",
      "\n",
      "EF_TechnicalDegree\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  132 \n",
      "\n",
      "EF_HumanResources\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  27 \n",
      "\n",
      "EF_Other\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  82 \n",
      "\n",
      "Gender\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "HourlyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobInvolvement\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobLevel\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JobSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "JR_HealthcareRepresentive\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  131 \n",
      "\n",
      "JR_HumanResource\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  52 \n",
      "\n",
      "JR_LaboratoryTechnician\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  259 \n",
      "\n",
      "JR_Manager\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  102 \n",
      "\n",
      "JR_ManufacturingDirector\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  145 \n",
      "\n",
      "JR_ResearchDirector\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  80 \n",
      "\n",
      "JR_ResearchScientist\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  292 \n",
      "\n",
      "JR_SalesExecutive\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  326 \n",
      "\n",
      "JR_SalesRepresentative\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  83 \n",
      "\n",
      "MonthlyIncome\n",
      "Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] \n",
      "Count:  114 \n",
      "\n",
      "MonthlyRate\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "NumCompaniesWorked\n",
      "Outliers: [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9] \n",
      "Count:  52 \n",
      "\n",
      "MS_Married\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "MS_Single\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "MS_Divorced\n",
      "Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] \n",
      "Count:  327 \n",
      "\n",
      "OverTime\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "PercentSalaryHike\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "PerformanceRating\n",
      "Outliers: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] \n",
      "Count:  226 \n",
      "\n",
      "RelationshipSatisfaction\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "StockOptionLevel\n",
      "Outliers: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] \n",
      "Count:  85 \n",
      "\n",
      "TotalWorkingYears\n",
      "Outliers: [31, 29, 37, 38, 30, 40, 36, 34, 32, 33, 37, 30, 36, 31, 33, 32, 37, 31, 32, 32, 30, 34, 30, 40, 29, 35, 31, 33, 31, 29, 32, 30, 33, 30, 29, 31, 32, 33, 36, 34, 31, 36, 33, 31, 29, 33, 29, 32, 31, 35, 29, 32, 34, 36, 32, 30, 36, 29, 34, 37, 29, 29, 35] \n",
      "Count:  63 \n",
      "\n",
      "TrainingTimesLastYear\n",
      "Outliers: [0, 5, 5, 5, 6, 5, 5, 5, 6, 6, 0, 0, 0, 5, 0, 5, 5, 5, 6, 6, 5, 0, 6, 5, 5, 0, 5, 5, 6, 5, 5, 5, 0, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 0, 0, 5, 5, 5, 6, 6, 5, 0, 5, 0, 5, 5, 0, 6, 0, 5, 5, 6, 6, 5, 6, 5, 0, 5, 5, 5, 5, 0, 6, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 0, 5, 0, 5, 5, 6, 5, 6, 5, 0, 5, 5, 0, 6, 6, 5, 6, 0, 5, 0, 6, 6, 6, 6, 5, 5, 0, 5, 0, 0, 6, 0, 6, 5, 6, 5, 5, 0, 5, 6, 6, 5, 5, 0, 0, 6, 0, 0, 5, 0, 5, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 6, 6, 0, 6, 6, 5, 5, 0, 0, 6, 6, 0, 5, 0, 0, 0, 0, 0, 5, 5, 6, 5, 5, 0, 5, 5, 0, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 0, 0, 5, 5, 5, 5, 6, 0, 0, 6, 6, 6, 6, 5, 5, 5, 6, 5, 0, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 0, 5, 5, 5, 5, 5, 0, 0, 0, 6, 5, 6, 6, 5, 6, 0, 6, 6, 5, 6, 6, 5, 5, 5, 0] \n",
      "Count:  238 \n",
      "\n",
      "WorkLifeBalance\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n",
      "YearsAtCompany\n",
      "Outliers: [25, 22, 22, 27, 21, 22, 37, 25, 20, 40, 20, 24, 20, 24, 33, 20, 19, 22, 33, 24, 19, 21, 20, 36, 20, 20, 22, 24, 21, 21, 25, 21, 29, 20, 27, 20, 31, 32, 20, 20, 21, 22, 22, 34, 24, 26, 31, 20, 31, 26, 19, 21, 21, 32, 21, 19, 20, 22, 20, 21, 26, 20, 22, 24, 33, 29, 25, 21, 19, 19, 20, 19, 33, 19, 19, 20, 20, 20, 20, 20, 32, 20, 21, 33, 36, 26, 30, 22, 23, 23, 21, 21, 22, 22, 19, 22, 19, 22, 20, 20, 20, 22, 20, 20] \n",
      "Count:  104 \n",
      "\n",
      "YearsInCurrentRole\n",
      "Outliers: [15, 16, 18, 15, 18, 17, 16, 15, 16, 15, 16, 16, 15, 16, 17, 15, 15, 15, 17, 17, 16] \n",
      "Count:  21 \n",
      "\n",
      "YearsSinceLastPromotion\n",
      "Outliers: [8, 15, 8, 8, 9, 13, 12, 10, 11, 9, 12, 15, 15, 15, 9, 11, 11, 9, 12, 11, 15, 11, 10, 9, 11, 9, 8, 11, 11, 8, 13, 9, 9, 12, 10, 11, 15, 13, 9, 11, 10, 8, 8, 11, 9, 11, 12, 11, 14, 13, 14, 8, 11, 15, 10, 11, 11, 15, 11, 13, 11, 13, 15, 8, 13, 15, 11, 14, 15, 15, 9, 11, 9, 8, 9, 15, 11, 12, 9, 8, 10, 14, 8, 13, 13, 12, 14, 8, 8, 8, 14, 14, 8, 12, 13, 14, 14, 12, 11, 8, 11, 9, 12, 8, 9, 11, 9] \n",
      "Count:  107 \n",
      "\n",
      "YearsWithCurrManager\n",
      "Outliers: [17, 15, 15, 15, 15, 17, 16, 17, 15, 17, 17, 17, 17, 16] \n",
      "Count:  14 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print every column name followed by the IQR outlier report for that column.\n",
    "# Indicator-style columns are scanned as well (e.g. the MS_Divorced report is\n",
    "# made up solely of 1s), so read those reports with care.\n",
    "for c_name, column in ibm.items():\n",
    "    print(c_name)\n",
    "    iqr_outliers(column)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "def remove_outliers(c_name):\n",
    "    \"\"\"Drop rows of the global ``ibm`` frame whose ``c_name`` value is an IQR outlier.\n",
    "\n",
    "    ``iqr_outliers`` is called again after every pass, because the rows that\n",
    "    remain can yield a new set of outliers; the loop ends once it reports\n",
    "    none. ``ibm`` is modified in place and nothing is returned.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    c_name\n",
    "        Label of the ``ibm`` column to clean.\n",
    "    \"\"\"\n",
    "    outliers = iqr_outliers(ibm[c_name])\n",
    "\n",
    "    while len(outliers) != 0:\n",
    "        # One vectorised membership test per pass instead of a separate\n",
    "        # scan-and-drop for every (possibly repeated) outlier value.\n",
    "        is_outlier = ibm[c_name].isin(outliers)\n",
    "        ibm.drop(ibm.index[is_outlier], inplace=True)\n",
    "        outliers = iqr_outliers(ibm[c_name])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] \n",
      "Count:  114 \n",
      "\n",
      "Outliers: [15427, 13458, 14756, 13245, 13664, 13503, 13549, 13872, 13734, 13591, 16064, 13675, 13496, 13603, 13525, 16015, 13964, 15992, 14336, 13212, 16555, 14118, 13610, 13237, 16184, 15402, 14814, 13770, 16307, 13826, 14275, 13582, 14852, 13194, 13973, 13726, 13320, 13120, 13499, 13758, 13191, 16124, 13577, 14026, 13142, 13695, 13402, 13247, 14732, 16422, 13757, 16032, 16328, 14411, 16437, 15202, 16413, 13269, 13966, 15972, 15379, 12936, 12965, 13116, 13464, 16291, 15787, 13225, 13348, 13341, 13206, 13744, 13570] \n",
      "Count:  73 \n",
      "\n",
      "Outliers: [11994, 12490, 12185, 11849, 11996, 12061, 11878, 12504, 11935, 12808, 11836, 12742, 11904, 12169, 11916, 11957, 12031] \n",
      "Count:  17 \n",
      "\n",
      "Outliers: [11713, 11691] \n",
      "Count:  2 \n",
      "\n",
      "Outliers: [11631] \n",
      "Count:  1 \n",
      "\n",
      "Outliers: [] \n",
      "Count:  0 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Repeatedly strip IQR outliers from MonthlyIncome; this drops rows from ibm in place.\n",
    "remove_outliers(c_name='MonthlyIncome')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Age</th>\n",
       "      <th>Attrition</th>\n",
       "      <th>BusinessTravel</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>Dp_Sales&amp;Development</th>\n",
       "      <th>Dp_Sales</th>\n",
       "      <th>Dp_HumanResources</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Education</th>\n",
       "      <th>EnvironmentSatisfaction</th>\n",
       "      <th>EF_Life Sciences</th>\n",
       "      <th>EF_Medical</th>\n",
       "      <th>EF_Marketing</th>\n",
       "      <th>EF_TechnicalDegree</th>\n",
       "      <th>EF_HumanResources</th>\n",
       "      <th>EF_Other</th>\n",
       "      <th>Gender</th>\n",
       "      <th>HourlyRate</th>\n",
       "      <th>JobInvolvement</th>\n",
       "      <th>JobLevel</th>\n",
       "      <th>JobSatisfaction</th>\n",
       "      <th>JR_HealthcareRepresentive</th>\n",
       "      <th>JR_HumanResource</th>\n",
       "      <th>JR_LaboratoryTechnician</th>\n",
       "      <th>JR_Manager</th>\n",
       "      <th>JR_ManufacturingDirector</th>\n",
       "      <th>JR_ResearchDirector</th>\n",
       "      <th>JR_ResearchScientist</th>\n",
       "      <th>JR_SalesExecutive</th>\n",
       "      <th>JR_SalesRepresentative</th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>NumCompaniesWorked</th>\n",
       "      <th>MS_Married</th>\n",
       "      <th>MS_Single</th>\n",
       "      <th>MS_Divorced</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>PercentSalaryHike</th>\n",
       "      <th>PerformanceRating</th>\n",
       "      <th>RelationshipSatisfaction</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>TrainingTimesLastYear</th>\n",
       "      <th>WorkLifeBalance</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsSinceLastPromotion</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>41</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1102</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>94</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>279</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>61</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>37</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1373</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>92</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>33</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1392</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>56</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>8</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>591</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>9</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>884</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>41</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>613</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>42</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>155</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>87</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1023</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>63</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>17</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>9</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>628</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1263 rows × 48 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      Age  Attrition  BusinessTravel  DailyRate  Dp_Sales&Development  \\\n",
       "0      41          1               1       1102                     0   \n",
       "1      49          0               2        279                     1   \n",
       "2      37          1               1       1373                     1   \n",
       "3      33          0               2       1392                     1   \n",
       "4      27          0               1        591                     1   \n",
       "...   ...        ...             ...        ...                   ...   \n",
       "1465   36          0               2        884                     1   \n",
       "1466   39          0               1        613                     1   \n",
       "1467   27          0               1        155                     1   \n",
       "1468   49          0               2       1023                     0   \n",
       "1469   34          0               1        628                     1   \n",
       "\n",
       "      Dp_Sales  Dp_HumanResources  DistanceFromHome  Education  \\\n",
       "0            1                  0                 1          2   \n",
       "1            0                  0                 8          1   \n",
       "2            0                  0                 2          2   \n",
       "3            0                  0                 3          4   \n",
       "4            0                  0                 2          1   \n",
       "...        ...                ...               ...        ...   \n",
       "1465         0                  0                23          2   \n",
       "1466         0                  0                 6          1   \n",
       "1467         0                  0                 4          3   \n",
       "1468         1                  0                 2          3   \n",
       "1469         0                  0                 8          3   \n",
       "\n",
       "      EnvironmentSatisfaction  EF_Life Sciences  EF_Medical  EF_Marketing  \\\n",
       "0                           2                 1           0             0   \n",
       "1                           3                 1           0             0   \n",
       "2                           4                 0           0             0   \n",
       "3                           4                 1           0             0   \n",
       "4                           1                 0           1             0   \n",
       "...                       ...               ...         ...           ...   \n",
       "1465                        3                 0           1             0   \n",
       "1466                        4                 0           1             0   \n",
       "1467                        2                 1           0             0   \n",
       "1468                        4                 0           1             0   \n",
       "1469                        2                 0           1             0   \n",
       "\n",
       "      EF_TechnicalDegree  EF_HumanResources  EF_Other  Gender  HourlyRate  \\\n",
       "0                      0                  0         0       1          94   \n",
       "1                      0                  0         0       0          61   \n",
       "2                      0                  0         1       0          92   \n",
       "3                      0                  0         0       1          56   \n",
       "4                      0                  0         0       0          40   \n",
       "...                  ...                ...       ...     ...         ...   \n",
       "1465                   0                  0         0       0          41   \n",
       "1466                   0                  0         0       0          42   \n",
       "1467                   0                  0         0       0          87   \n",
       "1468                   0                  0         0       0          63   \n",
       "1469                   0                  0         0       0          82   \n",
       "\n",
       "      JobInvolvement  JobLevel  JobSatisfaction  JR_HealthcareRepresentive  \\\n",
       "0                  3         2                4                          0   \n",
       "1                  2         2                2                          0   \n",
       "2                  2         1                3                          0   \n",
       "3                  3         1                3                          0   \n",
       "4                  3         1                2                          0   \n",
       "...              ...       ...              ...                        ...   \n",
       "1465               4         2                4                          0   \n",
       "1466               2         3                1                          1   \n",
       "1467               4         2                2                          0   \n",
       "1468               2         2                2                          0   \n",
       "1469               4         2                3                          0   \n",
       "\n",
       "      JR_HumanResource  JR_LaboratoryTechnician  JR_Manager  \\\n",
       "0                    0                        0           0   \n",
       "1                    0                        0           0   \n",
       "2                    0                        1           0   \n",
       "3                    0                        0           0   \n",
       "4                    0                        1           0   \n",
       "...                ...                      ...         ...   \n",
       "1465                 0                        1           0   \n",
       "1466                 0                        0           0   \n",
       "1467                 0                        0           0   \n",
       "1468                 0                        0           0   \n",
       "1469                 0                        1           0   \n",
       "\n",
       "      JR_ManufacturingDirector  JR_ResearchDirector  JR_ResearchScientist  \\\n",
       "0                            0                    0                     0   \n",
       "1                            0                    0                     1   \n",
       "2                            0                    0                     0   \n",
       "3                            0                    0                     1   \n",
       "4                            0                    0                     0   \n",
       "...                        ...                  ...                   ...   \n",
       "1465                         0                    0                     0   \n",
       "1466                         0                    0                     0   \n",
       "1467                         1                    0                     0   \n",
       "1468                         0                    0                     0   \n",
       "1469                         0                    0                     0   \n",
       "\n",
       "      JR_SalesExecutive  JR_SalesRepresentative  MonthlyIncome  MonthlyRate  \\\n",
       "0                     1                       0           5993        19479   \n",
       "1                     0                       0           5130        24907   \n",
       "2                     0                       0           2090         2396   \n",
       "3                     0                       0           2909        23159   \n",
       "4                     0                       0           3468        16632   \n",
       "...                 ...                     ...            ...          ...   \n",
       "1465                  0                       0           2571        12290   \n",
       "1466                  0                       0           9991        21457   \n",
       "1467                  0                       0           6142         5174   \n",
       "1468                  1                       0           5390        13243   \n",
       "1469                  0                       0           4404        10228   \n",
       "\n",
       "      NumCompaniesWorked  MS_Married  MS_Single  MS_Divorced  OverTime  \\\n",
       "0                      8           0          1            0         1   \n",
       "1                      1           1          0            0         0   \n",
       "2                      6           0          1            0         1   \n",
       "3                      1           1          0            0         1   \n",
       "4                      9           1          0            0         0   \n",
       "...                  ...         ...        ...          ...       ...   \n",
       "1465                   4           1          0            0         0   \n",
       "1466                   4           1          0            0         0   \n",
       "1467                   1           1          0            0         1   \n",
       "1468                   2           1          0            0         0   \n",
       "1469                   2           1          0            0         0   \n",
       "\n",
       "      PercentSalaryHike  PerformanceRating  RelationshipSatisfaction  \\\n",
       "0                    11                  3                         1   \n",
       "1                    23                  4                         4   \n",
       "2                    15                  3                         2   \n",
       "3                    11                  3                         3   \n",
       "4                    12                  3                         4   \n",
       "...                 ...                ...                       ...   \n",
       "1465                 17                  3                         3   \n",
       "1466                 15                  3                         1   \n",
       "1467                 20                  4                         2   \n",
       "1468                 14                  3                         4   \n",
       "1469                 12                  3                         1   \n",
       "\n",
       "      StockOptionLevel  TotalWorkingYears  TrainingTimesLastYear  \\\n",
       "0                    0                  8                      0   \n",
       "1                    1                 10                      3   \n",
       "2                    0                  7                      3   \n",
       "3                    0                  8                      3   \n",
       "4                    1                  6                      3   \n",
       "...                ...                ...                    ...   \n",
       "1465                 1                 17                      3   \n",
       "1466                 1                  9                      5   \n",
       "1467                 1                  6                      0   \n",
       "1468                 0                 17                      3   \n",
       "1469                 0                  6                      3   \n",
       "\n",
       "      WorkLifeBalance  YearsAtCompany  YearsInCurrentRole  \\\n",
       "0                   1               6                   4   \n",
       "1                   3              10                   7   \n",
       "2                   3               0                   0   \n",
       "3                   3               8                   7   \n",
       "4                   3               2                   2   \n",
       "...               ...             ...                 ...   \n",
       "1465                3               5                   2   \n",
       "1466                3               7                   7   \n",
       "1467                3               6                   2   \n",
       "1468                2               9                   6   \n",
       "1469                4               4                   3   \n",
       "\n",
       "      YearsSinceLastPromotion  YearsWithCurrManager  \n",
       "0                           0                     5  \n",
       "1                           1                     7  \n",
       "2                           0                     0  \n",
       "3                           3                     0  \n",
       "4                           2                     2  \n",
       "...                       ...                   ...  \n",
       "1465                        0                     3  \n",
       "1466                        1                     7  \n",
       "1467                        0                     3  \n",
       "1468                        0                     8  \n",
       "1469                        1                     2  \n",
       "\n",
       "[1263 rows x 48 columns]"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Classification"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Support Vector Machine (prepared by Teh Liang Sean) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import important library to do SVM\n",
    "from sklearn import svm\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target: Attrition, i.e. whether an employee stays with or leaves IBM.\n",
    "# Every other column of ibm is kept as a candidate feature for the selection step.\n",
    "y_svm = ibm.loc[:, 'Attrition']\n",
    "x_svm_find = ibm.drop('Attrition', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "                     Features         Score\n",
      "29              MonthlyIncome  26471.159476\n",
      "30                MonthlyRate   1308.443569\n",
      "2                   DailyRate   1111.594737\n",
      "44         YearsInCurrentRole    109.263859\n",
      "43             YearsAtCompany    103.805057\n",
      "46       YearsWithCurrManager    100.636711\n",
      "40          TotalWorkingYears     95.843571\n",
      "35                   OverTime     60.367656\n",
      "6            DistanceFromHome     57.197704\n",
      "0                         Age     46.705340\n",
      "28     JR_SalesRepresentative     27.299127\n",
      "33                  MS_Single     26.251695\n",
      "39           StockOptionLevel     24.376114\n",
      "20  JR_HealthcareRepresentive     10.935616\n",
      "24   JR_ManufacturingDirector      9.987076\n"
     ]
    }
   ],
   "source": [
    "# Rank the candidate features against the target with the chi-squared (chi²) test,\n",
    "# which requires non-negative feature values, and list the 15 highest-scoring ones.\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "\n",
    "best_15_features = SelectKBest(score_func=chi2, k=15)\n",
    "fit = best_15_features.fit(x_svm_find, y_svm)\n",
    "# One frame holds the feature names, the other their chi² scores\n",
    "dfcolumns = pd.DataFrame(x_svm_find.columns)\n",
    "dfscores = pd.DataFrame(fit.scores_)\n",
    "# Put names and scores side by side under readable headers\n",
    "# (this frame holds every feature; only the printout below is limited to 15)\n",
    "top_15_feature_scores = pd.concat([dfcolumns, dfscores], axis=1).set_axis(['Features', 'Score'], axis=1)\n",
    "# Show the 15 best features\n",
    "print(top_15_feature_scores.nlargest(15, 'Score'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the SVM feature frame from the 15 best features identified by the chi² ranking,\n",
    "# listed in descending score order.\n",
    "# Selecting the columns in one step keeps every label identical to its source column in ibm;\n",
    "# in particular 'JR_HealthcareRepresentive' must not carry a trailing space.\n",
    "svm_feature_names = [\n",
    "    'MonthlyIncome',\n",
    "    'MonthlyRate',\n",
    "    'DailyRate',\n",
    "    'YearsInCurrentRole',\n",
    "    'YearsAtCompany',\n",
    "    'YearsWithCurrManager',\n",
    "    'TotalWorkingYears',\n",
    "    'OverTime',\n",
    "    'DistanceFromHome',\n",
    "    'Age',\n",
    "    'JR_SalesRepresentative',\n",
    "    'MS_Single',\n",
    "    'StockOptionLevel',\n",
    "    'JR_HealthcareRepresentive',\n",
    "    'JR_ManufacturingDirector',\n",
    "]\n",
    "# .copy() gives an independent frame, so later edits cannot write back into ibm\n",
    "ibm_svm_features_df = ibm[svm_feature_names].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>MonthlyIncome</th>\n",
       "      <th>MonthlyRate</th>\n",
       "      <th>DailyRate</th>\n",
       "      <th>YearsInCurrentRole</th>\n",
       "      <th>YearsAtCompany</th>\n",
       "      <th>YearsWithCurrManager</th>\n",
       "      <th>TotalWorkingYears</th>\n",
       "      <th>OverTime</th>\n",
       "      <th>DistanceFromHome</th>\n",
       "      <th>Age</th>\n",
       "      <th>JR_SalesRepresentative</th>\n",
       "      <th>MS_Single</th>\n",
       "      <th>StockOptionLevel</th>\n",
       "      <th>JR_HealthcareRepresentive</th>\n",
       "      <th>JR_ManufacturingDirector</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5993</td>\n",
       "      <td>19479</td>\n",
       "      <td>1102</td>\n",
       "      <td>4</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>41</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>5130</td>\n",
       "      <td>24907</td>\n",
       "      <td>279</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2090</td>\n",
       "      <td>2396</td>\n",
       "      <td>1373</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>37</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2909</td>\n",
       "      <td>23159</td>\n",
       "      <td>1392</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>33</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3468</td>\n",
       "      <td>16632</td>\n",
       "      <td>591</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1465</th>\n",
       "      <td>2571</td>\n",
       "      <td>12290</td>\n",
       "      <td>884</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>23</td>\n",
       "      <td>36</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1466</th>\n",
       "      <td>9991</td>\n",
       "      <td>21457</td>\n",
       "      <td>613</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>39</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1467</th>\n",
       "      <td>6142</td>\n",
       "      <td>5174</td>\n",
       "      <td>155</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>27</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1468</th>\n",
       "      <td>5390</td>\n",
       "      <td>13243</td>\n",
       "      <td>1023</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "      <td>17</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>49</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1469</th>\n",
       "      <td>4404</td>\n",
       "      <td>10228</td>\n",
       "      <td>628</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "      <td>34</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1263 rows × 15 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      MonthlyIncome  MonthlyRate  DailyRate  YearsInCurrentRole  \\\n",
       "0              5993        19479       1102                   4   \n",
       "1              5130        24907        279                   7   \n",
       "2              2090         2396       1373                   0   \n",
       "3              2909        23159       1392                   7   \n",
       "4              3468        16632        591                   2   \n",
       "...             ...          ...        ...                 ...   \n",
       "1465           2571        12290        884                   2   \n",
       "1466           9991        21457        613                   7   \n",
       "1467           6142         5174        155                   2   \n",
       "1468           5390        13243       1023                   6   \n",
       "1469           4404        10228        628                   3   \n",
       "\n",
       "      YearsAtCompany  YearsWithCurrManager  TotalWorkingYears  OverTime  \\\n",
       "0                  6                     5                  8         1   \n",
       "1                 10                     7                 10         0   \n",
       "2                  0                     0                  7         1   \n",
       "3                  8                     0                  8         1   \n",
       "4                  2                     2                  6         0   \n",
       "...              ...                   ...                ...       ...   \n",
       "1465               5                     3                 17         0   \n",
       "1466               7                     7                  9         0   \n",
       "1467               6                     3                  6         1   \n",
       "1468               9                     8                 17         0   \n",
       "1469               4                     2                  6         0   \n",
       "\n",
       "      DistanceFromHome  Age  JR_SalesRepresentative  MS_Single  \\\n",
       "0                    1   41                       0          1   \n",
       "1                    8   49                       0          0   \n",
       "2                    2   37                       0          1   \n",
       "3                    3   33                       0          0   \n",
       "4                    2   27                       0          0   \n",
       "...                ...  ...                     ...        ...   \n",
       "1465                23   36                       0          0   \n",
       "1466                 6   39                       0          0   \n",
       "1467                 4   27                       0          0   \n",
       "1468                 2   49                       0          0   \n",
       "1469                 8   34                       0          0   \n",
       "\n",
       "      StockOptionLevel  JR_HealthcareRepresentive   JR_ManufacturingDirector  \n",
       "0                    0                           0                         0  \n",
       "1                    1                           0                         0  \n",
       "2                    0                           0                         0  \n",
       "3                    0                           0                         0  \n",
       "4                    1                           0                         0  \n",
       "...                ...                         ...                       ...  \n",
       "1465                 1                           0                         0  \n",
       "1466                 1                           1                         0  \n",
       "1467                 1                           0                         1  \n",
       "1468                 0                           0                         0  \n",
       "1469                 0                           0                         0  \n",
       "\n",
       "[1263 rows x 15 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ibm_svm_features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the frame of selected features as the SVM feature matrix x_svm\n",
    "x_svm = ibm_svm_features_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardise every selected feature so that the SVM trains more effectively\n",
    "# on features that otherwise have very different numeric ranges.\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "s_scaler = StandardScaler().fit(x_svm)\n",
    "x_scaled_svm = s_scaler.transform(x_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Under-sample the majority class with Tomek links to ease the class imbalance\n",
    "# (Attrition has far fewer positive cases than negative ones).\n",
    "from imblearn.under_sampling import TomekLinks\n",
    "\n",
    "# Resample the standardised features. x_scaled_svm was computed for the SVM, but the\n",
    "# unscaled x_svm used to be passed here, so the models never saw scaled inputs.\n",
    "# Wrapping the array in a DataFrame keeps the column names and index of x_svm.\n",
    "# NOTE(review): results recorded in this notebook were produced on unscaled data;\n",
    "# re-run the grid search and the evaluation cells after this change.\n",
    "x_scaled_svm_df = pd.DataFrame(x_scaled_svm, columns=x_svm.columns, index=x_svm.index)\n",
    "tl_svm = TomekLinks(sampling_strategy='not minority')\n",
    "x_tl_svm, y_tl_svm = tl_svm.fit_resample(x_scaled_svm_df, y_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hold out 20% of the resampled data for testing and train the models on the other 80%;\n",
    "# stratifying on the target keeps the class ratio the same in both parts.\n",
    "x_train_svm, x_test_svm, y_train_svm, y_test_svm = train_test_split(\n",
    "    x_tl_svm,\n",
    "    y_tl_svm,\n",
    "    test_size=0.2,\n",
    "    stratify=y_tl_svm,\n",
    "    random_state=40,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model 1: SVM whose hyperparameters were chosen by hand\n",
    "manual_svm_params = dict(C=2, kernel='sigmoid', gamma='scale', coef0=0.6)\n",
    "model_1_svm = svm.SVC(probability=True, random_state=40, **manual_svm_params)\n",
    "model_1_svm.fit(x_train_svm, y_train_svm)\n",
    "# Predicted classes for the held-out test set\n",
    "y_predict_1_svm = model_1_svm.predict(x_test_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 4 folds for each of 5400 candidates, totalling 21600 fits\n"
     ]
    }
   ],
   "source": [
    "# Model 2: use GridSearchCV (4-fold cross validation) to find the best SVM hyperparameters.\n",
    "# Only some hyperparameters are tuned.\n",
    "\n",
    "# import GridSearchCV library\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "\n",
    "# Hyperparameters searched:\n",
    "#   kernel: linear / rbf / sigmoid\n",
    "#   C, the regularization parameter: 1.0 to 2.9 in steps of 0.1\n",
    "#   gamma, the kernel coefficient (read by rbf and sigmoid): auto / scale\n",
    "#   coef0, the independent term of the kernel (read by sigmoid): 0.0 to 1.4 in steps of 0.1\n",
    "# 'degree' is only read by the 'poly' kernel, which is not searched, so it is not tuned.\n",
    "#\n",
    "# One sub-grid per kernel lists only the parameters that kernel actually reads.\n",
    "# A single combined grid crosses every parameter with every kernel (5400 candidates),\n",
    "# and most of those fits differ only in a parameter the kernel ignores; the sub-grids\n",
    "# cover the same distinct models with 20 + 40 + 600 = 660 candidates.\n",
    "# Rounding removes floating-point artefacts of np.arange such as 2.8000000000000016.\n",
    "c_values = np.round(np.arange(1.0, 3.0, 0.1), 1).tolist()\n",
    "coef0_values = np.round(np.arange(0.0, 1.5, 0.1), 1).tolist()\n",
    "gamma_values = ['auto', 'scale']\n",
    "param_grid = [\n",
    "    {'kernel': ['linear'], 'C': c_values},\n",
    "    {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values},\n",
    "    {'kernel': ['sigmoid'], 'C': c_values, 'gamma': gamma_values, 'coef0': coef0_values},\n",
    "]\n",
    "# set random state to 40\n",
    "find_best_para_model = svm.SVC(random_state=40)\n",
    "Grid_search_svm = GridSearchCV(find_best_para_model, param_grid, n_jobs=-1, verbose=2, cv=4)\n",
    "# this may take some time to run\n",
    "Grid_search_svm.fit(x_train_svm, y_train_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'C': 2.8000000000000016,\n",
       " 'coef0': 0.0,\n",
       " 'degree': 3,\n",
       " 'gamma': 'scale',\n",
       " 'kernel': 'rbf'}"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the best hyperparameters found by the grid search\n",
    "Grid_search_svm.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model 2: SVM built from the hyperparameters selected by the grid search.\n",
    "# They are read from best_params_ rather than pasted in by hand, so this model always\n",
    "# matches the latest search result (requires the grid search cell to have been run).\n",
    "# probability=True is set here because the search estimator was created without it.\n",
    "model_2_svm = svm.SVC(**Grid_search_svm.best_params_, probability=True, random_state=40)\n",
    "model_2_svm.fit(x_train_svm, y_train_svm)\n",
    "# Predicted classes for the held-out test set\n",
    "y_predict_2_svm = model_2_svm.predict(x_test_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy of prediction classification result for 2 model\n",
      "Hyperparameters that try to tune manually (model 1):  0.7416666666666667\n",
      "Best hyperparameters found using GridSearchCV (model 2):  0.8166666666666667\n"
     ]
    }
   ],
   "source": [
    "# Compare the test-set accuracy of the two models\n",
    "accuracy_1_svm = metrics.accuracy_score(y_test_svm, y_predict_1_svm)\n",
    "accuracy_2_svm = metrics.accuracy_score(y_test_svm, y_predict_2_svm)\n",
    "print('Accuracy of prediction classification result for 2 model')\n",
    "print('Hyperparameters that try to tune manually (model 1): ', accuracy_1_svm)\n",
    "print('Best hyperparameters found using GridSearchCV (model 2): ', accuracy_2_svm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[183  12]\n",
      " [ 32  13]]\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           0       0.85      0.94      0.89       195\n",
      "           1       0.52      0.29      0.37        45\n",
      "\n",
      "    accuracy                           0.82       240\n",
      "   macro avg       0.69      0.61      0.63       240\n",
      "weighted avg       0.79      0.82      0.79       240\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\USER\\anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:70: FutureWarning: Pass labels=[0, 1] as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error\n",
      "  warnings.warn(f\"Pass {args_msg} as keyword args. From version \"\n"
     ]
    }
   ],
   "source": [
    "# Evaluate model 2 with a confusion matrix (rows: true class, columns: predicted class)\n",
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "\n",
    "# labels must be given by keyword: passing it positionally raises a FutureWarning\n",
    "# and is rejected from scikit-learn 1.0 onwards\n",
    "print(confusion_matrix(y_test_svm, y_predict_2_svm, labels=[0, 1]))\n",
    "\n",
    "# Evaluate model 2 by precision, recall and F1-measure per class\n",
    "print(classification_report(y_test_svm, y_predict_2_svm))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAz+UlEQVR4nO3de5xN9frA8c/jfhvkmtyT2wwhg3QKxYlSEerERIkzlK5O/VIicolURIgiOXGcSFIJUS5RSpLLSMepk5RObrlPGM/vj7XG2e3msjFrr71nP+/Xa71mr71uz9ozs579/X7X+n5FVTHGGBO78vgdgDHGGH9ZIjDGmBhnicAYY2KcJQJjjIlxlgiMMSbGWSIwxpgYZ4nAGGNinCUCc4aIrBCRAyJSMIP3ewe910pEdgXMi4jcLyJbROSoiOwSkbkiUj/EYxcUkekickhEfhaR/lms+7iIHAmYjovIaREp4y5/RkR+cPf1vYgMDNr+RjfOIyKyVkTig85juIj8KCIH3XNPCIhxmrvPwyLypYhcF7Tv3iKyw933YhG5KGjfo0Vknzs9IyISsPw/7rmkn9fSoG0HishO97zmiEjxgOWlROSfIrLXnWalLxeRWiLytojsEZH9IrJERGoHffZjReQn9/c/SUTyB+37Lff3+r2IdAtYVk1ENOj3MShgeUkReU1EfnGnIUGfV0MRWe1+1rtEZHBmv3fjIVW1ySaAakAasB+4JWjZCqB30HutgF0B8+OBfwPXAAWBIkASMCDE4z8NrAYuAOoCPwPtQtx2CPBhwHxtoKj7uiKwFejkztcEDgFXAvmAx4AdQD53+a3AT8DFQF43rg3usqLusarhfIm6ATgMVHOXtwR+ARKAAsBkYGVAXH2A7UAlN64UoG/A8v8AbTI5xzuAr4HKQDHgbeC1gOWTgKVAcaAEsAx43l3WFOgFlALyA8OArwO2fdL97EsBZYFPgaEBy/8B/NM97pXAQSAh4O9G0z+/DOJ+FZjr/j1Uc/9GegYsTwFGuJ91DWA3cJPf/w+xNvkegE2RMQGDgTXA88C7QctWkEUicC+uaUDT8zj+j8C1AfPDgDkhbCfuxeWOTJZXBDYD/+fO3wu8F7A8D3AcaO3OPwq8EbA8AUjN4vibgM7u62eBiQHLLnIvkjXc+bVAcsDyXsCnAfNZJYJ5wCMB81cAqUARd/594J6A5f2AJZnsq5QbV2l3fj0ByR/oBvzgvi4KnABqBSz/OzDKfZ1dItgLNAmYfxxYHTB/DIgPmJ8LPOb3/0OsTVY1ZNL1AGa5U1sRKX8W27bGSQqfZbaCiHQTkU2ZLLsA56L5VcDbX+FchLNzFVAeeDNonwNE5AiwC+diNjt9kTsRNF/PnZ8DXOJWp+TH+Sa+OJO4ywO1cEocme2bgH0nkP05znKrcJaKSIMM4gycL4iThAEmAjeIyAXu59kZJzlkpAXws6ruy2LflUSkhHt+aar6TTZxf+9W7byaXkUXtL/A1/UC5scBPUQkv1td1RynNGPCyBKBQUSuBKrifBP+Aucbdrest/qd0jhF+kyp6mxVvTSTxcXcnwcD3jsIxIVw7DuAeap6JOh4o9ztL8P5Bpu+7w+AluK0cRTA+YZaAKfqAvc8VuNU4RwHbgEeCj6omyRm4VTPfO2+vQi4VUQuFZHCOKUsDdh3sQzOsVhAO0ESzjfsqsBHwBIRKekuex/o7dbJl8ApuRCw7w3ueexzpzSc6qLguCvhJI3ANpj3gQdEpKyIXAjcH7Dv4JjT407/3ewFmrgxN3bfnxWw7mJggIjEicglwF0BMQO8C3TB+ay/Bqap6ufBcRtvWSIw4FxMl6rqXnd+tvteulM4dcuB8gMn3df7gArncfz0i3jxgPeK49S/Z8q92N4CvJbRcnV8iXORGeq+9zXOub2Ic9Evg1NPnd7w/STOha0yUMjd7kMROXPxEpE8OMnlBE5VU/rxlrvbvwl8j1PVczhg
30cyOMcj6taJqOoaVT2uqsdU9WngV5wSD8B0nLr6FTglkI/c99P3PRf4BudCXBwnmb8e9HmVxWlHmKSq/whYNAL4EtiIU321AOd3+0sGMafHfdiN+YiqrlfVU6r6X/fzuDagIft+nM//XzjtGv9Ij1lESuEkiqdwPuvKOKXRezBhZYkgxrkX01txviX/LCI/43wDbhBQNbET55tqoOo4FzuA5ThVCYnnEoOqHsC5KAdWhTTgf1UumemE07i9Ipv18uE0RKYfb56q1lPV0jgX7qpA+rfQBsA/VXWXe3GbgdOAHQ/O3TvANJzqqM6qepIAqjpRVWuqajmchJAP2OIu3nqW56i41SqqelpVn1TVaqpayd3uR3dK39cUVT3qlo5eAq5P35FbXbQUWKiqI4JiPq6q96pqRVW9GCexf6GqaTjJJZ+I1AzYJKu407szTo97v6omqeqFqpqAc81Jr0K8GKfaaab7We/CqZq7/g97Nd7yu5HCJn8noCvOxbQKcGHAtAp4zl2nLc63w6Y4/+C1gG38/o6XCTjf+lrhVFEUAm4j9LuGRgErcS66dXASQ5Z3DeFc2J4Kei8Pzt05F7ixNnX3dX/AOo1x7lIpi3M3zOyAZU8CH+Nc6PMA3YGjQEl3+Us4d9UUyyCeQjj13+J+niuAkQHL+7qfW0WcNpGt6Z+hu/6fAj67R4A9/K9BtxROMhOcpLSF3zc8f+T+Dgq70yRgjbusOM7F98VMPsf0eAS4HPiB3zfcz8H5Jl/UjTHwrqFmOHdp5cGpIvwn8FHAtjXc9/MC1+FUJSUExPUrTjVkHpy/u0+AEX7/X8Ta5HsANvn8B+AUzZ/L4P1bcW7hTL+t8i73wnUI53bLAUCegPUFeMBd5xjON9V/BvzTJwFbs4ijIE71xyHgv0D/oOVHgKsC5iviVFldErReHvec9rvbfIPTDiAB63yMU7WxH5iCe6upu6wQTh36bjeWDbgJCafkoDh36xwJmJLc5SVx7iI66n52TwN5gz6jZ9zj7ndfi7ssIWDbfTilrMSAbWvhtFscwymJBX8+1YF33G33u59BTXfZHW7cR4PiruIub4FTjXXMPUZS0L5L4VQXHcUpHXYLWNYV+M5dthuYCVwY9Hf0k7vvjUDboH1fg1MaO+h+Zi/j3gllU/im9D9CY4wxMcraCIwxJsZZIjDGmBhnicAYY2KcJQJjjIlx+fwO4GyVKVNGq1Wr5ncYxhgTVb744ou9qlo2o2VRlwiqVavG+vXr/Q7DGGOiioh8n9kyqxoyxpgYZ4nAGGNinCUCY4yJcZYIjDEmxlkiMMaYGOdZIhBnIPJfRGRLJstFRMaLM9D3JhG5zKtYjDHGZM7LEsEMoF0Wy6/DGWavJpCMM9C3McaYMPPsOQJVXSUi1bJYpQMwU53uTz8VkZIiUkFVsxzy0Bhjwm32up28vfHH7Ff0iKqSmppK4xrlefLGUIbyPjt+thFUxBkAI90u970/EJFkEVkvIuv37NkTluCMMSbd2xt/JGX3IV+OfeTIETZs2MDGjRs5efJk9hucAz+fLJYM3stwcARVnQpMBUhMTLQBFIwxYRdfoTj/7NM8bMdLTU1l6NChjBkzhjJlyjBp0iQ6dWroybH8TAS7cAarTlcJZyQjY4yJeR07dmTJkiX07NmT5557jgsuuMCzY/lZNbQQ6OHePXQ5cNDaB4wxsezw4cOkpqYCMGDAAJYuXcr06dM9TQLgYYlARP6BM5B5GRHZhTMoeH4AVX0JWARcjzP+7TGgp1exGGMik9+NsKFK2X2I+ArFPT3GkiVLSE5O5vbbb2fEiBG0atXK0+MF8vKuoa7ZLFegn1fHN8ZEvvRGWK8vsucrvkJxOjTM8F6W87Z//3769+/Pa6+9Rp06dWjfvr0nx8lK1HVDbYzJXcLdCBtJli9fTlJSEvv27WPgwIE88cQTFCpUKOxxWCIwxhiflCtXjurVq7N48WIaNmzoWxyWCIwxnsusLSAaqoVy
kqry2muvsWHDBsaPH0/9+vVZu3YtIhndTR8+1umcMcZzmT2Q5WXde6T57rvvaNu2LT179mTjxo0cP34cwPckAFYiMMaESay2BaSlpTFx4kQee+wx8uTJw6RJk+jTpw958kTO93BLBMYY46G9e/cyePBgWrZsyUsvvUSVKlX8DukPLBEYY87Kudz7H2ttASdPnmTWrFn06NGD8uXLs2HDBqpXrx4R1UAZiZyyiTEmKpxLB2yx1BbwxRdfkJiYSM+ePfnggw8AuPjiiyM2CYCVCIwx5yBW6/uzcvz4cYYOHcqzzz5LuXLleOutt2jbtq3fYYXEEoExxuSAjh07snTpUnr37s2YMWMoWbKk3yGFzKqGjDHmHB06dOhMJ3GPP/44y5Yt4+WXX46qJABWIjDGcHYNwLHW8JuZRYsW0bdvX26//XZGjhxJy5Yt/Q7pnFmJwBhzVg3AsdTwm5G9e/fSvXt32rdvT1xcHDfddJPfIZ03KxEYYwBrAA7FBx98QFJSEgcOHGDw4ME8/vjjFCxY0O+wzpslAmOMCVGFChWoVasWkydPpn79+n6Hk2MsERgTA7JrA7B6/4ypKtOmTePLL79k4sSJ1KtXj9WrV0f0MwHnwtoIjIkB2bUBxHq9f0a+/fZb2rRpw1//+ldSUlIiqpO4nGYlAmNihLUBhCYtLY3x48czcOBA8uXLx5QpU+jdu3dEdRKX0ywRGGNMgL179zJ06FBat27N5MmTqVSpkt8heS73pjhjjAnRiRMnmD59OqdPn6Z8+fJs3LiRhQsXxkQSACsRGBP1QnkYzBqDM/f5559z1113sWXLFipVqsS1115LtWrV/A4rrKxEYEyUC+VhMGsM/qNjx47x8MMPc/nll3PgwAEWLlzItdde63dYvrASgTG5gDUEn70OHTqwbNkykpOTeeaZZyhRooTfIfnGSgTGmJhx8ODBM53EDRo0iA8//JApU6bEdBIAKxEYEzHOZeQvsPr/UL377rv07duX7t278/TTT9OiRQu/Q4oYViIwJkKcy8hfYPX/2dmzZw/dunXjxhtvpFSpUnTq1MnvkCKOlQiMiSBW15+zli5dSlJSEgcPHmTo0KEMGDCAAgUK+B1WxLFEYIzJtSpWrEjdunWZPHkyCQkJfocTsSwRGOOjwHYBq+s/f6dPn+aVV17hyy+/PHPxX7Vqld9hRTxrIzDGR4HtAlbXf3527NhB69at6dOnD9u3bz/TSZzJnpUIjPGZtQucn7S0NMaNG8egQYPInz8/L7/8Mr169cqVvYR6xdMSgYi0E5HtIrJDRAZksLyEiLwjIl+JyFYR6ellPMaY3Gfv3r0MHz6cP//5z6SkpNC7d29LAmfJs0QgInmBicB1QDzQVUTig1brB6SoagOgFfCciFiTvjEmS7/99hsvv/zy7zqJW7BgARUrWtXaufCyRNAU2KGq36rqCWAO0CFoHQXixEnfxYD9wCkPYzImIsxet5O/TPnknJ4biHXr1q2jcePGJCcns2zZMgCqVq1qpYDz4GUiqAj8EDC/y30v0ItAXeAnYDPwgKqeDt6RiCSLyHoRWb9nzx6v4jUmbNIbia2BOHRHjx6lf//+NG/enIMHD/Lee+/FbCdxOc3LxuKM0rMGzbcFNgLXADWAD0Rktar+7muSqk4FpgIkJiYG78OYqGSNxGenY8eOLFu2jLvvvptRo0ZRvLjdaptTvCwR7AIqB8xXwvnmH6gnMF8dO4DvgDoexmSMiSK//vrrmdtABw8ezMqVK5k0aZIlgRzmZYngc6CmiFQHfgRuA7oFrbMTaA2sFpHyQG3gWw9jMsYXwR3K2cNj2Vu4cCF333033bt3Z9SoUVx11VV+h5RreVYiUNVTwL3AEmAb8IaqbhWRviLS111tGHCFiGwGlgOPquper2Iyxi/BHcpZ20DmfvnlF2677TY6dOhAmTJl6NKli98h5XqePlCmqouARUHvvRTw+ifAWntMTLA2gewtXryYpKQkjhw5
wrBhw3j00UfJnz+/32HlevZksTEmYlSuXJn69eszadIk4uODHzsyXrG+howxvjl9+jSTJ0+mT58+ACQkJLBixQpLAmFmJQJjOPfRwUJljcN/9M0339C7d29Wr17Nn//8Z1JTUylUqJDfYcUkKxEYw7mPDhYqaxz+n1OnTjF69GguvfRSNm/ezKuvvsqSJUssCfjISgTGuKwxNzz27dvH6NGjuf7665k4cSIVKlTwO6SYZyUCY4znfvvtN6ZMmXKmk7ivvvqK+fPnWxKIEFYiMLleKPX/VofvnU8++YRevXqxbds2atSoQZs2bahcuXL2G5qwsRKByfVCqf+3Ovycd+TIER588EH+9Kc/cfToURYvXkybNm38DstkwEoEJiZY/X/4dezYkeXLl3PvvfcycuRI4uLi/A7JZMJKBMaYHHPgwIEzncQNGTKE1atXM2HCBEsCES7kEoGIFFXVo14GY0xGzvcef6v/D4/58+fTr18/evTowejRo7nyyiv9DsmEKNsSgYhcISIpOB3HISINRGSS55EZ4zrfe/yt/t9bP//8M126dKFz585ceOGF3HbbbX6HZM5SKCWCsTgDyCwEUNWvRKSFp1EZE8Tq+CPT+++/T1JSEseOHWPkyJE8/PDD1klcFAqpakhVfwgaDzTNm3CMMdGkatWqNGrUiIkTJ1Knjo0pFa1CaSz+QUSuAFRECojIw7jVRMaY2HL69GlefPFF/vrXvwIQHx/P8uXLLQlEuVBKBH2BF3AGnt8FLAXu8TIok3vkRGdu1tgbGbZv306vXr1Ys2YNbdu2tU7icpFQSgS1VTVJVcurajlVvR2o63VgJnfIic7crLHXXydPnuTpp5+mQYMGpKSkMGPGDN5//31LArlIKCWCCcBlIbxnTIasoTe6HThwgDFjxnDjjTcyYcIELrzwQr9DMjks00QgIs2BK4CyItI/YFFxIK/XgRlj/JOamsr06dPp27cv5cqVY9OmTVSqVMnvsIxHsioRFACKuesEPhZ4CLDRpGPY2dT7W/1+9Pn444/p1asX33zzDbVq1aJNmzaWBHK5TBOBqq4EVorIDFX9PowxmQiXXu8fygXe6vejx+HDh3nssceYOHEi1apVY+nSpdZJXIwIpY3gmIiMARKAM61DqnqNZ1GZiGf1/rlPx44d+eijj3jggQcYPnw4xYoV8zskEyahJIJZwD+BG3BuJb0D2ONlUMaY8Ni/fz+FChWiSJEiDBs2DBGheXNL8LEmlNtHS6vqNOCkqq5U1buAyz2OyxjjsXnz5lG3bl2GDBkCwBVXXGFJIEaFkghOuj93i0h7EWkEWMtRjJq9bifrvtvvdxjmPOzevZtOnTpxyy23ULlyZZKSkvwOyfgslKqh4SJSAvgbzvMDxYEHvQzKRK70u4WsATg6vffee9x+++2kpqYyevRo+vfvT758Nj5VrMv2L0BV33VfHgSuBhCRP3kZlIlszaqXoluzKn6HYc7BxRdfTJMmTXjxxRepVauW3+GYCJFp1ZCI5BWRriLysIjUc9+7QUTWAi+GLUJjzDlLS0vjhRdeoFevXgDUrVuXpUuXWhIwv5NViWAaUBn4DBgvIt8DzYEBqrogDLGZCJL+EJk9IBY9UlJS6N27N5988gnXX3+9dRJnMpVVIkgELlXV0yJSCNgLXKKqP4cnNBNJApOAtQ9EthMnTvDMM88wbNgw4uLieP311+nWrRtBY4oYc0ZWieCEqp4GUNVUEfnmbJOAiLTD6cI6L/CKqo7KYJ1WwDggP7BXVVuezTFM+NhDZNHh119/ZezYsdx8882MHz+ecuXK+R2SiXBZJYI6IrLJfS1ADXdeAFXVS7PasYjkBSYCf8YZx+BzEVmoqikB65QEJgHtVHWniNhfrDHn4Pjx40ybNo177rmHcuXKsXnzZi666CK/wzJRIqtEcL5jDjQFdqjqtwAiMgfoAKQErNMNmK+qOwFU9ZfzPKY5C9Z5XO6watUqevfuzb/+9S/q
1q1L69atLQmYs5LpXUOq+n1WUwj7rgj8EDC/y30vUC3gAhFZISJfiEiPjHYkIskisl5E1u/ZY71b5JSzGTTG2gYiz6FDh7jnnnto2bIlp06dYtmyZbRu3drvsEwU8vJJkoxapjSD4zcGWgOFgU9E5FNV/eZ3G6lOBaYCJCYmBu/DnAer949eHTt2ZMWKFTz00EMMGzaMokWL+h2SiVJeJoJdOLefpqsE/JTBOntV9ShwVERWAQ2AbzDG/MHevXspUqQIRYoUYcSIEYgIl19uXX+Z8xNKX0OISGERqX2W+/4cqCki1UWkAHAbsDBonbeBq0Qkn4gUAZoB287yOMbkeqrKnDlzqFu3Lk8++SQAzZs3tyRgckS2iUBEbgQ2Aovd+YYiEnxB/wNVPQXcCyzBubi/oapbRaSviPR119nm7ncTzoNrr6jqlnM8F2NypR9//JGOHTvStWtXqlevTo8eGTalGXPOQqkaGoJzB9AKAFXdKCLVQtm5qi4CFgW991LQ/BhgTCj7MybWvPvuuyQlJXHy5EmeffZZHnzwQfLmtSHDTc4KJRGcUtWD9lSiMeF3ySWXcMUVVzBhwgQuueQSv8MxuVQobQRbRKQbkFdEaorIBGCtx3EZE5PS0tIYO3Ysd955JwB16tTh/ffftyRgPBVKieA+YCDwGzAbp85/uJdBmXNnD4lFr61bt9KrVy/WrVtH+/btrZM4EzahlAhqq+pAVW3iTk+oaqrnkZlzYg+JRZ8TJ07w1FNP0ahRI/79738ze/Zs3nnnHUsCJmxCKRE8LyIVgLnAHFXd6nFM5jzZQ2LR5ddff2X8+PHccsstjBs3jrJly/odkokx2ZYIVPVqoBWwB5gqIptF5AmvAzMmNzt27BgvvPACaWlpZzqJmzVrliUB44uQnix2u58eLyIfAf8HDMbaCcIulPp/q/ePfB999BG9e/fm22+/pV69erRu3ZoKFSr4HZaJYaE8UFZXRIaIyBacISrX4nQXYcIslPp/q/ePXAcPHqRPnz5cc801iAgfffSRdRJnIkIoJYJXgX8A16pqcF9BJsys/j96dezYkVWrVvHII48wZMgQihQp4ndIxgAhJAJVtc5MjDlHe/bsoWjRohQpUoSnn36avHnz0qRJE7/DMuZ3Mq0aEpE33J+bRWRTwLQ5YOQyY0wGVJXZs2f/rpO4yy+/3JKAiUhZlQgecH/eEI5AjMktdu3axd133827775Ls2bNzjwlbEykymqEst3uy3syGJ3snvCEZ0x0WbhwIfHx8Xz44YeMHTuWNWvWkJCQ4HdYxmQplCeL/5zBe9fldCDG5Aa1atXiyiuvZPPmzdZTqIkamVYNicjdON/8Lw5qE4gD1ngdmDHR4NSpU4wbN45NmzYxc+ZM6tSpw6JFi7Lf0JgIklUbwWzgfeBpYEDA+4dVdb+nUcWo7B4Ys4fFIsumTZvo1asX69evp0OHDtZJnIlaWVUNqar+B+gHHA6YEJFS3ocWe7J7YMweFosMv/32G08++SSNGzdm586dvPHGG7z11luWBEzUyq5EcAPwBaBA4Mg0ClzsYVwxyx4Yi3yHDh1i0qRJdO3albFjx1K6dGm/QzLmvGSaCFT1Bvdn9fCFY0xkOnr0KFOnTuX++++nbNmybNmyhfLly/sdljE5IpS+hv4kIkXd17eLyPMiUsX70IyJDMuXL6d+/fr079+flStXAlgSMLlKKLePTgaOiUgDnJ5Hvwf+7mlUMWb2up38ZconIQ8oY8Lj119/pXfv3rRp04Z8+fKxcuVKrrnmGr/DMibHhZIITqmqAh2AF1T1BZxbSE0OSW8ktsbgyHLzzTczY8YMHn30Ub766itatGjhd0jGeCKU3kcPi8hjQHfgKhHJC+T3NqzYY43EkeG///0vxYoVo2jRoowaNYp8+fLRuHFjv8MyxlOhlAj+gjNw/V3uADUVgTGeRmVMmKkqf//734mPjz/TSVyzZs0sCZiYEMpQlT8Ds4ASInID
kKqqMz2PzJgw2blzJ+3bt6dHjx7Url2bXr16+R2SMWEVyl1DtwKfAbcAtwLrRKSL14EZEw5vv/02CQkJrFq1ivHjx7N69Wrq1q3rd1jGhFUobQQDgSaq+guAiJQFlgHzvAzMGC+pKiJCnTp1aNWqFRMmTKBatWp+h2WML0JpI8iTngRc+0LczpiIc+rUKUaPHk337t0BqF27Nu+8844lARPTQrmgLxaRJSJyp4jcCbwHWPeKJup89dVXNGvWjAEDBnDs2DFSU1P9DsmYiBBKY/EjwBTgUqABMFVVH/U6MGNySmpqKk888QSJiYn8+OOPzJs3j/nz51sncca4shqPoCbwLFAD2Aw8rKqZ95FsTIQ6fPgwU6ZMISkpieeff55SpazzXGMCZVUimA68C3TG6YF0wtnuXETaich2EdkhIgOyWK+JiKTZ3Ugmpxw5coRnn32WtLQ0ypYtS0pKCjNmzLAkYEwGsrprKE5VX3ZfbxeRDWezY/cJ5Ik4Q13uAj4XkYWqmpLBeqOBJWezf2Mys3TpUpKTk9m5cyeNGzfm6quvpmzZsn6HZUzEyqpEUEhEGonIZSJyGVA4aD47TYEdqvqtqp4A5uD0VxTsPuBN4JcMluV6s9ftZN13NuBbTti/fz89e/akbdu2FCpUiNWrV3P11Vf7HZYxES+rEsFu4PmA+Z8D5hXIrhvGisAPAfO7gGaBK4hIReBmd19NMtuRiCQDyQBVquSuHrDTh6a0zubO380338yaNWt4/PHHGTRokDUGGxOirAamOd+vUpLBexo0Pw54VFXTRDJa/UwsU4GpAImJicH7iHrNqpeiW7PcleDC5eeffyYuLo6iRYsyZswYChQoQMOGDf0Oy5io4uWDYbuAygHzlYCfgtZJBOaIyH+ALsAkEenoYUwml1BVZsyYQXx8PIMHDwagadOmlgSMOQehdDFxrj4HaopIdeBH4DagW+AKgcNgisgM4F1VXeBhTGExe93OM1U+2Ukfh8CE7j//+Q99+vRh6dKlXHnllSQnJ/sdkjFRzbMSgaqeAu7FuRtoG/CGqm4Vkb4i0ter40aC9IFmQmGD0Zydt956i3r16rF27VpefPFFVq5cSe3atf0Oy5iolm2JQJzK+yTgYlV9yh2v+EJV/Sy7bVV1EUHdUajqS5mse2dIEUcJG2gmZ6V3EpeQkECbNm144YUXqFq1qt9hGZMrhFIimAQ0B7q684dxng8wxnMnT55k5MiRJCUlAVCrVi0WLFhgScCYHBRKImimqv2AVABVPQAU8DQqY4ANGzbQtGlTBg4cSFpaGr/99pvfIRmTK4XSWHzSffpX4cx4BKc9jSoKBTYQWwPw+Tl+/DhPPfUUY8aMoWzZsrz11lt07NjR77CMybVCKRGMB94CyonICOBjYKSnUUWhwAZiawA+P0ePHmXatGnccccdpKSkWBIwxmPZlghUdZaIfAG0xnlIrKOqbvM8sihkDcTn7vDhw0yePJm//e1vlClThpSUFMqUKeN3WMbEhFDGLK4CHAPeARYCR933jMkRixcvpl69egwYMIDVq1cDWBIwJoxCaSN4D6d9QIBCQHVgO5DgYVwmBuzbt4/+/fszc+ZM6taty5o1a2je3EpUxoRbKFVD9QPn3Z5H+3gWkYkZnTp1Yu3atQwaNIiBAwdSsGBBv0MyJiaddRcTqrpBRDLtKdSYrOzevZu4uDiKFSvGs88+S4ECBWjQoIHfYRkT00J5srh/wGwe4DJgj2cRmVxJVXn11Vfp378/d911F88//zxNmtj3CWMiQSglgriA16dw2gze9Cac6GLPDoTm22+/pU+fPixbtowWLVrQt2+u7mrKmKiTZSJwHyQrpqqPhCmeqJL+7EB8heL27EAm5s+fT/fu3cmbNy+TJ08mOTmZPHm87P3cGHO2Mk0EIpJPVU+FOCxlzLJnBzKW3klc/fr1adeuHePGjaNy5crZb2iMCbusSgSf4bQHbBSRhcBc4Gj6QlWd73Fs
JgqdOHGCZ555hq1btzJ79mxq1qzJm29aTaIxkSyUMnopYB/OuMI3ADe6P435nfXr19OkSRMGDRoEOEnBGBP5sioRlHPvGNrC/x4oS5frxg0OlTUQ/9Hx48d58sknee6557jwwgt5++23uemmm/wOyxgToqxKBHmBYu4UF/A6fYpJ1rncHx09epQZM2bQq1cvtm7daknAmCiTVYlgt6o+FbZIoog1EMOhQ4eYNGkSjzzyCGXKlGHbtm2ULl3a77CMMecgqxKBZLHMxLD33nuPhIQEBg4ceKaTOEsCxkSvrEoErcMWRQQLbBOA2G4X2LNnDw8++CCzZ88mISGBefPm0axZM7/DMsacp0xLBKq6P5yBRKrANgGI7XaBzp07M3fuXIYMGcKGDRssCRiTS5x1p3OxKJbbBH788UdKlChBsWLFGDt2LAULFqRevXp+h2WMyUH2rL/JkKry8ssvEx8fz+DBgwFo3LixJQFjciFLBOYP/v3vf9O6dWuSk5Np3Lgx/fr18zskY4yHLBFkYfa6naz7LraaSubNm0f9+vX54osvmDp1KsuXL6dGjRp+h2WM8ZC1EWQh/W6hWGgcTu8krkGDBrRv356xY8dSqVIlv8MyxoSBlQiy0ax6Kbo1q+J3GJ45ceIEQ4cO5bbbbkNVqVmzJnPnzrUkYEwMsUQQwz777DMaN27MkCFDyJcvn3USZ0yMskQQg44dO8bDDz9M8+bNOXDgAO+88w6zZs2yweONiVGWCGLQ8ePHef3110lOTiYlJYUbbrBexY2JZZ4mAhFpJyLbRWSHiAzIYHmSiGxyp7Ui0sDLeGLZwYMHGTFiBKdOnaJ06dJs27aNyZMnU7x4bHaXYYz5H88SgTve8UTgOiAe6Coi8UGrfQe0VNVLgWHAVK/iiWXvvPPOmQfDPv74YwAuuOACn6MyxkQKL0sETYEdqvqtqp4A5gAdAldQ1bWqesCd/RSIiFtVZq/byV+mfPK7Poai0Z49e+jatSs33XQTpUuXZt26dbRq1crvsIwxEcbLRFAR+CFgfpf7XmZ6Ae9ntEBEkkVkvYis37NnTw6GmLH0juaivYO5zp078+abb/LUU0+xfv16EhMT/Q7JGBOBvHygLKPxDDIc4lJErsZJBFdmtFxVp+JWGyUmJoZlmMxo7Whu165dlCxZkmLFijFu3DgKFixIQkKC32EZYyKYlyWCXUDlgPlKwE/BK4nIpcArQAdV3edhPLna6dOnmTJlCvHx8WcGj7/sssssCRhjsuVlIvgcqCki1UWkAHAbsDBwBRGpAswHuqvqNx7Gkqv961//4pprrqFv3740bdqU++67z++QjDFRxLOqIVU9JSL3AkuAvMB0Vd0qIn3d5S8Bg4HSwCQRATilqr5UZAeORBZNo5DNnTuXHj16ULBgQaZNm0bPnj1xP0tjjAmJp53OqeoiYFHQey8FvO4N9PYyhlAFNhBHQyNxeidxjRo1okOHDjz//PNcdNFFfodljIlC1vtogGhoIP7tt98YMWIE27Zt44033uCSSy5hzpw5fodljIli1sVEFPn000+57LLLGDZsGIULF7ZO4owxOcISQRQ4evQoDz30EFdccQWHDx9m0aJFzJw50zqJM8bkCEsEUSA1NZU5c+Zwzz33sHXrVq677jq/QzLG5CLWRhChfv31VyZMmMBjjz12ppO4kiVL+h2WMSYXshJBBFqwYAHx8fEMHTqUtWvXAlgSMMZ4xhJBBPnvf//Lrbfeys0330y5cuVYt24dLVq08DssY0wuZ1VDEaRLly589tlnDB8+nP/7v/8jf/78fodkjIkBlgh8tnPnTi644ALi4uIYP348BQsWJD4+eNgGY4zxjlUN+eT06dNMnDiRhIQEBg8eDECjRo0sCRhjws4SgQ+2b99Oy5Ytuffee2nevDkPPPCA3yEZY2JYTFcN+dHR3BtvvEGPHj0oXLgwr776KnfccYd1EmeM8VVMlwjSO5oDPO9oTtUZT6dx48Z0
6tSJbdu2ceedd1oSMMb4LqZLBOB9R3OpqakMGzaMr7/+mnnz5lGjRg1mz57t2fGMMeZsxXSJwGtr166lUaNGjBw5kri4OOskzhgTkSwReODIkSPcf//9XHnllRw7dozFixczY8YM6yTOGBORLBF44MSJE8ybN49+/fqxZcsW2rZt63dIxhiTqZhvI8gp+/fvZ/z48TzxxBOUKlWKbdu2UaJECb/DMsaYbFmJIAe8+eabxMfHM3z48DOdxFkSMMZEC0sE52H37t107tyZLl26cNFFF7F+/XrrJM4YE3ViNhHMXreTdd/tP6993Hrrrbz33nuMGjWKzz77jIYNG+ZMcMYYE0Yx20aQ/kTx2T5E9v3331OqVCni4uKYMGEChQsXpnbt2l6EaIwxYRGzJQKAZtVL0a1ZlZDWPX36NBMmTCAhIYFBgwYB0LBhQ0sCxpioF7MlgrPx9ddf07t3b9asWUO7du146KGH/A7JGGNyTEyXCEIxZ84cGjRowLZt25g5cyaLFi2iatWqfodljDE5xhJBJk6fPg1AkyZNuOWWW0hJSaF79+7WSZwxJtexRBDk+PHjDBgwgM6dO6Oq1KhRg9dff53y5cv7HZoxxnjCEkGA1atX07BhQ0aPHk3p0qU5efKk3yEZY4znLBEAhw8fpl+/frRo0YKTJ0/ywQcf8Morr1CgQAG/QzPGGM/FzF1DgaORwe9HJDt58iQLFizgwQcfZPjw4RQtWtSvMI0xJuxiJhGkj0aWfvGvVbYwsnM9p041oVSpUnz99dfExcX5HKUxxoSfp1VDItJORLaLyA4RGZDBchGR8e7yTSJymZfxxFcozpzky+lSahcfP3ULbz7Tn08++QTAkoAxJmZ5lghEJC8wEbgOiAe6ikh80GrXATXdKRmY7FU84IwT0KlTJ2699VYqV67M+vXrueqqq7w8pDHGRDwvSwRNgR2q+q2qngDmAB2C1ukAzFTHp0BJEangVUBbU7ayePFinnnmGT799FMaNGjg1aGMMSZqeNlGUBH4IWB+F9AshHUqArsDVxKRZJwSA1WqhNY3ULD4i4pTLn8C9z30FbVq1TqnfRhjTG7kZSLI6BFcPYd1UNWpwFSAxMTEPywPxZM3JpzLZsYYk+t5WTW0C6gcMF8J+Okc1jHGGOMhLxPB50BNEakuIgWA24CFQessBHq4dw9dDhxU1d3BOzLGGOMdz6qGVPWUiNwLLAHyAtNVdauI9HWXvwQsAq4HdgDHgJ5exWOMMSZjnj5QpqqLcC72ge+9FPBagX5exmCMMSZr1teQMcbEOEsExhgT4ywRGGNMjLNEYIwxMU6c9troISJ7gO/PcfMywN4cDCca2DnHBjvn2HA+51xVVctmtCDqEsH5EJH1qprodxzhZOccG+ycY4NX52xVQ8YYE+MsERhjTIyLtUQw1e8AfGDnHBvsnGODJ+ccU20Exhhj/ijWSgTGGGOCWCIwxpgYlysTgYi0E5HtIrJDRAZksFxEZLy7fJOIXOZHnDkphHNOcs91k4isFZGoH6czu3MOWK+JiKSJSJdwxueFUM5ZRFqJyEYR2SoiK8MdY04L4W+7hIi8IyJfuecc1b0Yi8h0EflFRLZksjznr1+qmqsmnC6v/w1cDBQAvgLig9a5HngfZ4S0y4F1fscdhnO+ArjAfX1dLJxzwHof4vSC28XvuMPwey4JpABV3PlyfscdhnN+HBjtvi4L7AcK+B37eZxzC+AyYEsmy3P8+pUbSwRNgR2q+q2qngDmAB2C1ukAzFTHp0BJEakQ7kBzULbnrKprVfWAO/spzmhw0SyU3zPAfcCbwC/hDM4joZxzN2C+qu4EUNVoP+9QzlmBOBERoBhOIjgV3jBzjqquwjmHzOT49Ss3JoKKwA8B87vc9852nWhytufTC+cbRTTL9pxFpCJwM/ASuUMov+dawAUiskJEvhCRHmGLzhuhnPOLQF2cYW43Aw+o6unwhOeLHL9+eTowjU8kg/eC75EN
ZZ1oEvL5iMjVOIngSk8j8l4o5zwOeFRV05wvi1EvlHPOBzQGWgOFgU9E5FNV/cbr4DwSyjm3BTYC1wA1gA9EZLWqHvI4Nr/k+PUrNyaCXUDlgPlKON8UznadaBLS+YjIpcArwHWqui9MsXkllHNOBOa4SaAMcL2InFLVBWGJMOeF+re9V1WPAkdFZBXQAIjWRBDKOfcERqlTgb5DRL4D6gCfhSfEsMvx61durBr6HKgpItVFpABwG7AwaJ2FQA+39f1y4KCq7g53oDko23MWkSrAfKB7FH87DJTtOatqdVWtpqrVgHnAPVGcBCC0v+23gatEJJ+IFAGaAdvCHGdOCuWcd+KUgBCR8kBt4NuwRhleOX79ynUlAlU9JSL3Aktw7jiYrqpbRaSvu/wlnDtIrgd2AMdwvlFErRDPeTBQGpjkfkM+pVHcc2OI55yrhHLOqrpNRBYDm4DTwCuqmuFtiNEgxN/zMGCGiGzGqTZ5VFWjtntqEfkH0AooIyK7gCeB/ODd9cu6mDDGmBiXG6uGjDHGnAVLBMYYE+MsERhjTIyzRGCMMTHOEoExxsQ4SwQmIrm9hW4MmKplse6RHDjeDBH5zj3WBhFpfg77eEVE4t3XjwctW3u+Mbr7Sf9ctrg9bpbMZv2GInJ9Thzb5F52+6iJSCJyRFWL5fS6WexjBvCuqs4TkWuBZ1X10vPY33nHlN1+ReQ14BtVHZHF+ncCiap6b07HYnIPKxGYqCAixURkufttfbOI/KGnURGpICKrAr4xX+W+f62IfOJuO1dEsrtArwIucbft7+5ri4g86L5XVETec/u/3yIif3HfXyEiiSIyCijsxjHLXXbE/fnPwG/obkmks4jkFZExIvK5OH3M9wnhY/kEt7MxEWkqzjgTX7o/a7tP4j4F/MWN5S9u7NPd43yZ0edoYpDffW/bZFNGE5CG05HYRuAtnKfgi7vLyuA8VZleoj3i/vwbMNB9nReIc9ddBRR1338UGJzB8WbgjlcA3AKsw+m8bTNQFKd7461AI6Az8HLAtiXcnytwvn2fiSlgnfQYbwZec18XwOlFsjCQDDzhvl8QWA9UzyDOIwHnNxdo584XB/K5r9sAb7qv7wReDNh+JHC7+7okTh9ERf3+fdvk75TrupgwucZxVW2YPiMi+YGRItICp+uEikB54OeAbT4HprvrLlDVjSLSEogH1rhdaxTA+SadkTEi8gSwB6eH1tbAW+p04IaIzAeuAhYDz4rIaJzqpNVncV7vA+NFpCDQDlilqsfd6qhL5X+jqJUAagLfBW1fWEQ2AtWAL4APAtZ/TURq4vREmT+T418L3CQiD7vzhYAqRHd/ROY8WSIw0SIJZ/Spxqp6UkT+g3MRO0NVV7mJoj3wdxEZAxwAPlDVriEc4xFVnZc+IyJtMlpJVb8RkcY4/b08LSJLVfWpUE5CVVNFZAVO18l/Af6RfjjgPlVdks0ujqtqQxEpAbwL9APG4/S385Gq3uw2rK/IZHsBOqvq9lDiNbHB2ghMtCgB/OImgauBqsEriEhVd52XgWk4w/19CvxJRNLr/IuISK0Qj7kK6OhuUxSnWme1iFwEHFPV14Fn3eMEO+mWTDIyB6ejsKtwOlPD/Xl3+jYiUss9ZoZU9SBwP/Cwu00J4Ed38Z0Bqx7GqSJLtwS4T9zikYg0yuwYJnZYIjDRYhaQKCLrcUoHX2ewTitgo4h8iVOP/4Kq7sG5MP5DRDbhJIY6oRxQVTfgtB18htNm8IqqfgnUBz5zq2gGAsMz2HwqsCm9sTjIUpxxaZepM/wiOONEpAAbxBm0fArZlNjdWL7C6Zr5GZzSyRqc9oN0HwHx6Y3FOCWH/G5sW9x5E+Ps9lFjjIlxViIwxpgYZ4nAGGNinCUCY4yJcZYIjDEmxlkiMMaYGGeJwBhjYpwlAmOMiXH/D95IOHfpsjmgAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "#Evaluating classification result by ROC curves\n",
    "from sklearn.metrics import roc_curve\n",
    "y_pred_prob_svm = model_2_svm.predict_proba(x_test_svm)[:,1]\n",
    "fpr, tpr, threshold = roc_curve(y_test_svm, y_pred_prob_svm)\n",
    "plt.plot([0, 1], [0, 1], 'k--')\n",
    "plt.plot(fpr,tpr)\n",
    "auc = roc_auc_score(y_test_svm,  y_pred_prob_svm)\n",
    "plt.title(f'AUC: {auc}')\n",
    "plt.xlabel('False Positive Rate')\n",
    "plt.ylabel('True Positive Rate')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

Try this Code

About this Algorithm

Import Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

Read Data

# Load the HR employee-attrition CSV into the working frame `ibm`.
# NOTE(review): the path is absolute (filesystem root) — adjust it to wherever the file lives.
attrition_csv = '/WA_Fn-UseC_-HR-Employee-Attrition.csv'
ibm = pd.read_csv(attrition_csv)

# Lift the column display limit so wide frames are shown without truncation.
pd.set_option('display.max_columns', None)

Dataset Information

# Dimensions of the loaded frame as (rows, columns).
ibm.shape

(1470, 35)

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
ibm.describe()

	Age	DailyRate	DistanceFromHome	Education	EmployeeCount	EmployeeNumber	EnvironmentSatisfaction	HourlyRate	JobInvolvement	JobLevel	JobSatisfaction	MonthlyIncome	MonthlyRate	NumCompaniesWorked	PercentSalaryHike	PerformanceRating	RelationshipSatisfaction	StandardHours	StockOptionLevel	TotalWorkingYears	TrainingTimesLastYear	WorkLifeBalance	YearsAtCompany	YearsInCurrentRole	YearsSinceLastPromotion	YearsWithCurrManager
count	1470.000000	1470.000000	1470.000000	1470.000000	1470.0	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.0	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000	1470.000000
mean	36.923810	802.485714	9.192517	2.912925	1.0	1024.865306	2.721769	65.891156	2.729932	2.063946	2.728571	6502.931293	14313.103401	2.693197	15.209524	3.153741	2.712245	80.0	0.793878	11.279592	2.799320	2.761224	7.008163	4.229252	2.187755	4.123129
std	9.135373	403.509100	8.106864	1.024165	0.0	602.024335	1.093082	20.329428	0.711561	1.106940	1.102846	4707.956783	7117.786044	2.498009	3.659938	0.360824	1.081209	0.0	0.852077	7.780782	1.289271	0.706476	6.126525	3.623137	3.222430	3.568136
min	18.000000	102.000000	1.000000	1.000000	1.0	1.000000	1.000000	30.000000	1.000000	1.000000	1.000000	1009.000000	2094.000000	0.000000	11.000000	3.000000	1.000000	80.0	0.000000	0.000000	0.000000	1.000000	0.000000	0.000000	0.000000	0.000000
25%	30.000000	465.000000	2.000000	2.000000	1.0	491.250000	2.000000	48.000000	2.000000	1.000000	2.000000	2911.000000	8047.000000	1.000000	12.000000	3.000000	2.000000	80.0	0.000000	6.000000	2.000000	2.000000	3.000000	2.000000	0.000000	2.000000
50%	36.000000	802.000000	7.000000	3.000000	1.0	1020.500000	3.000000	66.000000	3.000000	2.000000	3.000000	4919.000000	14235.500000	2.000000	14.000000	3.000000	3.000000	80.0	1.000000	10.000000	3.000000	3.000000	5.000000	3.000000	1.000000	3.000000
75%	43.000000	1157.000000	14.000000	4.000000	1.0	1555.750000	4.000000	83.750000	3.000000	3.000000	4.000000	8379.000000	20461.500000	4.000000	18.000000	3.000000	4.000000	80.0	1.000000	15.000000	3.000000	3.000000	9.000000	7.000000	3.000000	7.000000
max	60.000000	1499.000000	29.000000	5.000000	1.0	2068.000000	4.000000	100.000000	4.000000	5.000000	4.000000	19999.000000	26999.000000	9.000000	25.000000	4.000000	4.000000	80.0	3.000000	40.000000	6.000000	4.000000	40.000000	18.000000	15.000000	17.000000

import statistics

# Print the most frequent value of every column, one line per column.
for column_name in ibm.columns:
    most_common = statistics.mode(ibm[column_name])
    print(column_name, " mode: ", most_common)

Age  mode:  35
Attrition  mode:  No
BusinessTravel  mode:  Travel_Rarely
DailyRate  mode:  691
Department  mode:  Research & Development
DistanceFromHome  mode:  2
Education  mode:  3
EducationField  mode:  Life Sciences
EmployeeCount  mode:  1
EmployeeNumber  mode:  1
EnvironmentSatisfaction  mode:  3
Gender  mode:  Male
HourlyRate  mode:  66
JobInvolvement  mode:  3
JobLevel  mode:  1
JobRole  mode:  Sales Executive
JobSatisfaction  mode:  4
MaritalStatus  mode:  Married
MonthlyIncome  mode:  2342
MonthlyRate  mode:  9150
NumCompaniesWorked  mode:  1
Over18  mode:  Y
OverTime  mode:  No
PercentSalaryHike  mode:  11
PerformanceRating  mode:  3
RelationshipSatisfaction  mode:  3
StandardHours  mode:  80
StockOptionLevel  mode:  0
TotalWorkingYears  mode:  10
TrainingTimesLastYear  mode:  2
WorkLifeBalance  mode:  3
YearsAtCompany  mode:  5
YearsInCurrentRole  mode:  2
YearsSinceLastPromotion  mode:  0
YearsWithCurrManager  mode:  2

# Column names, non-null counts and dtypes, plus the frame's memory usage.
ibm.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1470 entries, 0 to 1469
Data columns (total 35 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Age                       1470 non-null   int64 
 1   Attrition                 1470 non-null   object
 2   BusinessTravel            1470 non-null   object
 3   DailyRate                 1470 non-null   int64 
 4   Department                1470 non-null   object
 5   DistanceFromHome          1470 non-null   int64 
 6   Education                 1470 non-null   int64 
 7   EducationField            1470 non-null   object
 8   EmployeeCount             1470 non-null   int64 
 9   EmployeeNumber            1470 non-null   int64 
 10  EnvironmentSatisfaction   1470 non-null   int64 
 11  Gender                    1470 non-null   object
 12  HourlyRate                1470 non-null   int64 
 13  JobInvolvement            1470 non-null   int64 
 14  JobLevel                  1470 non-null   int64 
 15  JobRole                   1470 non-null   object
 16  JobSatisfaction           1470 non-null   int64 
 17  MaritalStatus             1470 non-null   object
 18  MonthlyIncome             1470 non-null   int64 
 19  MonthlyRate               1470 non-null   int64 
 20  NumCompaniesWorked        1470 non-null   int64 
 21  Over18                    1470 non-null   object
 22  OverTime                  1470 non-null   object
 23  PercentSalaryHike         1470 non-null   int64 
 24  PerformanceRating         1470 non-null   int64 
 25  RelationshipSatisfaction  1470 non-null   int64 
 26  StandardHours             1470 non-null   int64 
 27  StockOptionLevel          1470 non-null   int64 
 28  TotalWorkingYears         1470 non-null   int64 
 29  TrainingTimesLastYear     1470 non-null   int64 
 30  WorkLifeBalance           1470 non-null   int64 
 31  YearsAtCompany            1470 non-null   int64 
 32  YearsInCurrentRole        1470 non-null   int64 
 33  YearsSinceLastPromotion   1470 non-null   int64 
 34  YearsWithCurrManager      1470 non-null   int64 
dtypes: int64(26), object(9)
memory usage: 402.1+ KB

Data Preprocessing

# Remove four columns from `ibm` in place with a single call.
# EmployeeCount and StandardHours have a standard deviation of 0 in describe();
# EmployeeNumber looks like a row identifier and Over18 is presumably constant ('Y') — TODO confirm.
unused_columns = ['EmployeeCount', 'EmployeeNumber', 'Over18', 'StandardHours']
ibm.drop(columns=unused_columns, inplace=True)

# drop_duplicates() returns a new DataFrame and leaves `ibm` untouched, so the
# result has to be bound back to `ibm` for the de-duplication to take effect.
ibm = ibm.drop_duplicates()
# Display the de-duplicated frame (same output the bare call used to show).
ibm

	Age	Attrition	BusinessTravel	DailyRate	Department	DistanceFromHome	Education	EducationField	EnvironmentSatisfaction	Gender	HourlyRate	JobInvolvement	JobLevel	JobRole	JobSatisfaction	MaritalStatus	MonthlyIncome	MonthlyRate	NumCompaniesWorked	OverTime	PercentSalaryHike	PerformanceRating	RelationshipSatisfaction	StockOptionLevel	TotalWorkingYears	TrainingTimesLastYear	WorkLifeBalance	YearsAtCompany	YearsInCurrentRole	YearsSinceLastPromotion	YearsWithCurrManager
0	41	Yes	Travel_Rarely	1102	Sales	1	2	Life Sciences	2	Female	94	3	2	Sales Executive	4	Single	5993	19479	8	Yes	11	3	1	0	8	0	1	6	4	0	5
1	49	No	Travel_Frequently	279	Research & Development	8	1	Life Sciences	3	Male	61	2	2	Research Scientist	2	Married	5130	24907	1	No	23	4	4	1	10	3	3	10	7	1	7
2	37	Yes	Travel_Rarely	1373	Research & Development	2	2	Other	4	Male	92	2	1	Laboratory Technician	3	Single	2090	2396	6	Yes	15	3	2	0	7	3	3	0	0	0	0
3	33	No	Travel_Frequently	1392	Research & Development	3	4	Life Sciences	4	Female	56	3	1	Research Scientist	3	Married	2909	23159	1	Yes	11	3	3	0	8	3	3	8	7	3	0
4	27	No	Travel_Rarely	591	Research & Development	2	1	Medical	1	Male	40	3	1	Laboratory Technician	2	Married	3468	16632	9	No	12	3	4	1	6	3	3	2	2	2	2
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1465	36	No	Travel_Frequently	884	Research & Development	23	2	Medical	3	Male	41	4	2	Laboratory Technician	4	Married	2571	12290	4	No	17	3	3	1	17	3	3	5	2	0	3
1466	39	No	Travel_Rarely	613	Research & Development	6	1	Medical	4	Male	42	2	3	Healthcare Representative	1	Married	9991	21457	4	No	15	3	1	1	9	5	3	7	7	1	7
1467	27	No	Travel_Rarely	155	Research & Development	4	3	Life Sciences	2	Male	87	4	2	Manufacturing Director	2	Married	6142	5174	1	Yes	20	4	2	1	6	0	3	6	2	0	3
1468	49	No	Travel_Frequently	1023	Sales	2	3	Medical	4	Male	63	2	2	Sales Executive	2	Married	5390	13243	2	No	14	3	4	0	17	3	2	9	6	0	8
1469	34	No	Travel_Rarely	628	Research & Development	8	3	Medical	2	Male	82	4	2	Laboratory Technician	3	Married	4404	10228	2	No	12	3	1	0	6	3	4	4	3	1	2

1470 rows × 31 columns

# Number of missing values in each column.
ibm.isnull().sum()

Age                         0
Attrition                   0
BusinessTravel              0
DailyRate                   0
Department                  0
DistanceFromHome            0
Education                   0
EducationField              0
EnvironmentSatisfaction     0
Gender                      0
HourlyRate                  0
JobInvolvement              0
JobLevel                    0
JobRole                     0
JobSatisfaction             0
MaritalStatus               0
MonthlyIncome               0
MonthlyRate                 0
NumCompaniesWorked          0
OverTime                    0
PercentSalaryHike           0
PerformanceRating           0
RelationshipSatisfaction    0
StockOptionLevel            0
TotalWorkingYears           0
TrainingTimesLastYear       0
WorkLifeBalance             0
YearsAtCompany              0
YearsInCurrentRole          0
YearsSinceLastPromotion     0
YearsWithCurrManager        0
dtype: int64

# Encode Attrition and BusinessTravel as integers in a single column-wise replace.
#   Attrition:      No -> 0, Yes -> 1
#   BusinessTravel: Non-Travel -> 0, Travel_Rarely -> 1, Travel_Frequently -> 2
ordinal_codes = {
    'Attrition': {'Yes': 1, 'No': 0},
    'BusinessTravel': {'Non-Travel': 0, 'Travel_Rarely': 1, 'Travel_Frequently': 2},
}
ibm.replace(ordinal_codes, inplace=True)

# Department: add one indicator column per department at positions 5-7,
# then remove the original text column.
# NOTE(review): 'Dp_Sales&Development' actually holds the 'Research & Development'
# indicator; the name is left as-is because other cells may refer to it.
dummy = pd.get_dummies(ibm['Department'])
department_columns = [
    ('Dp_Sales&Development', 'Research & Development'),
    ('Dp_Sales', 'Sales'),
    ('Dp_HumanResources', 'Human Resources'),
]
for position, (new_name, category) in enumerate(department_columns, start=5):
    ibm.insert(position, new_name, dummy[category])

ibm.drop(columns='Department', inplace=True)

# EducationField: add one indicator column per field at positions 11-16,
# then remove the original text column.
dummy = pd.get_dummies(ibm['EducationField'])
education_columns = [
    ('EF_Life Sciences', 'Life Sciences'),
    ('EF_Medical', 'Medical'),
    ('EF_Marketing', 'Marketing'),
    ('EF_TechnicalDegree', 'Technical Degree'),
    ('EF_HumanResources', 'Human Resources'),
    ('EF_Other', 'Other'),
]
for position, (new_name, category) in enumerate(education_columns, start=11):
    ibm.insert(position, new_name, dummy[category])

ibm.drop(columns='EducationField', inplace=True)

# Encode Gender as an integer: Male -> 0, Female -> 1.
gender_codes = {'Male': 0, 'Female': 1}
ibm.replace({'Gender': gender_codes}, inplace=True)

# JobRole: add one indicator column per role at positions 23-31,
# then remove the original text column.
# NOTE(review): 'JR_HealthcareRepresentive' is misspelt; the name is kept unchanged
# because other cells may refer to it.
dummy = pd.get_dummies(ibm['JobRole'])
job_role_columns = [
    ('JR_HealthcareRepresentive', 'Healthcare Representative'),
    ('JR_HumanResource', 'Human Resources'),
    ('JR_LaboratoryTechnician', 'Laboratory Technician'),
    ('JR_Manager', 'Manager'),
    ('JR_ManufacturingDirector', 'Manufacturing Director'),
    ('JR_ResearchDirector', 'Research Director'),
    ('JR_ResearchScientist', 'Research Scientist'),
    ('JR_SalesExecutive', 'Sales Executive'),
    ('JR_SalesRepresentative', 'Sales Representative'),
]
for position, (new_name, category) in enumerate(job_role_columns, start=23):
    ibm.insert(position, new_name, dummy[category])

ibm.drop(columns='JobRole', inplace=True)

# MaritalStatus: add one indicator column per status at positions 34-36,
# then remove the original text column.
dummy = pd.get_dummies(ibm['MaritalStatus'])
marital_columns = [
    ('MS_Married', 'Married'),
    ('MS_Single', 'Single'),
    ('MS_Divorced', 'Divorced'),
]
for position, (new_name, category) in enumerate(marital_columns, start=34):
    ibm.insert(position, new_name, dummy[category])

ibm.drop(columns='MaritalStatus', inplace=True)

# Encode OverTime as an integer: No -> 0, Yes -> 1.
overtime_codes = {'No': 0, 'Yes': 1}
ibm.replace({'OverTime': overtime_codes}, inplace=True)

# No encoding is applied to Over18: that column is dropped from `ibm` during the
# earlier column clean-up, so a replace keyed on 'Over18' would not match anything.

def iqr_outliers(data, whisker=1.5):
    """Print and return the values of ``data`` that lie outside its IQR fences.

    The fences are ``Q1 - whisker * IQR`` and ``Q3 + whisker * IQR``, where Q1 and
    Q3 are ``data.quantile(0.25)`` and ``data.quantile(0.75)``.

    Parameters
    ----------
    data : pandas.Series
        Numeric values to inspect (the notebook passes one DataFrame column at a time).
    whisker : float, default 1.5
        Multiple of the IQR added beyond each quartile to form the fences.

    Returns
    -------
    list
        Every value strictly above the upper fence or strictly below the lower
        fence, in original order, duplicates included.

    Side effects
    ------------
    Prints the outlier list and its length.
    """
    firstQuartile = data.quantile(0.25)
    thirdQuartile = data.quantile(0.75)

    iqr = thirdQuartile - firstQuartile

    Lower_bound = firstQuartile - whisker * iqr
    Upper_bound = thirdQuartile + whisker * iqr

    # Boolean mask instead of a Python-level loop. NaN compares False on both
    # sides, so missing values are never reported (same as an explicit loop).
    outside_fences = (data > Upper_bound) | (data < Lower_bound)
    out = data[outside_fences].tolist()

    print("Outliers:", out, "\nCount: ", len(out), "\n")
    return out

# Run the IQR check on every column of `ibm`, printing the column name first.
# NOTE(review): for 0/1 indicator columns the minority value is reported as an
# "outlier" (e.g. every Attrition == 1 row), which is not a data-quality signal.
for column_label, column_values in ibm.items():
    print(column_label)
    iqr_outliers(column_values)

Age
Outliers: [] 
Count:  0 

Attrition
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  237 

BusinessTravel
Outliers: [2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 2, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 0, 0, 2, 0, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 0, 0, 2, 0, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 0, 2, 2, 0, 2, 2, 0, 0, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 0, 0, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 2, 0, 2, 0, 2, 2, 2, 2, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 0, 2, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 2] 
Count:  427 

DailyRate
Outliers: [] 
Count:  0 

Dp_Sales&Development
Outliers: [] 
Count:  0 

Dp_Sales
Outliers: [] 
Count:  0 

Dp_HumanResources
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  63 

DistanceFromHome
Outliers: [] 
Count:  0 

Education
Outliers: [] 
Count:  0 

EnvironmentSatisfaction
Outliers: [] 
Count:  0 

EF_Life Sciences
Outliers: [] 
Count:  0 

EF_Medical
Outliers: [] 
Count:  0 

EF_Marketing
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  159 

EF_TechnicalDegree
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  132 

EF_HumanResources
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  27 

EF_Other
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  82 

Gender
Outliers: [] 
Count:  0 

HourlyRate
Outliers: [] 
Count:  0 

JobInvolvement
Outliers: [] 
Count:  0 

JobLevel
Outliers: [] 
Count:  0 

JobSatisfaction
Outliers: [] 
Count:  0 

JR_HealthcareRepresentive
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  131 

JR_HumanResource
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  52 

JR_LaboratoryTechnician
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  259 

JR_Manager
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  102 

JR_ManufacturingDirector
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  145 

JR_ResearchDirector
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  80 

JR_ResearchScientist
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  292 

JR_SalesExecutive
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  326 

JR_SalesRepresentative
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  83 

MonthlyIncome
Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] 
Count:  114 

MonthlyRate
Outliers: [] 
Count:  0 

NumCompaniesWorked
Outliers: [9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9] 
Count:  52 

MS_Married
Outliers: [] 
Count:  0 

MS_Single
Outliers: [] 
Count:  0 

MS_Divorced
Outliers: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] 
Count:  327 

OverTime
Outliers: [] 
Count:  0 

PercentSalaryHike
Outliers: [] 
Count:  0 

PerformanceRating
Outliers: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4] 
Count:  226 

RelationshipSatisfaction
Outliers: [] 
Count:  0 

StockOptionLevel
Outliers: [3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3] 
Count:  85 

TotalWorkingYears
Outliers: [31, 29, 37, 38, 30, 40, 36, 34, 32, 33, 37, 30, 36, 31, 33, 32, 37, 31, 32, 32, 30, 34, 30, 40, 29, 35, 31, 33, 31, 29, 32, 30, 33, 30, 29, 31, 32, 33, 36, 34, 31, 36, 33, 31, 29, 33, 29, 32, 31, 35, 29, 32, 34, 36, 32, 30, 36, 29, 34, 37, 29, 29, 35] 
Count:  63 

TrainingTimesLastYear
Outliers: [0, 5, 5, 5, 6, 5, 5, 5, 6, 6, 0, 0, 0, 5, 0, 5, 5, 5, 6, 6, 5, 0, 6, 5, 5, 0, 5, 5, 6, 5, 5, 5, 0, 5, 5, 5, 5, 6, 6, 5, 5, 5, 5, 0, 0, 5, 5, 5, 6, 6, 5, 0, 5, 0, 5, 5, 0, 6, 0, 5, 5, 6, 6, 5, 6, 5, 0, 5, 5, 5, 5, 0, 6, 5, 5, 5, 5, 6, 5, 5, 6, 5, 5, 5, 0, 5, 0, 5, 5, 6, 5, 6, 5, 0, 5, 5, 0, 6, 6, 5, 6, 0, 5, 0, 6, 6, 6, 6, 5, 5, 0, 5, 0, 0, 6, 0, 6, 5, 6, 5, 5, 0, 5, 6, 6, 5, 5, 0, 0, 6, 0, 0, 5, 0, 5, 6, 5, 5, 6, 6, 5, 5, 5, 5, 5, 6, 5, 6, 6, 0, 6, 6, 5, 5, 0, 0, 6, 6, 0, 5, 0, 0, 0, 0, 0, 5, 5, 6, 5, 5, 0, 5, 5, 0, 5, 5, 6, 5, 5, 5, 6, 5, 5, 5, 0, 0, 5, 5, 5, 5, 6, 0, 0, 6, 6, 6, 6, 5, 5, 5, 6, 5, 0, 5, 5, 6, 5, 6, 6, 5, 6, 6, 5, 0, 5, 5, 5, 5, 5, 0, 0, 0, 6, 5, 6, 6, 5, 6, 0, 6, 6, 5, 6, 6, 5, 5, 5, 0] 
Count:  238 

WorkLifeBalance
Outliers: [] 
Count:  0 

YearsAtCompany
Outliers: [25, 22, 22, 27, 21, 22, 37, 25, 20, 40, 20, 24, 20, 24, 33, 20, 19, 22, 33, 24, 19, 21, 20, 36, 20, 20, 22, 24, 21, 21, 25, 21, 29, 20, 27, 20, 31, 32, 20, 20, 21, 22, 22, 34, 24, 26, 31, 20, 31, 26, 19, 21, 21, 32, 21, 19, 20, 22, 20, 21, 26, 20, 22, 24, 33, 29, 25, 21, 19, 19, 20, 19, 33, 19, 19, 20, 20, 20, 20, 20, 32, 20, 21, 33, 36, 26, 30, 22, 23, 23, 21, 21, 22, 22, 19, 22, 19, 22, 20, 20, 20, 22, 20, 20] 
Count:  104 

YearsInCurrentRole
Outliers: [15, 16, 18, 15, 18, 17, 16, 15, 16, 15, 16, 16, 15, 16, 17, 15, 15, 15, 17, 17, 16] 
Count:  21 

YearsSinceLastPromotion
Outliers: [8, 15, 8, 8, 9, 13, 12, 10, 11, 9, 12, 15, 15, 15, 9, 11, 11, 9, 12, 11, 15, 11, 10, 9, 11, 9, 8, 11, 11, 8, 13, 9, 9, 12, 10, 11, 15, 13, 9, 11, 10, 8, 8, 11, 9, 11, 12, 11, 14, 13, 14, 8, 11, 15, 10, 11, 11, 15, 11, 13, 11, 13, 15, 8, 13, 15, 11, 14, 15, 15, 9, 11, 9, 8, 9, 15, 11, 12, 9, 8, 10, 14, 8, 13, 13, 12, 14, 8, 8, 8, 14, 14, 8, 12, 13, 14, 14, 12, 11, 8, 11, 9, 12, 8, 9, 11, 9] 
Count:  107 

YearsWithCurrManager
Outliers: [17, 15, 15, 15, 15, 17, 16, 17, 15, 17, 17, 17, 17, 16] 
Count:  14

def remove_outliers(c_name):
    """Drop rows of the global ``ibm`` frame until ``c_name`` has no IQR outliers.

    ``iqr_outliers`` is re-evaluated on the shrunken column after every pass,
    so values that only become outliers once the previous extremes are gone
    are removed as well. ``ibm`` is modified in place; nothing is returned.

    Parameters
    ----------
    c_name : str
        Name of the column in ``ibm`` to clean.
    """
    outliers = iqr_outliers(ibm[c_name])

    while len(outliers) != 0:
        # One vectorised drop per pass instead of one drop per outlier value.
        ibm.drop(ibm.index[ibm[c_name].isin(outliers)], inplace=True)
        outliers = iqr_outliers(ibm[c_name])

# Strip MonthlyIncome outliers from ibm (in place); the helper repeats the IQR check until no outliers remain.
remove_outliers('MonthlyIncome')

Outliers: [19094, 18947, 19545, 18740, 18844, 18172, 17328, 16959, 19537, 17181, 19926, 19033, 18722, 19999, 16792, 19232, 19517, 19068, 19202, 19436, 16872, 19045, 19144, 17584, 18665, 17068, 19272, 18300, 16659, 19406, 19197, 19566, 18041, 17046, 17861, 16835, 16595, 19502, 18200, 16627, 19513, 19141, 19189, 16856, 19859, 18430, 17639, 16752, 19246, 17159, 17924, 17099, 17444, 17399, 19419, 18303, 19973, 19845, 17650, 19237, 19627, 16756, 17665, 16885, 17465, 19626, 19943, 18606, 17048, 17856, 19081, 17779, 19740, 18711, 18265, 18213, 18824, 18789, 19847, 19190, 18061, 17123, 16880, 17861, 19187, 19717, 16799, 17328, 19701, 17169, 16598, 17007, 16606, 19586, 19331, 19613, 17567, 19049, 19658, 17426, 17603, 16704, 19833, 19038, 19328, 19392, 19665, 16823, 17174, 17875, 19161, 19636, 19431, 18880] 
Count:  114 

Outliers: [15427, 13458, 14756, 13245, 13664, 13503, 13549, 13872, 13734, 13591, 16064, 13675, 13496, 13603, 13525, 16015, 13964, 15992, 14336, 13212, 16555, 14118, 13610, 13237, 16184, 15402, 14814, 13770, 16307, 13826, 14275, 13582, 14852, 13194, 13973, 13726, 13320, 13120, 13499, 13758, 13191, 16124, 13577, 14026, 13142, 13695, 13402, 13247, 14732, 16422, 13757, 16032, 16328, 14411, 16437, 15202, 16413, 13269, 13966, 15972, 15379, 12936, 12965, 13116, 13464, 16291, 15787, 13225, 13348, 13341, 13206, 13744, 13570] 
Count:  73 

Outliers: [11994, 12490, 12185, 11849, 11996, 12061, 11878, 12504, 11935, 12808, 11836, 12742, 11904, 12169, 11916, 11957, 12031] 
Count:  17 

Outliers: [11713, 11691] 
Count:  2 

Outliers: [11631] 
Count:  1 

Outliers: [] 
Count:  0

# Display the frame after outlier removal to check the remaining rows and columns.
ibm

	Age	Attrition	BusinessTravel	DailyRate	Dp_Sales&Development	Dp_Sales	Dp_HumanResources	DistanceFromHome	Education	EnvironmentSatisfaction	EF_Life Sciences	EF_Medical	EF_Marketing	EF_TechnicalDegree	EF_HumanResources	EF_Other	Gender	HourlyRate	JobInvolvement	JobLevel	JobSatisfaction	JR_HealthcareRepresentive	JR_HumanResource	JR_LaboratoryTechnician	JR_Manager	JR_ManufacturingDirector	JR_ResearchDirector	JR_ResearchScientist	JR_SalesExecutive	JR_SalesRepresentative	MonthlyIncome	MonthlyRate	NumCompaniesWorked	MS_Married	MS_Single	MS_Divorced	OverTime	PercentSalaryHike	PerformanceRating	RelationshipSatisfaction	StockOptionLevel	TotalWorkingYears	TrainingTimesLastYear	WorkLifeBalance	YearsAtCompany	YearsInCurrentRole	YearsSinceLastPromotion	YearsWithCurrManager
0	41	1	1	1102	0	1	0	1	2	2	1	0	0	0	0	0	1	94	3	2	4	0	0	0	0	0	0	0	1	0	5993	19479	8	0	1	0	1	11	3	1	0	8	0	1	6	4	0	5
1	49	0	2	279	1	0	0	8	1	3	1	0	0	0	0	0	0	61	2	2	2	0	0	0	0	0	0	1	0	0	5130	24907	1	1	0	0	0	23	4	4	1	10	3	3	10	7	1	7
2	37	1	1	1373	1	0	0	2	2	4	0	0	0	0	0	1	0	92	2	1	3	0	0	1	0	0	0	0	0	0	2090	2396	6	0	1	0	1	15	3	2	0	7	3	3	0	0	0	0
3	33	0	2	1392	1	0	0	3	4	4	1	0	0	0	0	0	1	56	3	1	3	0	0	0	0	0	0	1	0	0	2909	23159	1	1	0	0	1	11	3	3	0	8	3	3	8	7	3	0
4	27	0	1	591	1	0	0	2	1	1	0	1	0	0	0	0	0	40	3	1	2	0	0	1	0	0	0	0	0	0	3468	16632	9	1	0	0	0	12	3	4	1	6	3	3	2	2	2	2
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1465	36	0	2	884	1	0	0	23	2	3	0	1	0	0	0	0	0	41	4	2	4	0	0	1	0	0	0	0	0	0	2571	12290	4	1	0	0	0	17	3	3	1	17	3	3	5	2	0	3
1466	39	0	1	613	1	0	0	6	1	4	0	1	0	0	0	0	0	42	2	3	1	1	0	0	0	0	0	0	0	0	9991	21457	4	1	0	0	0	15	3	1	1	9	5	3	7	7	1	7
1467	27	0	1	155	1	0	0	4	3	2	1	0	0	0	0	0	0	87	4	2	2	0	0	0	0	1	0	0	0	0	6142	5174	1	1	0	0	1	20	4	2	1	6	0	3	6	2	0	3
1468	49	0	2	1023	0	1	0	2	3	4	0	1	0	0	0	0	0	63	2	2	2	0	0	0	0	0	0	0	1	0	5390	13243	2	1	0	0	0	14	3	4	0	17	3	2	9	6	0	8
1469	34	0	1	628	1	0	0	8	3	2	0	1	0	0	0	0	0	82	4	2	3	0	0	1	0	0	0	0	0	0	4404	10228	2	1	0	0	0	12	3	1	0	6	3	4	4	3	1	2

1263 rows × 48 columns

Classification

Support Vector Machine (prepared by Teh Liang Sean)

# import important library to do SVM
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Target: Attrition (does the employee leave IBM or stay); every other column is a candidate feature.
y_svm = ibm['Attrition']
x_svm_find = ibm.drop(columns=['Attrition'])

# Rank the candidate features with SelectKBest and the chi-squared (chi²) test
# (valid for non-negative features) and list the 15 highest-scoring ones.
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

best_15_features = SelectKBest(score_func=chi2, k=15)
fit = best_15_features.fit(x_svm_find, y_svm)

# One row per candidate feature: its name next to its chi² score
top_15_feature_scores = pd.DataFrame({
    'Features': x_svm_find.columns,
    'Score': fit.scores_,
})

# Show 15 best features
print(top_15_feature_scores.nlargest(15, 'Score'))

                     Features         Score
29              MonthlyIncome  26471.159476
30                MonthlyRate   1308.443569
2                   DailyRate   1111.594737
44         YearsInCurrentRole    109.263859
43             YearsAtCompany    103.805057
46       YearsWithCurrManager    100.636711
40          TotalWorkingYears     95.843571
35                   OverTime     60.367656
6            DistanceFromHome     57.197704
0                         Age     46.705340
28     JR_SalesRepresentative     27.299127
33                  MS_Single     26.251695
39           StockOptionLevel     24.376114
20  JR_HealthcareRepresentive     10.935616
24   JR_ManufacturingDirector      9.987076

# Feature matrix for the SVM: the 15 columns with the highest chi-squared
# scores, kept in descending score order.
top_15_svm_features = [
    'MonthlyIncome',
    'MonthlyRate',
    'DailyRate',
    'YearsInCurrentRole',
    'YearsAtCompany',
    'YearsWithCurrManager',
    'TotalWorkingYears',
    'OverTime',
    'DistanceFromHome',
    'Age',
    'JR_SalesRepresentative',
    'MS_Single',
    'StockOptionLevel',
    'JR_HealthcareRepresentive',
    'JR_ManufacturingDirector',
]
# Select by name so the column labels match ibm exactly (no stray whitespace);
# .copy() keeps the feature frame independent of ibm.
ibm_svm_features_df = ibm[top_15_svm_features].copy()

# Display the selected feature columns to check the frame that will be used for the SVM.
ibm_svm_features_df

	MonthlyIncome	MonthlyRate	DailyRate	YearsInCurrentRole	YearsAtCompany	YearsWithCurrManager	TotalWorkingYears	OverTime	DistanceFromHome	Age	JR_SalesRepresentative	MS_Single	StockOptionLevel	JR_HealthcareRepresentive	JR_ManufacturingDirector
0	5993	19479	1102	4	6	5	8	1	1	41	0	1	0	0	0
1	5130	24907	279	7	10	7	10	0	8	49	0	0	1	0	0
2	2090	2396	1373	0	0	0	7	1	2	37	0	1	0	0	0
3	2909	23159	1392	7	8	0	8	1	3	33	0	0	0	0	0
4	3468	16632	591	2	2	2	6	0	2	27	0	0	1	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
1465	2571	12290	884	2	5	3	17	0	23	36	0	0	1	0	0
1466	9991	21457	613	7	7	7	9	0	6	39	0	0	1	1	0
1467	6142	5174	155	2	6	3	6	1	4	27	0	0	1	0	1
1468	5390	13243	1023	6	9	8	17	0	2	49	0	0	0	0	0
1469	4404	10228	628	3	4	2	6	0	8	34	0	0	0	0	0

1263 rows × 15 columns

# Use the selected-feature frame as the SVM input matrix (x_svm is an alias, not a copy)
x_svm = ibm_svm_features_df

# Standardize every feature column with StandardScaler so that large-valued columns
# (e.g. MonthlyIncome) do not dominate the small-valued ones when training the SVM.
# NOTE(review): the scaler is fitted on the full data set, before the train/test split.
from sklearn.preprocessing import StandardScaler
s_scaler = StandardScaler()
x_scaled_svm = s_scaler.fit_transform(x_svm)

# Attrition is imbalanced (far fewer 1 than 0 values), so under-sample with Tomek links;
# sampling_strategy='not minority' resamples every class except the minority one.
from imblearn.under_sampling import TomekLinks

tl_svm = TomekLinks(sampling_strategy='not minority')
# Resample the standardized features, not the raw ones: both the Tomek-link
# neighbour search and the SVM kernels are distance based, so the raw
# MonthlyIncome / MonthlyRate magnitudes would otherwise dominate.
x_tl_svm, y_tl_svm = tl_svm.fit_resample(
    pd.DataFrame(x_scaled_svm, columns=x_svm.columns, index=x_svm.index),
    y_svm,
)

# Split the resampled data: 80% to train the models, 20% held out for testing
# (stratified on the target, fixed random_state for a repeatable split)
x_train_svm, x_test_svm, y_train_svm, y_test_svm = train_test_split(x_tl_svm,y_tl_svm, test_size=0.2,random_state=40, stratify=y_tl_svm)

# Model 1: SVM whose hyperparameters were tuned by hand
model_1_svm = svm.SVC(
    kernel='sigmoid',
    C=2,
    gamma='scale',
    coef0=0.6,
    probability=True,
    random_state=40,
)
model_1_svm.fit(x_train_svm, y_train_svm)
y_predict_1_svm = model_1_svm.predict(x_test_svm)

# Model 2 uses GridSearchCV to find the best SVM hyperparameters by cross-validation.
# Only some hyperparameters are tuned.

# import GridSearchCV library
from sklearn.model_selection import GridSearchCV

# Search space, split per kernel so that only parameters the kernel uses are varied:
#   kernel : linear / rbf / sigmoid
#   C      : regularization parameter, 1.0 to 2.9 in steps of 0.1 (all kernels)
#   gamma  : kernel coefficient, 'auto' or 'scale' (rbf and sigmoid only)
#   coef0  : independent term of the kernel, 0.0 to 1.4 in steps of 0.1 (sigmoid only)
# 'degree' is not searched: it only affects the 'poly' kernel, which is not a candidate.
# This gives 20 + 40 + 600 = 660 candidates; varying the ignored parameters as well
# would only refit identical models.
c_values = np.round(np.arange(1.0, 3.0, 0.1), 1).tolist()
coef0_values = np.round(np.arange(0.0, 1.5, 0.1), 1).tolist()
gamma_values = ['auto', 'scale']

param_grid = [
    {'kernel': ['linear'], 'C': c_values},
    {'kernel': ['rbf'], 'C': c_values, 'gamma': gamma_values},
    {'kernel': ['sigmoid'], 'C': c_values, 'gamma': gamma_values, 'coef0': coef0_values},
]
# set random state to 40
find_best_para_model = svm.SVC(random_state=40)
Grid_search_svm = GridSearchCV(find_best_para_model, param_grid, n_jobs=-1, verbose=2, cv=4)
# this may take some time to run
Grid_search_svm.fit(x_train_svm, y_train_svm)

Fitting 4 folds for each of 5400 candidates, totalling 21600 fits

# Show the best hyperparameter combination found by the grid search
Grid_search_svm.best_params_

{'C': 2.8000000000000016,
 'coef0': 0.0,
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf'}

# Build model 2 from the hyperparameters selected by the grid search.
# They are read from best_params_ rather than copied by hand, so the model stays
# in sync whenever the search is re-run. probability=True is required for the
# predict_proba call used by the ROC curve.
model_2_svm = svm.SVC(**Grid_search_svm.best_params_, probability=True, random_state=40)
model_2_svm.fit(x_train_svm, y_train_svm)
y_predict_2_svm = model_2_svm.predict(x_test_svm)

# Evaluate the accuracy of both classifiers on the held-out test set
accuracy_model_1 = metrics.accuracy_score(y_test_svm, y_predict_1_svm)
accuracy_model_2 = metrics.accuracy_score(y_test_svm, y_predict_2_svm)

print('Accuracy of prediction classification result for 2 model')
print('Hyperparameters that try to tune manually (model 1): ', accuracy_model_1)
print('Best hyperparameters found using GridSearchCV (model 2): ', accuracy_model_2)

Accuracy of prediction classification result for 2 model
Hyperparameters that try to tune manually (model 1):  0.7416666666666667
Best hyperparameters found using GridSearchCV (model 2):  0.8166666666666667

#Evaluating classification result by confusion matrix
from sklearn.metrics import confusion_matrix
# labels is passed by keyword: positional use raises a FutureWarning and
# becomes an error from scikit-learn 1.0 onwards.
print (confusion_matrix(y_test_svm, y_predict_2_svm, labels=[0,1]))

#Evaluating classification result by Precision, Recall and F1-Measure
from sklearn.metrics import classification_report
print (classification_report(y_test_svm, y_predict_2_svm))

[[183  12]
 [ 32  13]]
              precision    recall  f1-score   support

           0       0.85      0.94      0.89       195
           1       0.52      0.29      0.37        45

    accuracy                           0.82       240
   macro avg       0.69      0.61      0.63       240
weighted avg       0.79      0.82      0.79       240

C:\Users\USER\anaconda3\lib\site-packages\sklearn\utils\validation.py:70: FutureWarning: Pass labels=[0, 1] as keyword args. From version 1.0 (renaming of 0.25) passing these as positional arguments will result in an error
  warnings.warn(f"Pass {args_msg} as keyword args. From version "

from matplotlib import pyplot as plt
#Evaluating classification result by ROC curve
# roc_auc_score is imported together with roc_curve so this cell does not
# depend on an import made elsewhere in the notebook.
from sklearn.metrics import roc_curve, roc_auc_score

# Predicted probability of the positive class (column 1) for each test sample
y_pred_prob_svm = model_2_svm.predict_proba(x_test_svm)[:,1]
fpr, tpr, threshold = roc_curve(y_test_svm, y_pred_prob_svm)
# Dashed diagonal: reference line where true positive rate equals false positive rate
plt.plot([0, 1], [0, 1], 'k--')
plt.plot(fpr,tpr)
auc = roc_auc_score(y_test_svm,  y_pred_prob_svm)
plt.title(f'AUC: {auc}')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.show()

About us

We are a group of programmers helping each other build new things, whether it be writing complex encryption programs, or simple ciphers. Our goal is to work together to document and model beautiful, helpful and interesting algorithms using code. We are an open-source community - anyone can contribute. We check each other's work, communicate and collaborate to solve problems. We strive to be welcoming and respectful, while making sure that our code follows the latest programming guidelines.